aboutsummaryrefslogtreecommitdiff
path: root/include/quickmedia
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-07-02 23:17:06 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-07-02 23:17:06 +0200
commit 5f283adc8d1a29f420d466e85b216e9d6f4a9822 (patch)
tree 1006965593e1f7c2571f0f74a64c3555fc03d506 /include/quickmedia
parent 48ceb8591e1e3c7695d091f02a6a43edb5f77936 (diff)
Remove dependency on html tidy
Diffstat (limited to 'include/quickmedia')
-rw-r--r-- include/quickmedia/HtmlSearch.h 53
-rw-r--r-- include/quickmedia/NodeSearch.h 18
-rw-r--r-- include/quickmedia/XpathTokenizer.h 2
3 files changed, 50 insertions, 23 deletions
diff --git a/include/quickmedia/HtmlSearch.h b/include/quickmedia/HtmlSearch.h
index bedde03..63f2175 100644
--- a/include/quickmedia/HtmlSearch.h
+++ b/include/quickmedia/HtmlSearch.h
@@ -2,47 +2,74 @@
#define QUICKMEDIA_HTML_SEARCH_H
#include "NodeSearch.h"
+#include <HtmlParser.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
+typedef struct QuickMediaHtmlAttribute QuickMediaHtmlAttribute;
+typedef struct QuickMediaHtmlNode QuickMediaHtmlNode;
+typedef struct QuickMediaHtmlChildNode QuickMediaHtmlChildNode;
+typedef struct QuickMediaTextNode QuickMediaTextNode;
+
typedef struct {
char *data;
size_t size;
size_t capacity;
} QuickMediaString;
+struct QuickMediaHtmlAttribute {
+ QuickMediaStringView key;
+ QuickMediaStringView value;
+ QuickMediaHtmlAttribute *next;
+};
+
+struct QuickMediaHtmlNode {
+ int is_tag; /* 0 = text, 1 = tag */
+ QuickMediaStringView name; /* name if the node is a tag, text if the node is a text */
+ QuickMediaHtmlAttribute *first_attribute;
+ QuickMediaHtmlAttribute *last_attribute;
+ QuickMediaHtmlChildNode *first_child;
+ QuickMediaHtmlChildNode *last_child;
+ QuickMediaHtmlNode *parent;
+};
+
+struct QuickMediaHtmlChildNode {
+ QuickMediaHtmlNode node;
+ QuickMediaHtmlChildNode *next;
+};
+
typedef struct {
- const void *doc;
- const void *node;
- QuickMediaString text;
-} QuickMediaHtmlNode;
+ QuickMediaHtmlNode *node;
+ QuickMediaString __str;
+} QuickMediaMatchNode;
typedef struct {
- const void *doc;
+ QuickMediaHtmlNode root_node;
} QuickMediaHtmlSearch;
/*
- Returns NULL if attribute doesn't exist or if it doesn't have any value.
+ Returns an empty string view if the attribute doesn't exist or if it doesn't have any value.
The result is only valid within the callback function scope.
*/
-const char* quickmedia_html_node_get_attribute_value(QuickMediaHtmlNode *self, const char *attribute_name);
+QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name);
/*
- Returns NULL if the node doesn't have any text.
+ Returns an empty string view if the node doesn't have any text or if there was an error creating the text.
The result is only valid within the callback function scope.
*/
-const char* quickmedia_html_node_get_text(QuickMediaHtmlNode *self);
+QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self);
-/* @node is only valid within the callback function scope */
-typedef void (*QuickMediaHtmlSearchResultCallback)(QuickMediaHtmlNode *node, void *userdata);
+/* @node is only valid within the callback function scope. Return 0 to continue */
+typedef int (*QuickMediaHtmlSearchResultCallback)(QuickMediaMatchNode *node, void *userdata);
-int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source);
+/* |html_source| should be in utf8 format and may contain utf8 BOM */
+int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size);
void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self);
-/* Non-standard xpath. Doesn't use '@' symbol for accessing properties */
+/* Non-standard xpath. Doesn't use '@' symbol for accessing properties. Returns non-0 value if there is a syntax error in the xpath */
int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata);
#ifdef __cplusplus
diff --git a/include/quickmedia/NodeSearch.h b/include/quickmedia/NodeSearch.h
index adaac44..9e7fd0c 100644
--- a/include/quickmedia/NodeSearch.h
+++ b/include/quickmedia/NodeSearch.h
@@ -1,30 +1,32 @@
#ifndef QUICKMEDIA_NODE_SEARCH_H
#define QUICKMEDIA_NODE_SEARCH_H
+#include <stddef.h>
+
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
- char *name;
- char *value;
+ const char *data;
+ size_t size;
+} QuickMediaStringView;
+
+typedef struct {
+ QuickMediaStringView name;
+ QuickMediaStringView value;
int defined;
} QuickMediaNodeSearchParam;
typedef struct QuickMediaNodeSearch QuickMediaNodeSearch;
struct QuickMediaNodeSearch {
- char *name; /* optional */
+ QuickMediaStringView name; /* optional */
int recursive;
QuickMediaNodeSearchParam param; /* optional */
QuickMediaNodeSearch *child; /* optional */
};
-typedef struct {
- const char *data;
- unsigned long long size;
-} QuickMediaStringView;
-
void quickmedia_node_search_param_init(QuickMediaNodeSearchParam *self);
void quickmedia_node_search_init(QuickMediaNodeSearch *self);
void quickmedia_node_search_deinit(QuickMediaNodeSearch *self);
diff --git a/include/quickmedia/XpathTokenizer.h b/include/quickmedia/XpathTokenizer.h
index cada673..62f6d75 100644
--- a/include/quickmedia/XpathTokenizer.h
+++ b/include/quickmedia/XpathTokenizer.h
@@ -30,8 +30,6 @@ typedef enum {
void quickmedia_xpath_tokenizer_init(QuickMediaXpathTokenizer *self, const char *xpath);
QuickMediaXpathToken quickmedia_xpath_tokenizer_next(QuickMediaXpathTokenizer *self);
int quickmedia_xpath_tokenizer_next_if(QuickMediaXpathTokenizer *self, QuickMediaXpathToken token);
-char* quickmedia_xpath_tokenizer_copy_identifier(QuickMediaXpathTokenizer *self);
-char* quickmedia_xpath_tokenizer_copy_string(QuickMediaXpathTokenizer *self);
#ifdef __cplusplus
}