From 5f283adc8d1a29f420d466e85b216e9d6f4a9822 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 2 Jul 2021 23:17:06 +0200 Subject: Remove dependency on html tidy --- include/quickmedia/HtmlSearch.h | 53 ++++++++++++++++++++++++++++--------- include/quickmedia/NodeSearch.h | 18 +++++++------ include/quickmedia/XpathTokenizer.h | 2 -- 3 files changed, 50 insertions(+), 23 deletions(-) (limited to 'include/quickmedia') diff --git a/include/quickmedia/HtmlSearch.h b/include/quickmedia/HtmlSearch.h index bedde03..63f2175 100644 --- a/include/quickmedia/HtmlSearch.h +++ b/include/quickmedia/HtmlSearch.h @@ -2,47 +2,74 @@ #define QUICKMEDIA_HTML_SEARCH_H #include "NodeSearch.h" +#include #include #ifdef __cplusplus extern "C" { #endif +typedef struct QuickMediaHtmlAttribute QuickMediaHtmlAttribute; +typedef struct QuickMediaHtmlNode QuickMediaHtmlNode; +typedef struct QuickMediaHtmlChildNode QuickMediaHtmlChildNode; +typedef struct QuickMediaTextNode QuickMediaTextNode; + typedef struct { char *data; size_t size; size_t capacity; } QuickMediaString; +struct QuickMediaHtmlAttribute { + QuickMediaStringView key; + QuickMediaStringView value; + QuickMediaHtmlAttribute *next; +}; + +struct QuickMediaHtmlNode { + int is_tag; /* 0 = text, 1 = tag */ + QuickMediaStringView name; /* name if the node is a tag, text if the node is a text */ + QuickMediaHtmlAttribute *first_attribute; + QuickMediaHtmlAttribute *last_attribute; + QuickMediaHtmlChildNode *first_child; + QuickMediaHtmlChildNode *last_child; + QuickMediaHtmlNode *parent; +}; + +struct QuickMediaHtmlChildNode { + QuickMediaHtmlNode node; + QuickMediaHtmlChildNode *next; +}; + typedef struct { - const void *doc; - const void *node; - QuickMediaString text; -} QuickMediaHtmlNode; + QuickMediaHtmlNode *node; + QuickMediaString __str; +} QuickMediaMatchNode; typedef struct { - const void *doc; + QuickMediaHtmlNode root_node; } QuickMediaHtmlSearch; /* - Returns NULL if attribute doesn't exist or if it doesn't have any 
value. + Returns an empty string view if attribute doesn't exist or if it doesn't have any value. The result is only valid within the callback function scope. */ -const char* quickmedia_html_node_get_attribute_value(QuickMediaHtmlNode *self, const char *attribute_name); +QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name); /* - Returns NULL if the node doesn't have any text. + Returns an empty string if the node doesn't have any text or if there was an error creating the text. The result is only valid within the callback function scope. */ -const char* quickmedia_html_node_get_text(QuickMediaHtmlNode *self); +QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self); -/* @node is only valid within the callback function scope */ -typedef void (*QuickMediaHtmlSearchResultCallback)(QuickMediaHtmlNode *node, void *userdata); +/* @node is only valid within the callback function scope. Return 0 to continue */ +typedef int (*QuickMediaHtmlSearchResultCallback)(QuickMediaMatchNode *node, void *userdata); -int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source); +/* |html_source| should be in utf8 format and may contain utf8 BOM */ +int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size); void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self); -/* Non-standard xpath. Doesn't use '@' symbol for accessing properties */ +/* Non-standard xpath. Doesn't use '@' symbol for accessing properties. 
Returns non-0 value if there is a syntax error in the xpath */ int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata); #ifdef __cplusplus diff --git a/include/quickmedia/NodeSearch.h b/include/quickmedia/NodeSearch.h index adaac44..9e7fd0c 100644 --- a/include/quickmedia/NodeSearch.h +++ b/include/quickmedia/NodeSearch.h @@ -1,30 +1,32 @@ #ifndef QUICKMEDIA_NODE_SEARCH_H #define QUICKMEDIA_NODE_SEARCH_H +#include <stddef.h> + #ifdef __cplusplus extern "C" { #endif typedef struct { - char *name; - char *value; + const char *data; + size_t size; +} QuickMediaStringView; + +typedef struct { + QuickMediaStringView name; + QuickMediaStringView value; int defined; } QuickMediaNodeSearchParam; typedef struct QuickMediaNodeSearch QuickMediaNodeSearch; struct QuickMediaNodeSearch { - char *name; /* optional */ + QuickMediaStringView name; /* optional */ int recursive; QuickMediaNodeSearchParam param; /* optional */ QuickMediaNodeSearch *child; /* optional */ }; -typedef struct { - const char *data; - unsigned long long size; -} QuickMediaStringView; - void quickmedia_node_search_param_init(QuickMediaNodeSearchParam *self); void quickmedia_node_search_init(QuickMediaNodeSearch *self); void quickmedia_node_search_deinit(QuickMediaNodeSearch *self); diff --git a/include/quickmedia/XpathTokenizer.h b/include/quickmedia/XpathTokenizer.h index cada673..62f6d75 100644 --- a/include/quickmedia/XpathTokenizer.h +++ b/include/quickmedia/XpathTokenizer.h @@ -30,8 +30,6 @@ typedef enum { void quickmedia_xpath_tokenizer_init(QuickMediaXpathTokenizer *self, const char *xpath); QuickMediaXpathToken quickmedia_xpath_tokenizer_next(QuickMediaXpathTokenizer *self); int quickmedia_xpath_tokenizer_next_if(QuickMediaXpathTokenizer *self, QuickMediaXpathToken token); -char* quickmedia_xpath_tokenizer_copy_identifier(QuickMediaXpathTokenizer *self); -char* 
quickmedia_xpath_tokenizer_copy_string(QuickMediaXpathTokenizer *self); #ifdef __cplusplus } -- cgit v1.2.3