From 5f283adc8d1a29f420d466e85b216e9d6f4a9822 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 2 Jul 2021 23:17:06 +0200 Subject: Remove dependency on html tidy --- include/quickmedia/HtmlSearch.h | 53 +++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) (limited to 'include/quickmedia/HtmlSearch.h') diff --git a/include/quickmedia/HtmlSearch.h b/include/quickmedia/HtmlSearch.h index bedde03..63f2175 100644 --- a/include/quickmedia/HtmlSearch.h +++ b/include/quickmedia/HtmlSearch.h @@ -2,47 +2,74 @@ #define QUICKMEDIA_HTML_SEARCH_H #include "NodeSearch.h" +#include #include #ifdef __cplusplus extern "C" { #endif +typedef struct QuickMediaHtmlAttribute QuickMediaHtmlAttribute; +typedef struct QuickMediaHtmlNode QuickMediaHtmlNode; +typedef struct QuickMediaHtmlChildNode QuickMediaHtmlChildNode; +typedef struct QuickMediaTextNode QuickMediaTextNode; + typedef struct { char *data; size_t size; size_t capacity; } QuickMediaString; +struct QuickMediaHtmlAttribute { + QuickMediaStringView key; + QuickMediaStringView value; + QuickMediaHtmlAttribute *next; +}; + +struct QuickMediaHtmlNode { + int is_tag; /* 0 = text, 1 = tag */ + QuickMediaStringView name; /* name if the node is a tag, text if the node is a text */ + QuickMediaHtmlAttribute *first_attribute; + QuickMediaHtmlAttribute *last_attribute; + QuickMediaHtmlChildNode *first_child; + QuickMediaHtmlChildNode *last_child; + QuickMediaHtmlNode *parent; +}; + +struct QuickMediaHtmlChildNode { + QuickMediaHtmlNode node; + QuickMediaHtmlChildNode *next; +}; + typedef struct { - const void *doc; - const void *node; - QuickMediaString text; -} QuickMediaHtmlNode; + QuickMediaHtmlNode *node; + QuickMediaString __str; +} QuickMediaMatchNode; typedef struct { - const void *doc; + QuickMediaHtmlNode root_node; } QuickMediaHtmlSearch; /* - Returns NULL if attribute doesn't exist or if it doesn't have any value. + Returns an empty string view if attribute doesn't exist or if it doesn't have any value. The result is only valid within the callback function scope. */ -const char* quickmedia_html_node_get_attribute_value(QuickMediaHtmlNode *self, const char *attribute_name); +QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name); /* - Returns NULL if the node doesn't have any text. + Returns an empty string if the node doesn't have any text or if there was an error creating the text. The result is only valid within the callback function scope. */ -const char* quickmedia_html_node_get_text(QuickMediaHtmlNode *self); +QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self); -/* @node is only valid within the callback function scope */ -typedef void (*QuickMediaHtmlSearchResultCallback)(QuickMediaHtmlNode *node, void *userdata); +/* @node is only valid within the callback function scope. Return 0 to continue */ +typedef int (*QuickMediaHtmlSearchResultCallback)(QuickMediaMatchNode *node, void *userdata); -int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source); +/* |html_source| should be in utf8 format and may contain utf8 BOM */ +int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size); void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self); -/* Non-standard xpath. Doesn't use '@' symbol for accessing properties */ +/* Non-standard xpath. Doesn't use '@' symbol for accessing properties. Returns non-0 value if there is a syntax error in the xpath */ int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata); #ifdef __cplusplus -- cgit v1.2.3