From 5f283adc8d1a29f420d466e85b216e9d6f4a9822 Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Fri, 2 Jul 2021 23:17:06 +0200
Subject: Remove dependency on html tidy

---
 include/quickmedia/HtmlSearch.h     | 53 ++++++++++++++++++++++++++++---------
 include/quickmedia/NodeSearch.h     | 18 +++++++------
 include/quickmedia/XpathTokenizer.h |  2 --
 3 files changed, 50 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/quickmedia/HtmlSearch.h b/include/quickmedia/HtmlSearch.h
index bedde03..63f2175 100644
--- a/include/quickmedia/HtmlSearch.h
+++ b/include/quickmedia/HtmlSearch.h
@@ -2,47 +2,74 @@
 #define QUICKMEDIA_HTML_SEARCH_H
 
 #include "NodeSearch.h"
+#include <HtmlParser.h>
 #include <stddef.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef struct QuickMediaHtmlAttribute QuickMediaHtmlAttribute;
+typedef struct QuickMediaHtmlNode QuickMediaHtmlNode;
+typedef struct QuickMediaHtmlChildNode QuickMediaHtmlChildNode;
+typedef struct QuickMediaTextNode QuickMediaTextNode;
+
 typedef struct {
     char *data;
     size_t size;
     size_t capacity;
 } QuickMediaString;
 
+struct QuickMediaHtmlAttribute { /* one key/value attribute of a node; attributes are chained through |next| */
+    QuickMediaStringView key; /* attribute name, as a non-owning view */
+    QuickMediaStringView value; /* attribute value; NOTE(review): presumably an empty view when the attribute has no value - confirm */
+    QuickMediaHtmlAttribute *next; /* following attribute; NOTE(review): presumably NULL for the last one - confirm */
+};
+
+struct QuickMediaHtmlNode { /* a tag or text node; see |is_tag| for which one */
+    int is_tag; /* 0 = text, 1 = tag */
+    QuickMediaStringView name; /* tag name if the node is a tag, text content if the node is a text node */
+    QuickMediaHtmlAttribute *first_attribute; /* start of the attribute chain (linked through QuickMediaHtmlAttribute.next) */
+    QuickMediaHtmlAttribute *last_attribute; /* NOTE(review): presumably the tail of the attribute chain, kept for appending - confirm */
+    QuickMediaHtmlChildNode *first_child; /* start of the child chain (linked through QuickMediaHtmlChildNode.next) */
+    QuickMediaHtmlChildNode *last_child; /* NOTE(review): presumably the tail of the child chain, kept for appending - confirm */
+    QuickMediaHtmlNode *parent; /* NOTE(review): presumably NULL for the root node - confirm */
+};
+
+struct QuickMediaHtmlChildNode { /* chain entry that holds one child node */
+    QuickMediaHtmlNode node; /* held by value, so QuickMediaHtmlNode must be fully defined before this struct */
+    QuickMediaHtmlChildNode *next; /* following sibling; NOTE(review): presumably NULL for the last child - confirm */
+};
+
 typedef struct {
-    const void *doc;
-    const void *node;
-    QuickMediaString text;
-} QuickMediaHtmlNode;
+    QuickMediaHtmlNode *node; /* the node handed to the search result callback */
+    QuickMediaString __str; /* NOTE(review): presumably scratch storage for quickmedia_html_node_get_text - confirm; identifiers starting with "__" are reserved in C and C++ */
+} QuickMediaMatchNode;
 
 typedef struct {
-    const void *doc;
+    QuickMediaHtmlNode root_node; /* held by value; NOTE(review): presumably the top of the node tree built by quickmedia_html_search_init - confirm */
 } QuickMediaHtmlSearch;
 
 /*
-    Returns NULL if attribute doesn't exist or if it doesn't have any value.
+    Returns an empty string view if attribute doesn't exist or if it doesn't have any value.
     The result is only valid within the callback function scope.
 */
-const char* quickmedia_html_node_get_attribute_value(QuickMediaHtmlNode *self, const char *attribute_name);
+QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name);
 
 /*
-    Returns NULL if the node doesn't have any text.
+    Returns an empty string view if the node doesn't have any text or if there was an error creating the text.
     The result is only valid within the callback function scope.
 */
-const char* quickmedia_html_node_get_text(QuickMediaHtmlNode *self);
+QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self);
 
-/* @node is only valid within the callback function scope */
-typedef void (*QuickMediaHtmlSearchResultCallback)(QuickMediaHtmlNode *node, void *userdata);
+/* @node is only valid within the callback function scope. Return 0 to continue */
+typedef int (*QuickMediaHtmlSearchResultCallback)(QuickMediaMatchNode *node, void *userdata);
 
-int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source);
+/* |html_source| should be UTF-8 encoded and may begin with a UTF-8 BOM */
+int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size);
 void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self);
 
-/* Non-standard xpath. Doesn't use '@' symbol for accessing properties */
+/* Non-standard xpath. Doesn't use the '@' symbol for accessing properties. Returns a non-zero value if there is a syntax error in the xpath */
 int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata);
 
 #ifdef __cplusplus
diff --git a/include/quickmedia/NodeSearch.h b/include/quickmedia/NodeSearch.h
index adaac44..9e7fd0c 100644
--- a/include/quickmedia/NodeSearch.h
+++ b/include/quickmedia/NodeSearch.h
@@ -1,30 +1,32 @@
 #ifndef QUICKMEDIA_NODE_SEARCH_H
 #define QUICKMEDIA_NODE_SEARCH_H
 
+#include <stddef.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 typedef struct {
-    char *name;
-    char *value;
+    const char *data; /* start of the viewed bytes, read-only through this view; NOTE(review): presumably not NUL-terminated, so always pair with |size| - confirm */
+    size_t size; /* NOTE(review): presumably the length of |data| in bytes - confirm */
+} QuickMediaStringView;
+
+typedef struct {
+    QuickMediaStringView name;
+    QuickMediaStringView value;
     int defined;
 } QuickMediaNodeSearchParam;
 
 typedef struct QuickMediaNodeSearch QuickMediaNodeSearch;
 
 struct QuickMediaNodeSearch {
-    char *name; /* optional */
+    QuickMediaStringView name; /* optional */
     int recursive;
     QuickMediaNodeSearchParam param; /* optional */
     QuickMediaNodeSearch *child; /* optional */
 };
 
-typedef struct {
-    const char *data;
-    unsigned long long size;
-} QuickMediaStringView;
-
 void quickmedia_node_search_param_init(QuickMediaNodeSearchParam *self);
 void quickmedia_node_search_init(QuickMediaNodeSearch *self);
 void quickmedia_node_search_deinit(QuickMediaNodeSearch *self);
diff --git a/include/quickmedia/XpathTokenizer.h b/include/quickmedia/XpathTokenizer.h
index cada673..62f6d75 100644
--- a/include/quickmedia/XpathTokenizer.h
+++ b/include/quickmedia/XpathTokenizer.h
@@ -30,8 +30,6 @@ typedef enum {
 void quickmedia_xpath_tokenizer_init(QuickMediaXpathTokenizer *self, const char *xpath);
 QuickMediaXpathToken quickmedia_xpath_tokenizer_next(QuickMediaXpathTokenizer *self);
 int quickmedia_xpath_tokenizer_next_if(QuickMediaXpathTokenizer *self, QuickMediaXpathToken token);
-char* quickmedia_xpath_tokenizer_copy_identifier(QuickMediaXpathTokenizer *self);
-char* quickmedia_xpath_tokenizer_copy_string(QuickMediaXpathTokenizer *self);
 
 #ifdef __cplusplus
 }
-- 
cgit v1.2.3-70-g09d2