about summary refs log tree commit diff
diff options
context:
space:
mode:
author Aleksi Lindeman <dec05eba@protonmail.com> 2019-05-25 03:58:20 +0200
committer Aleksi Lindeman <dec05eba@protonmail.com> 2019-05-25 03:58:23 +0200
commit c5f4811d0ba74715c8e128da133248cc399a6a6a (patch)
tree 38c50f1a5c8be839d1101bb8b5b7fd13f16ee4a5
parent 9c52fc54400d706f8f7cdf82d3df2da733862819 (diff)
Allow reusing doc for multiple xpath searches
-rw-r--r-- include/quickmedia/HtmlSearch.h 9
-rw-r--r-- src/HtmlSearch.c 39
-rw-r--r-- tests/main.c 12
3 files changed, 42 insertions, 18 deletions
diff --git a/include/quickmedia/HtmlSearch.h b/include/quickmedia/HtmlSearch.h
index e3bea33..568e101 100644
--- a/include/quickmedia/HtmlSearch.h
+++ b/include/quickmedia/HtmlSearch.h
@@ -13,6 +13,10 @@ typedef struct {
void *text;
} QuickMediaHtmlNode;
+typedef struct {
+ const void *doc;
+} QuickMediaHtmlSearch;
+
/* Returns NULL if attribute doesn't exist or if it doesn't have any value */
const char* quickmedia_html_node_get_attribute_value(QuickMediaHtmlNode *self, const char *attribute_name);
@@ -22,7 +26,10 @@ const QuickMediaStringView quickmedia_html_node_get_text(QuickMediaHtmlNode *sel
/* @node is only valid within the callback function scope */
typedef void (*QuickMediaHtmlSearchResultCallback)(QuickMediaHtmlNode *node, void *userdata);
-int quickmedia_html_find_nodes_xpath(const char *html_source, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata);
+int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source);
+void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self);
+
+int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata);
#ifdef __cplusplus
}
diff --git a/src/HtmlSearch.c b/src/HtmlSearch.c
index c3608dd..7868f32 100644
--- a/src/HtmlSearch.c
+++ b/src/HtmlSearch.c
@@ -99,35 +99,42 @@ const QuickMediaStringView quickmedia_html_node_get_text(QuickMediaHtmlNode *sel
return string_view;
}
-static int quickmedia_html_find_nodes(const char *html_source, QuickMediaNodeSearch *search_data, QuickMediaHtmlSearchResultCallback result_callback, void *userdata) {
- assert(html_source);
+static int quickmedia_html_find_nodes(QuickMediaHtmlSearch *self, QuickMediaNodeSearch *search_data, QuickMediaHtmlSearchResultCallback result_callback, void *userdata) {
assert(search_data);
assert(result_callback);
- if(!html_source || !search_data || !result_callback)
+ if(!search_data || !result_callback)
return -1;
+
+ TidyNode root_node = tidyGetRoot(self->doc);
+ find_child_nodes(self->doc, root_node, search_data, result_callback, userdata);
+ return 0;
+}
- TidyDoc tdoc = tidyCreate();
- tidyOptSetBool(tdoc, TidyShowWarnings, no);
- /* tidyOptSetBool(tdoc, TidyForceOutput, yes); */
- int rc = tidyParseString( tdoc, html_source);
- if(rc < 0) {
- tidyRelease(tdoc);
- return rc;
+int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source) {
+ self->doc = tidyCreate();
+ tidyOptSetBool(self->doc, TidyShowWarnings, no);
+ /* tidyOptSetBool(self->doc, TidyForceOutput, yes); */
+ if(tidyParseString(self->doc, html_source) < 0) {
+ tidyRelease(self->doc);
+ self->doc = NULL;
}
-
- TidyNode root_node = tidyGetRoot(tdoc);
- find_child_nodes(tdoc, root_node, search_data, result_callback, userdata);
- tidyRelease(tdoc);
return 0;
}
-int quickmedia_html_find_nodes_xpath(const char *html_source, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata) {
+void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self) {
+ if(self->doc) {
+ tidyRelease(self->doc);
+ self->doc = NULL;
+ }
+}
+
+int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata) {
QuickMediaNodeSearch search_data;
quickmedia_node_search_init(&search_data);
int result = quickmedia_parse_xpath(xpath, &search_data);
if(result != 0)
goto cleanup;
- result = quickmedia_html_find_nodes(html_source, &search_data, result_callback, userdata);
+ result = quickmedia_html_find_nodes(self, &search_data, result_callback, userdata);
cleanup:
quickmedia_node_search_deinit(&search_data);
return result;
diff --git a/tests/main.c b/tests/main.c
index 5b697a9..4d16ad6 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -26,7 +26,17 @@ static void result_callback(QuickMediaHtmlNode *node, void *userdata) {
int main(int argc, char **argv) {
char *file_content = get_file_content("test_files/test.html");
- int result = quickmedia_html_find_nodes_xpath(file_content, "//h3[class=\"story_name\"]//a", result_callback, NULL);
+ QuickMediaHtmlSearch html_search;
+
+ int result = quickmedia_html_search_init(&html_search, file_content);
+ if(result != 0)
+ goto cleanup;
+ result = quickmedia_html_find_nodes_xpath(&html_search, "//h3[class=\"story_name\"]//a", result_callback, NULL);
+ /* Test that the object can be reused without reloading html doc */
+ result = quickmedia_html_find_nodes_xpath(&html_search, "//h3[class=\"story_name\"]//a", result_callback, NULL);
+
+ cleanup:
+ quickmedia_html_search_deinit(&html_search);
free(file_content);
return result;
}