From fab7a00da328debef4fbeba9eb21fc02cf21675d Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 19 Aug 2023 13:26:46 +0200 Subject: Update html-parser version to latest, add html_node_find_child --- depends/html-parser | 2 +- include/HtmlTree.h | 7 +++++++ src/HtmlTree.c | 41 ++++++++++++++++++++++++++++++++++++++++- tests/main.c | 5 +++-- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/depends/html-parser b/depends/html-parser index 66ec83b..5ea469d 160000 --- a/depends/html-parser +++ b/depends/html-parser @@ -1 +1 @@ -Subproject commit 66ec83b862ea2a8dbda1c1f3663af88a8d12d9bc +Subproject commit 5ea469de363ab319f1cf0707c56bf31652877958 diff --git a/include/HtmlTree.h b/include/HtmlTree.h index 4378152..2f25e1d 100644 --- a/include/HtmlTree.h +++ b/include/HtmlTree.h @@ -54,6 +54,13 @@ void html_tree_deinit(HtmlTree *self); /* Case insensitive match. Returns NULL if not found */ HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView name); +/* + Returns the first child of |self| with tag |tag_name| and an attribute |attribute_name| whose value is |attribute_value|, or NULL if not found. + This function is not recursive: only direct children of |self| are searched. + The tag name, attribute name and attribute value are all matched case insensitively. +*/ +HtmlNode* html_node_find_child(HtmlNode *self, const char *tag_name, const char *attribute_name, const char *attribute_value); + #ifdef __cplusplus } #endif diff --git a/src/HtmlTree.c b/src/HtmlTree.c index faef08a..4c1628c 100644 --- a/src/HtmlTree.c +++ b/src/HtmlTree.c @@ -1,5 +1,6 @@ #include "../include/HtmlTree.h" #include +#include #include static void html_node_deinit(HtmlNode *self); @@ -123,7 +124,11 @@ static int parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, voi } case HTML_PARSE_TEXT: case HTML_PARSE_JAVASCRIPT_CODE: { - HtmlNode *new_node = html_node_create(html_parser->text_stripped, parse_userdata->current_node, parse_type == HTML_PARSE_TEXT ? 
HTML_NODE_TEXT : HTML_NODE_JS); + HtmlNode *new_node; + if(html_parser->text_stripped.size == 0) + return 0; +/* Only non-empty stripped text gets past the check above; it becomes a TEXT or JS node */ + new_node = html_node_create(html_parser->text_stripped, parse_userdata->current_node, parse_type == HTML_PARSE_TEXT ? HTML_NODE_TEXT : HTML_NODE_JS); if(!new_node) return 1; @@ -162,3 +167,37 @@ HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView na } return NULL; } +/* Returns the first direct child of |self| that is an HTML_NODE_NODE, whose name equals |tag_name| and which has an attribute |attribute_name| with value |attribute_value| (all compared case insensitively). Returns NULL if no child matches. */ +HtmlNode* html_node_find_child(HtmlNode *self, const char *tag_name, const char *attribute_name, const char *attribute_value) { + HtmlStringView tag; + HtmlStringView attr_name; + HtmlStringView attr_value; + HtmlNodeChild *child; +/* Wrap the C strings in string views. Each one is passed to strlen, so all three must be non-NULL, NUL-terminated strings */ + tag.data = tag_name; + tag.size = strlen(tag_name); + + attr_name.data = attribute_name; + attr_name.size = strlen(attribute_name); + + attr_value.data = attribute_value; + attr_value.size = strlen(attribute_value); +/* Walk the direct children only (no descent into grandchildren); children that are not HTML_NODE_NODE are skipped */ + for(child = self->first_child; child; child = child->next) { + HtmlAttribute *attr; + if(child->node.node_type != HTML_NODE_NODE) + continue; +/* The tag name must match */ + if(!html_string_view_equals_case_insensitive(&child->node.name_or_value, &tag)) + continue; +/* The child must carry the requested attribute */ + attr = html_node_get_attribute_by_name(&child->node, attr_name); + if(!attr) + continue; +/* The attribute value must match as well; the first child passing all checks is returned */ + if(html_string_view_equals_case_insensitive(&attr->value, &attr_value)) + return &child->node; + } + + return NULL; +} diff --git a/tests/main.c b/tests/main.c index 4c9e93c..7ba19f5 100644 --- a/tests/main.c +++ b/tests/main.c @@ -4,6 +4,7 @@ #include char* file_get_content(const char *path, long *filesize) { + char *data; FILE *file = fopen(path, "rb"); if(!file) { perror(path); @@ -14,7 +15,7 @@ char* file_get_content(const char *path, long *filesize) { *filesize = ftell(file); fseek(file, 0, SEEK_SET); - char *data = malloc(*filesize); + data = malloc(*filesize); fread(data, 1, *filesize, file); fclose(file); return data; @@ -56,7 +57,7 @@ void html_node_print(HtmlNode *node) { } } -int main() { +int main(void) { int result; HtmlTree html_tree; long filesize; -- cgit v1.2.3