From 534c441fd8172322ff5eaad54a1d26b9d8492c39 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 15 Sep 2021 18:22:45 +0200 Subject: Initial commit, finished --- src/HtmlTree.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 src/HtmlTree.c (limited to 'src') diff --git a/src/HtmlTree.c b/src/HtmlTree.c new file mode 100644 index 0000000..f4deb88 --- /dev/null +++ b/src/HtmlTree.c @@ -0,0 +1,171 @@ +#include "../include/HtmlTree.h" +#include + +static void html_node_deinit(HtmlNode *self); + +static void html_node_init(HtmlNode *self) { + self->name_or_value.data = NULL; + self->name_or_value.size = 0; + self->parent = NULL; + self->first_child = NULL; + self->last_child = NULL; + self->first_attr = NULL; + self->node_type = HTML_NODE_NODE; +} + +static HtmlNode* html_node_create(HtmlStringView name_or_value, HtmlNode *parent, HtmlNodeType node_type) { + HtmlNode *new_node = malloc(sizeof(HtmlNode)); + if(!new_node) + return NULL; + + new_node->name_or_value = name_or_value; + new_node->parent = parent; + new_node->first_child = NULL; + new_node->last_child = NULL; + new_node->first_attr = NULL; + new_node->node_type = node_type; + + if(parent) { + HtmlNodeChild *node_child = malloc(sizeof(HtmlNodeChild)); + if(!node_child) { + free(new_node); + return NULL; + } + + node_child->node = new_node; + node_child->next = NULL; + + if(!parent->first_child) { + parent->first_child = node_child; + parent->last_child = node_child; + } else { + parent->last_child->next = node_child; + parent->last_child = node_child; + } + } + + return new_node; +} + +static void html_node_child_deinit(HtmlNodeChild *self) { + html_node_deinit(self->node); + free(self->node); + self->node = NULL; + + if(self->next) { + html_node_child_deinit(self->next); + free(self->next); + self->next = NULL; + } +} + +static void html_attribute_deinit(HtmlAttribute *self) { + if(self->next) { + html_attribute_deinit(self->next); + free(self->next); + self->next = NULL; + } +} + +void html_node_deinit(HtmlNode *self) { + self->name_or_value.data = NULL; + self->name_or_value.size = 0; + self->parent = NULL; + + if(self->first_child) { + html_node_child_deinit(self->first_child); + free(self->first_child); + self->first_child = NULL; + self->last_child = NULL; + } + + if(self->first_attr) { + html_attribute_deinit(self->first_attr); + free(self->first_attr); + self->first_attr = NULL; + } +} + +typedef struct { + HtmlNode *current_node; + HtmlAttribute *current_node_last_attribute; +} ParseUserdata; + +static int parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) { + ParseUserdata *parse_userdata = userdata; + + switch(parse_type) { + case HTML_PARSE_TAG_START: { + HtmlNode *new_node = html_node_create(html_parser->tag_name, parse_userdata->current_node, HTML_NODE_NODE); + if(!new_node) + return 1; + + parse_userdata->current_node = new_node; + parse_userdata->current_node_last_attribute = NULL; + break; + } + case HTML_PARSE_TAG_END: { + HtmlNode *parent_node = parse_userdata->current_node->parent; + if(parent_node) { + parse_userdata->current_node = parent_node; + parse_userdata->current_node_last_attribute = NULL; + } + break; + } + case HTML_PARSE_ATTRIBUTE: { + HtmlAttribute *new_attr = malloc(sizeof(HtmlAttribute)); + if(!new_attr) + return 1; + + new_attr->key = html_parser->attribute_key; + new_attr->value = html_parser->attribute_value; + new_attr->next = NULL; + + if(parse_userdata->current_node_last_attribute) + parse_userdata->current_node_last_attribute->next = new_attr; + else + parse_userdata->current_node->first_attr = new_attr; + + parse_userdata->current_node_last_attribute = new_attr; + break; + } + case HTML_PARSE_TEXT: + case HTML_PARSE_JAVASCRIPT_CODE: { + HtmlNode *new_node = html_node_create(html_parser->text_stripped, parse_userdata->current_node, parse_type == HTML_PARSE_TEXT ? HTML_NODE_TEXT : HTML_NODE_JS); + if(!new_node) + return 1; + + parse_userdata->current_node_last_attribute = NULL; + break; + } + } + + return 0; +} + +int html_parse_to_tree(HtmlTree *self, const char *html_source, size_t len) { + int result; + ParseUserdata parse_userdata; + html_node_init(&self->root_node); + + parse_userdata.current_node = &self->root_node; + parse_userdata.current_node_last_attribute = NULL; + result = html_parser_parse(html_source, len, parse_callback, &parse_userdata); + + if(result != 0) + html_tree_deinit(self); + return result; +} + +void html_tree_deinit(HtmlTree *self) { + html_node_deinit(&self->root_node); +} + +HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView name) { + HtmlAttribute *attr = self->first_attr; + for(; attr; attr = attr->next) { + if(html_string_view_equals_case_insensitive(&attr->key, &name)) + return attr; + } + return NULL; +} -- cgit v1.2.3