#include "../include/HtmlTree.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Resets |self| to an empty element node: no name/value, parent, children or attributes. */
static void html_node_init(HtmlNode *self) {
    self->name_or_value.data = NULL;
    self->name_or_value.size = 0;
    self->parent = NULL;
    self->first_child = NULL;
    self->last_child = NULL;
    self->first_attr = NULL;
    self->node_type = HTML_NODE_NODE;
}

/*
    Allocates a new node of type |node_type| and appends it as the last child of |parent|.
    |parent| can't be NULL.
    |name_or_value| is stored as a view; the bytes it refers to are not copied.
    Returns the new node, or NULL if the allocation fails (|parent| is then left untouched).
*/
static HtmlNode* html_node_create(HtmlStringView name_or_value, HtmlNode *parent, HtmlNodeType node_type) {
    HtmlNodeChild *node_child;

    /* Check the precondition before allocating so a violation can't leave an orphaned allocation behind */
    assert(parent);

    node_child = malloc(sizeof *node_child);
    if(!node_child)
        return NULL;

    html_node_init(&node_child->node);
    node_child->node.name_or_value = name_or_value;
    node_child->node.parent = parent;
    node_child->node.node_type = node_type;
    node_child->next = NULL;

    if(!parent->first_child)
        parent->first_child = node_child;
    else
        parent->last_child->next = node_child;
    parent->last_child = node_child;

    return &node_child->node;
}

/* Frees every attribute in the list that starts at |attr|. |attr| may be NULL. */
static void html_attribute_list_free(HtmlAttribute *attr) {
    while(attr) {
        HtmlAttribute *next = attr->next;
        free(attr);
        attr = next;
    }
}

/*
    Frees all descendants and attributes of |self| and resets its fields.
    |self| itself is not freed and its |node_type| is left as it was.

    The teardown is iterative on purpose: the tree is built from arbitrary html, so both the
    number of siblings and the nesting depth are unbounded, and a recursive walk could
    overflow the stack. Only the |first_child| and |next| links are followed.
*/
static void html_node_deinit(HtmlNode *self) {
    /* Worklist of heap allocated nodes that still have to be freed, linked through |next| */
    HtmlNodeChild *pending = self->first_child;

    while(pending) {
        HtmlNodeChild *child = pending->node.first_child;
        if(child) {
            /* Unlink the first child and put it in front of its parent so it's freed first */
            pending->node.first_child = child->next;
            child->next = pending;
            pending = child;
        } else {
            /* No children left below this node, it can be released */
            HtmlNodeChild *next = pending->next;
            html_attribute_list_free(pending->node.first_attr);
            free(pending);
            pending = next;
        }
    }

    html_attribute_list_free(self->first_attr);

    self->name_or_value.data = NULL;
    self->name_or_value.size = 0;
    self->parent = NULL;
    self->first_child = NULL;
    self->last_child = NULL;
    self->first_attr = NULL;
}

/* State shared between html_parse_to_tree and parse_callback while a document is parsed */
typedef struct {
    HtmlNode *root_node;                        /* Root of the tree that is being built */
    HtmlNode *current_node;                     /* Node that new children and attributes are added to */
    HtmlAttribute *current_node_last_attribute; /* Tail of |current_node|'s attribute list, NULL if none has been added yet */
} ParseUserdata;

/*
    Parser callback that grows the tree in |userdata| (a ParseUserdata) for every parse event.
    Returns 0 on success and 1 if a node or attribute can't be allocated
    (presumably a non-zero value makes html_parser_parse stop and report the error - confirm against the parser).
*/
static int parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
    ParseUserdata *parse_userdata = userdata;

    switch(parse_type) {
        case HTML_PARSE_TAG_START: {
            /* A new element becomes the current node until its end tag is seen */
            HtmlNode *new_node = html_node_create(html_parser->tag_name, parse_userdata->current_node, HTML_NODE_NODE);
            if(!new_node)
                return 1;
            parse_userdata->current_node = new_node;
            parse_userdata->current_node_last_attribute = NULL;
            break;
        }
        case HTML_PARSE_TAG_END: {
            HtmlNode *parent_node = parse_userdata->current_node->parent;
            /*
                NOTE(review): the current node never moves back to the root node, so once the first
                top-level element is closed any following top-level content is still added below it.
                Behavior is kept as-is since callers may depend on the resulting tree shape;
                confirm whether this is intended.
            */
            if(parent_node && parent_node != parse_userdata->root_node) {
                parse_userdata->current_node = parent_node;
                parse_userdata->current_node_last_attribute = NULL;
            }
            break;
        }
        case HTML_PARSE_ATTRIBUTE: {
            /* Append to the attribute list of the current node, keeping the order they are reported in */
            HtmlAttribute *new_attr = malloc(sizeof *new_attr);
            if(!new_attr)
                return 1;

            new_attr->key = html_parser->attribute_key;
            new_attr->value = html_parser->attribute_value;
            new_attr->next = NULL;

            if(parse_userdata->current_node_last_attribute)
                parse_userdata->current_node_last_attribute->next = new_attr;
            else
                parse_userdata->current_node->first_attr = new_attr;
            parse_userdata->current_node_last_attribute = new_attr;
            break;
        }
        case HTML_PARSE_TEXT:
        case HTML_PARSE_JAVASCRIPT_CODE: {
            HtmlNode *new_node;
            HtmlNodeType node_type;

            /* Text that is empty after stripping doesn't get a node */
            if(html_parser->text_stripped.size == 0)
                return 0;

            /* Text and script content become leaf children; the current node doesn't change */
            node_type = HTML_NODE_JS;
            if(parse_type == HTML_PARSE_TEXT)
                node_type = HTML_NODE_TEXT;

            new_node = html_node_create(html_parser->text_stripped, parse_userdata->current_node, node_type);
            if(!new_node)
                return 1;
            parse_userdata->current_node_last_attribute = NULL;
            break;
        }
        default:
            /* Other parse events don't affect the tree */
            break;
    }
    return 0;
}

/*
    Parses |len| bytes of |html_source| into |self|. |self| is initialized by this function.
    Returns the result of html_parser_parse, where 0 means success. If the result is not 0 then
    the partially built tree has already been freed with html_tree_deinit.
    Names, values and attributes are stored as the string views reported by the parser
    (presumably pointing into |html_source|, which then has to outlive the tree - confirm against the parser).
*/
int html_parse_to_tree(HtmlTree *self, const char *html_source, size_t len) {
    ParseUserdata parse_userdata;
    int result;

    html_node_init(&self->root_node);

    parse_userdata.root_node = &self->root_node;
    parse_userdata.current_node = &self->root_node;
    parse_userdata.current_node_last_attribute = NULL;

    result = html_parser_parse(html_source, len, parse_callback, &parse_userdata);
    if(result != 0)
        html_tree_deinit(self);
    return result;
}

/* Frees every node and attribute in the tree. The root node is reset to have no children or attributes. */
void html_tree_deinit(HtmlTree *self) {
    html_node_deinit(&self->root_node);
}

/*
    Returns the first attribute of |self| which has a key that equals |name| (case-insensitive),
    or NULL if there is no such attribute.
*/
HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView name) {
    HtmlAttribute *attr;
    for(attr = self->first_attr; attr; attr = attr->next) {
        if(html_string_view_equals_case_insensitive(&attr->key, &name))
            return attr;
    }
    return NULL;
}

/*
    Returns the first direct child of |self| that is an element named |tag_name| and which has
    an attribute |attribute_name| with the value |attribute_value|. All comparisons are case-insensitive.
    Only the first attribute with a matching name is considered for each child.
    Returns NULL if there is no such child, or if any of the string arguments is NULL.
*/
HtmlNode* html_node_find_child(HtmlNode *self, const char *tag_name, const char *attribute_name, const char *attribute_value) {
    HtmlStringView tag;
    HtmlStringView attr_name;
    HtmlStringView attr_value;
    HtmlNodeChild *child;

    /* strlen on a NULL pointer is undefined behavior; treat NULL as "nothing can match" instead */
    if(!tag_name || !attribute_name || !attribute_value)
        return NULL;

    tag.data = tag_name;
    tag.size = strlen(tag_name);
    attr_name.data = attribute_name;
    attr_name.size = strlen(attribute_name);
    attr_value.data = attribute_value;
    attr_value.size = strlen(attribute_value);

    for(child = self->first_child; child; child = child->next) {
        HtmlAttribute *attr;

        /* Text and javascript nodes have a value instead of a tag name */
        if(child->node.node_type != HTML_NODE_NODE)
            continue;

        if(!html_string_view_equals_case_insensitive(&child->node.name_or_value, &tag))
            continue;

        attr = html_node_get_attribute_by_name(&child->node, attr_name);
        if(!attr)
            continue;

        if(html_string_view_equals_case_insensitive(&attr->value, &attr_value))
            return &child->node;
    }
    return NULL;
}