diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/HtmlTree.h | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/include/HtmlTree.h b/include/HtmlTree.h
new file mode 100644
index 0000000..6bb3c5f
--- /dev/null
+++ b/include/HtmlTree.h
@@ -0,0 +1,61 @@
+#ifndef HTML_TREE_H
+#define HTML_TREE_H
+
+#include <stddef.h> /* size_t */
+#include <HtmlParser.h> /* HtmlStringView is not declared in this header; presumably it comes from here - TODO confirm */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+typedef struct HtmlNode HtmlNode; /* forward declarations: the structs below refer to each other through pointers */
+typedef struct HtmlNodeChild HtmlNodeChild;
+typedef struct HtmlAttribute HtmlAttribute;
+
+typedef enum {
+    HTML_NODE_NODE, /* regular node; |name_or_value| holds the node name */
+    HTML_NODE_TEXT, /* text; |name_or_value| holds the text value */
+    HTML_NODE_JS /* javascript code; |name_or_value| holds the code value */
+} HtmlNodeType;
+
+/* One node of the tree. Text and javascript code are also represented as html nodes; |node_type| tells them apart */
+struct HtmlNode {
+    HtmlStringView name_or_value; /* name of the node if |node_type| is HTML_NODE_NODE, otherwise the text/code value */
+    HtmlNode *parent; /* enclosing node; presumably NULL for the tree's root_node - TODO confirm */
+    HtmlNodeChild *first_child; /* first link of the child list (links are chained through HtmlNodeChild.next) */
+    HtmlNodeChild *last_child; /* last link of the child list */
+    HtmlAttribute *first_attr; /* first attribute (attributes are chained through HtmlAttribute.next) */
+    HtmlNodeType node_type; /* selects how |name_or_value| is interpreted */
+};
+
+struct HtmlNodeChild { /* one link in a node's child list */
+    HtmlNode *node; /* the child node this link refers to */
+    HtmlNodeChild *next; /* following link in the same child list */
+};
+
+struct HtmlAttribute { /* one key/value attribute in a node's attribute list */
+    HtmlStringView key;
+    HtmlStringView value;
+    HtmlAttribute *next; /* following attribute in the same list */
+};
+
+typedef struct {
+    HtmlNode root_node; /* Not the <html> node: the root_node is always a node with no name and no attributes that only has children */
+} HtmlTree;
+
+/* Parses |html_source| into the tree |self|. |len| is presumably the size of |html_source| in bytes - TODO confirm.
+    Returns 0 on success.
+    Input text is expected to be in utf8 and may or may not have UTF8-BOM.
+    |html_source| has to be valid until |html_tree_deinit| is called to free the html tree |self|
+    (presumably because the HtmlStringView values refer to |html_source| instead of copying it - TODO confirm). */
+int html_parse_to_tree(HtmlTree *self, const char *html_source, size_t len);
+void html_tree_deinit(HtmlTree *self); /* frees the tree |self|; |html_source| only has to stay valid until this call */
+
+/* Looks up an attribute of |self| by |name|. Case insensitive match. Returns NULL if not found */
+HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView name);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* HTML_TREE_H */ |