about summary refs log tree commit diff
path: root/include/HtmlTree.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/HtmlTree.h')
-rw-r--r-- include/HtmlTree.h 61
1 files changed, 61 insertions, 0 deletions
diff --git a/include/HtmlTree.h b/include/HtmlTree.h
new file mode 100644
index 0000000..6bb3c5f
--- /dev/null
+++ b/include/HtmlTree.h
@@ -0,0 +1,61 @@
+#ifndef HTML_TREE_H
+#define HTML_TREE_H
+
+#include <stddef.h>
+#include <HtmlParser.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct HtmlNode HtmlNode; /* forward declared so the structs below can point at each other */
+typedef struct HtmlNodeChild HtmlNodeChild; /* link in a node's child list */
+typedef struct HtmlAttribute HtmlAttribute; /* key/value pair in a node's attribute list */
+
+typedef enum {
+ HTML_NODE_NODE, /* a named node; |name_or_value| of the HtmlNode holds the node name */
+ HTML_NODE_TEXT, /* text; |name_or_value| holds the text value */
+ HTML_NODE_JS /* javascript code; |name_or_value| holds the code value */
+} HtmlNodeType;
+
+/* A node of the html tree. Text and javascript code are also html nodes */
+struct HtmlNode {
+ HtmlStringView name_or_value; /* name of the node if |node_type| is HTML_NODE_NODE, otherwise the text/code value */
+ HtmlNode *parent; /* the node this node is a child of; presumably NULL for the root node - TODO confirm */
+ HtmlNodeChild *first_child; /* first link of the child list, followed through HtmlNodeChild.next */
+ HtmlNodeChild *last_child; /* last link of the child list; presumably kept so children can be appended without walking the list - TODO confirm */
+ HtmlAttribute *first_attr; /* first attribute of this node, followed through HtmlAttribute.next */
+ HtmlNodeType node_type; /* tells how |name_or_value| is to be interpreted */
+};
+
+struct HtmlNodeChild { /* one link of a node's singly linked child list */
+ HtmlNode *node; /* the child node this link refers to */
+ HtmlNodeChild *next; /* next link in the list; presumably NULL after the last child - TODO confirm */
+};
+
+struct HtmlAttribute { /* one attribute of a node, chained into a singly linked list */
+ HtmlStringView key; /* attribute name; presumably what html_node_get_attribute_by_name matches against - TODO confirm */
+ HtmlStringView value; /* attribute value */
+ HtmlAttribute *next; /* next attribute in the list; presumably NULL after the last one - TODO confirm */
+};
+
+typedef struct { /* a parsed html document: set up by html_parse_to_tree, released by html_tree_deinit */
+ HtmlNode root_node; /* This is not the <html> node. The root_node is always a node with no name and no attributes and only has children */
+} HtmlTree;
+
+/*
+ Returns 0 on success.
+ Input text is expected to be in utf8 and may or may not have UTF8-BOM.
+ |html_source| has to be valid until |html_tree_deinit| is called to free the html tree |self|.
+*/
+int html_parse_to_tree(HtmlTree *self, const char *html_source, size_t len);
+void html_tree_deinit(HtmlTree *self);
+
+/* Looks up the attribute of |self| called |name|. Case insensitive match. Returns NULL if not found */
+HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HTML_TREE_H */