aboutsummaryrefslogtreecommitdiff
path: root/include/HtmlTree.h
blob: 2f25e1d5fc6477346a262ab4a70a5843db596251 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#ifndef HTML_TREE_H
#define HTML_TREE_H

#include <stddef.h>
#include <HtmlParser.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Forward declarations so that the struct definitions below can refer to each other through pointers */
typedef struct HtmlNode HtmlNode;
typedef struct HtmlNodeChild HtmlNodeChild;
typedef struct HtmlAttribute HtmlAttribute;

/* Tells what kind of content a HtmlNode holds */
typedef enum {
    HTML_NODE_NODE = 0, /* A named node; HtmlNode::name_or_value is the name of the node */
    HTML_NODE_TEXT = 1, /* Text; HtmlNode::name_or_value is the text value */
    HTML_NODE_JS   = 2  /* Javascript code; HtmlNode::name_or_value is the code value */
} HtmlNodeType;

/*
    A single node in the tree.
    Text and javascript code are also html nodes; |node_type| tells which kind a node is.
*/
struct HtmlNode {
    HtmlStringView name_or_value; /* Name of the node if |node_type| is HTML_NODE_NODE, otherwise the text/code value */
    HtmlNode *parent; /* The node that this node is a child of. NOTE(review): presumably NULL for the root node - confirm */
    HtmlNodeChild *first_child; /* First entry in the list of children; the following entries are reached through HtmlNodeChild::next */
    HtmlNodeChild *last_child; /* Last entry in the list of children. NOTE(review): presumably NULL together with |first_child| when there are no children - confirm */
    HtmlAttribute *first_attr; /* First entry in the list of attributes; the following entries are reached through HtmlAttribute::next */
    HtmlNodeType node_type; /* Decides how |name_or_value| is interpreted */
};

/* One entry in a node's singly linked list of children */
struct HtmlNodeChild {
    HtmlNode node; /* The child node itself, stored by value inside the list entry */
    HtmlNodeChild *next; /* The next child of the same parent. NOTE(review): presumably NULL for the last child - confirm */
};

/* One key/value attribute in a node's singly linked list of attributes */
struct HtmlAttribute {
    HtmlStringView key;
    HtmlStringView value; /* NOTE(review): what this holds for an attribute without a value is not visible in this header - confirm */
    HtmlAttribute *next; /* The next attribute of the same node. NOTE(review): presumably NULL for the last attribute - confirm */
};

/* A parsed html document. Filled in by |html_parse_to_tree| and freed with |html_tree_deinit| */
typedef struct {
    HtmlNode root_node; /* This is not the <html> node. The root node never has a name or attributes, it only has children */
} HtmlTree;

/*
    Parses |html_source| into the tree |self|.
    Returns 0 on success.
    Input text is expected to be in UTF-8 and may or may not start with a UTF-8 BOM.
    |html_source| has to be valid until |html_tree_deinit| is called to free the tree |self|.
    NOTE(review): |len| is presumably the size of |html_source| in bytes - confirm.
    NOTE(review): the meaning of the non-zero return values is not documented in this header.
*/
int html_parse_to_tree(HtmlTree *self, const char *html_source, size_t len);
/*
    Frees the tree |self|.
    The |html_source| that was given to |html_parse_to_tree| has to stay valid until this has been called.
*/
void html_tree_deinit(HtmlTree *self);

/*
    Finds the attribute of the node |self| that has the key |name|.
    Case insensitive match.
    Returns NULL if not found.
*/
HtmlAttribute* html_node_get_attribute_by_name(HtmlNode *self, HtmlStringView name);

/*
    Searches the children of |self| for a node that matches |tag_name|, |attribute_name| and |attribute_value|.
    Returns NULL if not found.
    This function is not recursive.
    Case insensitive search.
    NOTE(review): whether |attribute_name| and |attribute_value| can be NULL to skip the attribute match,
    and whether the attribute value is also compared case insensitively, is not visible in this header - confirm.
*/
HtmlNode* html_node_find_child(HtmlNode *self, const char *tag_name, const char *attribute_name, const char *attribute_value);

#ifdef __cplusplus
}
#endif

#endif /* HTML_TREE_H */