aboutsummaryrefslogtreecommitdiff
path: root/include/HtmlParser.h
blob: e6f0c3b5782e76bada1b687e688a12823a7d5522 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#ifndef HTML_PARSER_H
#define HTML_PARSER_H

/*
    A small HTML parser that has no dependencies, does not dynamically allocate any memory,
    and can parse and repair broken HTML (just like web browsers do)
*/

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
    A (pointer, length) pair describing a run of chars.
    NOTE(review): presumably a non-owning slice of the parsed source that is not
    NUL-terminated, so use |size| rather than strlen -- confirm against the implementation.
*/
typedef struct {
    const char *data; /* First char of the view */
    size_t size;      /* Number of chars in the view */
} HtmlStringView;

/* Forward declaration, so that HtmlParseCallback can take a HtmlParser pointer before the struct is defined */
typedef struct HtmlParser HtmlParser;

/*
    The kind of item being reported to a HtmlParseCallback.
    The numeric values are the ones the compiler assigns by default (0..4), written out explicitly.
    NOTE(review): which HtmlParser fields are valid for each kind is not visible in this header --
    confirm against the implementation.
*/
typedef enum {
    HTML_PARSE_TAG_START       = 0, /* A tag was opened */
    HTML_PARSE_TAG_END         = 1, /* A tag was closed */
    HTML_PARSE_ATTRIBUTE       = 2, /* An attribute of a tag */
    HTML_PARSE_TEXT            = 3, /* Text content */
    HTML_PARSE_JAVASCRIPT_CODE = 4  /* Script content; presumably the body of a script tag -- confirm */
} HtmlParseType;

/*
    Signature of the user-supplied callback.
    html_parser: the parser state for the item being reported.
    parse_type:  which kind of item is being reported.
    userdata:    opaque pointer; presumably the |userdata| given to html_parser_parse -- confirm.
*/
typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata);

/*
    Number of entries in the HtmlParser unclosed_tags array.
    Each entry is a HtmlStringView, so this array dominates sizeof(HtmlParser).
*/
#define UNCLOSED_TAGS_SIZE 2048

/*
    Parser state. A pointer to this struct is the first argument of every HtmlParseCallback call.
    NOTE(review): which of the HtmlStringView fields are meaningful for each HtmlParseType is not
    visible in this header -- confirm against the implementation before reading a field in a callback.
*/
struct HtmlParser {
    /* NOTE(review): presumably the input buffer, its length and the current read position -- confirm */
    const char *source;
    size_t source_len;
    size_t offset;
    /* The callback to report parsed items to, and the opaque pointer that accompanies it */
    HtmlParseCallback parse_callback;
    void *callback_userdata;

    /* The name of the current enclosing tag */
    HtmlStringView tag_name;
    /* NOTE(review): presumably the key/value of the attribute being reported -- confirm */
    HtmlStringView attribute_key;
    HtmlStringView attribute_value;
    /* NOTE(review): presumably the reported text, and the same text with surrounding whitespace removed -- confirm */
    HtmlStringView text;
    HtmlStringView text_stripped;

    /* NOTE(review): bookkeeping for void tags and script tags; the ints look like boolean flags -- confirm */
    HtmlStringView tag_before_void_tag;
    int is_tag_void;
    int inside_script_tag;

    /*
        Fixed-capacity storage of UNCLOSED_TAGS_SIZE tag names.
        NOTE(review): presumably a stack of the currently open tags, with unclosed_tags_offset
        being the number of entries in use -- confirm
    */
    size_t unclosed_tags_offset;
    HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
};

/*
    Parses |html_source| and reports what it finds through |parse_callback|.
    html_source:    the HTML to parse.
    len:            the length of |html_source|.
    parse_callback: called for each parsed item, see HtmlParseType.
    userdata:       opaque pointer; presumably passed unchanged to |parse_callback| -- confirm.
    Returns nothing; the header does not show how (or whether) errors are reported.
    Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END
*/
void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);

#ifdef __cplusplus
}
#endif

#endif /* HTML_PARSER_H */