blob: fc3ff3a848cd6e788de33985abe874b0cb6c645f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
#ifndef HTML_PARSER_H
#define HTML_PARSER_H
/*
A small HTML parser that has no dependencies, does not dynamically allocate any memory,
and can parse and repair broken HTML (just like web browsers do).
*/
#include <stddef.h>
/*
    A string slice described by a pointer and a length.
    NOTE(review): presumably this points into the source given to the parser and is
    not null-terminated, so always use |size| instead of strlen - confirm against the implementation.
*/
typedef struct {
    const char *data; /* Start of the slice */
    size_t size;      /* Length of the slice */
} HtmlStringView;
/* Forward declaration, so that HtmlParseCallback can take a HtmlParser pointer before the struct itself is defined */
typedef struct HtmlParser HtmlParser;
/*
    The kind of item that is being reported to a HtmlParseCallback.
    The values are written out explicitly; they are the same values the compiler
    assigns implicitly (starting at 0 and increasing by 1).
    NOTE(review): the per-value descriptions below are inferred from the names - confirm against the implementation.
*/
typedef enum {
    HTML_PARSE_TAG_START       = 0, /* Presumably the start of a tag. Reported before HTML_PARSE_TAG_END for the same tag */
    HTML_PARSE_TAG_END         = 1, /* Presumably the end of a tag */
    HTML_PARSE_ATTRIBUTE       = 2, /* Presumably an attribute of a tag */
    HTML_PARSE_TEXT            = 3, /* Presumably text content. The only type for which HtmlParser.text_stripped is used */
    HTML_PARSE_JAVASCRIPT_CODE = 4  /* Presumably the content of a script tag */
} HtmlParseType;
/*
Function type of the callback that is given to html_parser_init.
|parse_type| identifies what kind of item is being reported.
NOTE(review): |userdata| is presumably the pointer that was passed to html_parser_init, and the
data for the reported item is presumably read from the fields of |html_parser|
(tag_name, attribute_key, attribute_value, text, text_stripped) - confirm against the implementation.
*/
typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata);
/*
The number of elements in HtmlParser.unclosed_tags.
Since the array is embedded in the struct, this value directly determines how large a HtmlParser is.
*/
#define UNCLOSED_TAGS_SIZE 2048
/*
    The parser state. All storage is embedded in the struct, including the
    |unclosed_tags| array of UNCLOSED_TAGS_SIZE string views, which makes the struct large
    (2048 views of a pointer and a size_t each, around 32 KiB on a typical 64-bit target).
    Keep that in mind before placing it on a small stack.
    NOTE(review): the field descriptions below are inferred from names and types - confirm against the implementation.
*/
struct HtmlParser {
    /* Input. Presumably the values that were given to html_parser_init */
    const char *source;
    size_t source_len;
    /* Presumably the current read position in |source| */
    size_t offset;

    /* The callback to report parsed items to, and the pointer to pass along with it */
    HtmlParseCallback parse_callback;
    void *callback_userdata;

    /* Presumably the data of the item that is currently being reported to the callback */
    HtmlStringView tag_name;
    HtmlStringView attribute_key;
    HtmlStringView attribute_value;
    HtmlStringView text;
    /* Only used when parse_type is HTML_PARSE_TEXT */
    HtmlStringView text_stripped;

    /* Flags. Presumably 0 means false and non-zero means true */
    int is_tag_void;
    int inside_script_tag;

    /* Presumably the tags that have been opened but not closed yet, and how many of the slots are in use */
    size_t unclosed_tags_offset;
    HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
};
/* Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END */
/*
Initializes |self| to parse |html_source|, which has the length |len|.
|parse_callback| is the function that parse results are reported to, together with |userdata|.
NOTE(review): since the length is passed explicitly, |html_source| presumably does not have to be
null-terminated. The parser hands out HtmlStringView values, so |html_source| presumably has to
remain valid for as long as the parser is used - confirm both against the implementation.
*/
void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
/*
Deinitializes |self|.
NOTE(review): the description at the top of this file says that no memory is dynamically allocated,
so there is presumably nothing to free here - confirm against the implementation.
*/
void html_parser_deinit(HtmlParser *self);
/*
Runs the parser on |self|, which should have been set up with html_parser_init.
NOTE(review): presumably this parses the whole source and calls the parse callback
for every item that is found before returning - confirm against the implementation.
*/
void html_parser_parse(HtmlParser *self);
#endif /* HTML_PARSER_H */
|