#ifndef HTML_PARSER_H
#define HTML_PARSER_H

/*
    A small html parser that has no dependencies, does not dynamically
    allocate any memory, and can parse and repair broken html
    (just like web browsers).
*/

/* Needed for size_t, which is used by the types below */
#include <stddef.h>

/* A pointer + length pair describing a run of characters. */
/* NOTE(review): presumably points into the parsed source and is not NUL-terminated; confirm in the implementation */
typedef struct {
    const char *data;
    size_t size;
} HtmlStringView;

typedef struct HtmlParser HtmlParser;

/* The kind of item being reported to HtmlParseCallback */
typedef enum {
    HTML_PARSE_TAG_START,
    HTML_PARSE_TAG_END,
    HTML_PARSE_ATTRIBUTE,
    HTML_PARSE_TEXT,
    HTML_PARSE_JAVASCRIPT_CODE
} HtmlParseType;

/*
    Callback type registered with html_parser_init.
    |html_parser| is the parser handed to the callback, |parse_type| tells which kind of
    item is being reported and |userdata| is an opaque pointer.
    NOTE(review): |userdata| is presumably the pointer that was passed to html_parser_init,
    and the parsed item is presumably read from the string view fields of |html_parser|; confirm in the implementation.
*/
typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata);

/* The number of elements in HtmlParser::unclosed_tags */
#define UNCLOSED_TAGS_SIZE 2048

/*
    Parser state. The struct embeds a fixed-size array of UNCLOSED_TAGS_SIZE string views,
    so an instance is large (tens of kilobytes on common platforms).
*/
struct HtmlParser {
    const char *source;
    size_t source_len;
    /* NOTE(review): presumably the current read position within |source|; confirm in the implementation */
    size_t offset;
    HtmlParseCallback parse_callback;
    void *callback_userdata;

    HtmlStringView tag_name;
    HtmlStringView attribute_key;
    HtmlStringView attribute_value;
    HtmlStringView text;
    /* Only used when parse_type is HTML_PARSE_TEXT */
    HtmlStringView text_stripped;

    /* Integer flags. NOTE(review): presumably used as booleans (0 = false); confirm in the implementation */
    int is_tag_void;
    int inside_script_tag;

    /* NOTE(review): presumably the number of entries currently used in |unclosed_tags|; confirm in the implementation */
    size_t unclosed_tags_offset;
    HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
};

/*
    Set up |self| to parse |html_source|, which is |len| bytes long.
    |parse_callback| and |userdata| are the callback and the opaque pointer to use while parsing.
    Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END.
*/
void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);

/* Counterpart to html_parser_init */
void html_parser_deinit(HtmlParser *self);

/* Parse the source that |self| was initialized with. Results are reported through the parse callback */
void html_parser_parse(HtmlParser *self);

#endif /* HTML_PARSER_H */