#ifndef HTML_PARSER_H
#define HTML_PARSER_H

/*
    A small html parser that has no dependencies, doesn't dynamically allocate any memory
    and can parse and repair broken html (just like web browsers).
*/

#include <stddef.h> /* size_t */

#ifdef __cplusplus
extern "C" {
#endif

/*
    A string expressed as a pointer and a byte count.
    NOTE(review): presumably |data| points into the source buffer given to
    |html_parser_parse| and is not NUL-terminated, so always use |size| - confirm
    against the implementation.
*/
typedef struct {
    const char *data;
    size_t size;
} HtmlStringView;

typedef struct HtmlParser HtmlParser;

/* The kind of item the parser is reporting to the callback */
typedef enum {
    HTML_PARSE_TAG_START,
    HTML_PARSE_TAG_END,
    HTML_PARSE_ATTRIBUTE,
    HTML_PARSE_TEXT,
    HTML_PARSE_JAVASCRIPT_CODE
} HtmlParseType;

/*
    Called by the parser for every parsed item.
    |html_parser| is the parser state, |parse_type| tells which kind of item was parsed
    and |userdata| is the pointer that was given to |html_parser_parse|.
    Return 0 to continue
*/
typedef int (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata);

/* The number of elements in |HtmlParser.unclosed_tags| */
#define UNCLOSED_TAGS_SIZE 2048

/*
    The parser state that is passed to the callback.
    NOTE(review): which of the string view fields are valid for which |HtmlParseType|
    is not visible in this header - confirm against the implementation before relying on it.
*/
struct HtmlParser {
    const char *source;
    size_t source_len;
    size_t offset;
    HtmlParseCallback parse_callback;
    void *callback_userdata;

    /* The name of the current enclosing tag */
    HtmlStringView tag_name;
    HtmlStringView attribute_key;
    HtmlStringView attribute_value;
    HtmlStringView text;
    HtmlStringView text_stripped;
    HtmlStringView tag_before_void_tag;
    int is_tag_void;
    int inside_script_tag;

    /* NOTE(review): presumably the number of used elements in |unclosed_tags| - confirm */
    size_t unclosed_tags_offset;
    HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
};

/*
    Returns the value returned from |parse_callback|. 0 meaning success.
    Input text is expected to be in utf8 and may or may not have UTF8-BOM.
    Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END.
    Note: HTML_PARSE_TEXT may be called multiple times for a tag. For example if a tag has
    multiple text items split between child tags like this:
        <div>hello<p>text</p>world</div>
    In this case, HTML_PARSE_TEXT will be called twice for the div tag.
    First with "hello" and then with "world".
    This function does 0 dynamic memory allocations.
*/
int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);

/* Returns 1 if equals */
int html_string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other);

#ifdef __cplusplus
}
#endif

#endif /* HTML_PARSER_H */