aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDEC05EBA <dec05eba@protonmail.com>2019-12-31 08:46:05 +0100
committerDEC05EBA <dec05eba@protonmail.com>2019-12-31 08:49:12 +0100
commitac0a3e0ebb9b460a31a76115cb4d494361c03e49 (patch)
tree50cbb9a689934792372eb998d3f2653776194829 /include
Initial commit, copied from wwwhtml-parser. Works. Need to add unescape of html sequences
Diffstat (limited to 'include')
-rw-r--r--include/HtmlParser.h54
1 files changed, 54 insertions, 0 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
new file mode 100644
index 0000000..72de123
--- /dev/null
+++ b/include/HtmlParser.h
@@ -0,0 +1,54 @@
+#ifndef HTML_PARSER_H
+#define HTML_PARSER_H
+
+/*
+ A small HTML parser that has no dependencies, does not dynamically allocate any memory,
+ and can parse and repair broken HTML (just like web browsers do).
+*/
+
+#include <stddef.h>
+
+typedef struct { /* A view of character data: a start pointer plus a length. */
+ const char *data; /* start of the characters; const, so they are not modified through this view */
+ size_t size; /* number of chars at data; presumably no NUL terminator is required -- TODO confirm */
+} StringView;
+
+typedef struct HtmlParser HtmlParser; /* forward declaration: HtmlParseCallback below takes an HtmlParser pointer before the struct body is defined */
+
+typedef enum{ /* Kind of event handed to HtmlParseCallback through its parse_type argument. */
+ HTML_PARSE_TAG_START, /* presumably: an opening tag was parsed -- confirm in the implementation */
+ HTML_PARSE_TAG_END, /* presumably: a tag was closed -- confirm in the implementation */
+ HTML_PARSE_ATTRIBUTE, /* presumably: an attribute (key and value) was parsed -- confirm */
+ HTML_PARSE_TEXT, /* presumably: text content was parsed -- confirm */
+ HTML_PARSE_JAVASCRIPT_CODE /* presumably: the contents of a script tag were parsed -- confirm */
+} HtmlParseType;
+
+typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata); /* type of HtmlParser.parse_callback: receives the parser, the event kind and an opaque userdata pointer; returns nothing */
+
+#define UNCLOSED_TAGS_SIZE 2048 /* element count of HtmlParser.unclosed_tags; NOTE(review): unprefixed macro name in a public header may collide with user code */
+
+struct HtmlParser { /* Parser state; HtmlParseCallback receives a pointer to this struct. */
+ const char *source; /* input text; presumably the html_source given to html_parser_init -- confirm */
+ size_t source_len; /* length of source; presumably the len given to html_parser_init -- confirm */
+ size_t offset; /* presumably the current read position within source -- confirm */
+ HtmlParseCallback parse_callback; /* presumably the callback given to html_parser_init -- confirm */
+ void *callback_userdata; /* opaque pointer; presumably forwarded to parse_callback as userdata -- confirm */
+ /* The four views below presumably describe the item being reported -- TODO confirm which are valid for each HtmlParseType. */
+ StringView tag_name;
+ StringView attribute_key;
+ StringView attribute_value;
+ StringView text;
+
+ int is_tag_void; /* int used as a flag; presumably nonzero for void elements such as <br> -- confirm */
+ int inside_script_tag; /* int used as a flag; presumably nonzero while inside a script tag -- confirm */
+
+ size_t unclosed_tags_offset; /* presumably the number of entries in use in unclosed_tags -- confirm */
+ StringView unclosed_tags[UNCLOSED_TAGS_SIZE]; /* fixed-capacity array embedded in the struct; it makes sizeof(HtmlParser) large, so be careful about placing one on the stack */
+};
+
+void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata); /* prepares *self; presumably keeps the html_source pointer rather than copying it, so the caller must keep it alive -- confirm */
+void html_parser_deinit(HtmlParser *self); /* counterpart of html_parser_init; what it releases, if anything, is not visible in this header */
+ /* presumably walks the source and reports events through parse_callback -- confirm in the implementation */
+void html_parser_parse(HtmlParser *self);
+
+#endif /* HTML_PARSER_H */ \ No newline at end of file