about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-07-19 18:25:31 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-07-19 18:25:31 +0200
commit 8d40850ffd52100aa51f9dc7814cc7b334381c32 (patch)
tree 67c81a126ea02aed90ed7b5a33cf10df0fe4840a
parent 11d3632fe4508bfd2f668b7b1c4d75a88cd6449d (diff)
Fix script tag not getting closed until the end
-rw-r--r-- include/HtmlParser.h 2
-rw-r--r-- src/HtmlParser.c 31
2 files changed, 25 insertions, 8 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index 5c1f4c1..1e23d0d 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -56,10 +56,12 @@ struct HtmlParser {
/*
Returns the value returned from |parse_callback|. 0 meaning success.
+ Input text is expected to be in utf8 and may or may not have UTF8-BOM.
Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END.
Note: HTML_PARSE_TEXT may be called multiple times for a tag. For example if a tag has multiple text items split between child tags
like this: <div>hello<h1>text</h1>world</div>.
In this case, HTML_PARSE_TEXT will be called twice for the div tag. First with "hello" and then with "world".
+ This function does 0 dynamic memory allocations.
*/
int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index a760f98..8a595ac 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -26,9 +26,9 @@ static HtmlStringView void_tags[] = {
static HtmlStringView script_tag = {"script", 6};
-static char to_lower(char c) {
- if(c >= 'A' && c <= 'Z')
- return c + 32;
+static char to_upper(char c) {
+ if(c >= 'a' && c <= 'z')
+ return c - 32;
else
return c;
}
@@ -37,7 +37,7 @@ static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringV
size_t i = 0;
if(self->size != other->size) return 0;
for(; i < self->size; ++i) {
- if(to_lower(self->data[i]) != to_lower(other->data[i]))
+ if(to_upper(self->data[i]) != to_upper(other->data[i]))
return 0;
}
return 1;
@@ -103,6 +103,12 @@ static int is_void_tag(HtmlStringView *tag_name) {
}
static void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
+ /* Utf8 BOM */
+ if(len >= 3 && memcmp(html_source, "\xef\xbb\xbf", 3) == 0) {
+ html_source += 3;
+ len -= 3;
+ }
+
self->source = html_source;
self->source_len = len;
self->parse_callback = parse_callback;
@@ -268,6 +274,7 @@ static void html_parser_goto_end_of_js_string(HtmlParser *self, char quote_symbo
}
static int html_parser_goto_script_end_tag(HtmlParser *self) {
+ int res = 0;
self->text.data = self->source + self->offset;
self->text.size = 0;
for(;;) {
@@ -300,10 +307,18 @@ static int html_parser_goto_script_end_tag(HtmlParser *self) {
}
}
- if(self->text_stripped.size > 0)
- return self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata);
- else
- return 0;
+ if(self->text_stripped.size > 0) {
+ res = self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata);
+ if(res != 0)
+ return res;
+ }
+
+ res = self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ if(res != 0)
+ return res;
+
+ html_parser_try_pop_unclosed_tag(self);
+ return res;
}
static void html_parser_goto_comment_end(HtmlParser *self) {