From 4a2f50f00529aa0894486a099b721826add9205b Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 28 Apr 2021 05:08:42 +0200 Subject: Make tags case insensitive --- src/HtmlParser.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/HtmlParser.c b/src/HtmlParser.c index 9df3983..f85a633 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -20,13 +20,27 @@ static HtmlStringView void_tags[] = { {"source", 6}, {"track", 5}, {"wbr", 3}, + {"xml", 3}, {NULL, 0} }; static HtmlStringView script_tag = {"script", 6}; -static int string_view_equals(HtmlStringView *self, HtmlStringView *other) { - return self->size == other->size && memcmp(self->data, other->data, self->size) == 0; +static char to_lower(char c) { + if(c >= 'A' && c <= 'Z') + return c + 32; + else + return c; +} + +static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other) { + size_t i = 0; + if(self->size != other->size) return 0; + for(; i < self->size; ++i) { + if(to_lower(self->data[i]) != to_lower(other->data[i])) + return 0; + } + return 1; } static int is_whitespace(int c) { @@ -71,9 +85,7 @@ static void strip(char *str, size_t size, char **output_str, size_t *output_size static void html_string_view_to_lowercase(HtmlStringView *string_view) { size_t i = 0; for(; i < string_view->size; ++i) { - char c = string_view->data[i]; - if(c >= 'A' && c <= 'Z') - string_view->data[i] += 32; + string_view->data[i] = to_lower(string_view->data[i]); } } @@ -83,7 +95,7 @@ static int is_void_tag(HtmlStringView *tag_name) { if(tag_name->size > 0 && tag_name->data[0] == '!') return 1; while(tag_iter->data) { - if(string_view_equals(tag_name, tag_iter)) + if(string_view_equals_case_insensitive(tag_name, tag_iter)) return 1; ++tag_iter; } @@ -352,7 +364,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) { if(tag_name_found) { /* attribute name */ self->attribute_key = identifier; - html_string_view_to_lowercase(&self->attribute_key); + /*html_string_view_to_lowercase(&self->attribute_key);*/ self->attribute_value.data = NULL; self->attribute_value.size = 0; @@ -375,7 +387,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) { /* tag name */ HtmlStringView prev_tag_name = self->tag_name; self->tag_name = identifier; - html_string_view_to_lowercase(&self->tag_name); + /*html_string_view_to_lowercase(&self->tag_name);*/ tag_name_found = 1; if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) { html_parser_goto_comment_end(self); @@ -386,7 +398,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) { self->tag_before_void_tag = prev_tag_name; } else { html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size); - self->inside_script_tag = string_view_equals(&self->tag_name, &script_tag); + self->inside_script_tag = string_view_equals_case_insensitive(&self->tag_name, &script_tag); } self->parse_callback(self, HTML_PARSE_TAG_START, self->callback_userdata); } @@ -429,7 +441,7 @@ static void html_parser_parse_tag_end(HtmlParser *self) { found_start_tag_index = -1; for(i = self->unclosed_tags_offset - 1; i >= 0; --i) { - if(string_view_equals(&self->unclosed_tags[i], &tag_end_name)) { + if(string_view_equals_case_insensitive(&self->unclosed_tags[i], &tag_end_name)) { found_start_tag_index = i; break; } -- cgit v1.2.3