aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-04-28 05:08:42 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-04-28 05:08:42 +0200
commit 4a2f50f00529aa0894486a099b721826add9205b (patch)
tree 873aacfaf9515d80a215b47a3e103b711ed1568b
parent 67f5526455ecd1f02c0adf797decd954f07387bf (diff)
Make tags case insensitive
-rw-r--r-- src/HtmlParser.c 32
1 files changed, 22 insertions, 10 deletions
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index 9df3983..f85a633 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -20,13 +20,27 @@ static HtmlStringView void_tags[] = {
{"source", 6},
{"track", 5},
{"wbr", 3},
+ {"xml", 3},
{NULL, 0}
};
static HtmlStringView script_tag = {"script", 6};
-static int string_view_equals(HtmlStringView *self, HtmlStringView *other) {
- return self->size == other->size && memcmp(self->data, other->data, self->size) == 0;
+static char to_lower(char c) {
+ if(c >= 'A' && c <= 'Z')
+ return c + 32;
+ else
+ return c;
+}
+
+static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other) {
+ size_t i = 0;
+ if(self->size != other->size) return 0;
+ for(; i < self->size; ++i) {
+ if(to_lower(self->data[i]) != to_lower(other->data[i]))
+ return 0;
+ }
+ return 1;
}
static int is_whitespace(int c) {
@@ -71,9 +85,7 @@ static void strip(char *str, size_t size, char **output_str, size_t *output_size
static void html_string_view_to_lowercase(HtmlStringView *string_view) {
size_t i = 0;
for(; i < string_view->size; ++i) {
- char c = string_view->data[i];
- if(c >= 'A' && c <= 'Z')
- string_view->data[i] += 32;
+ string_view->data[i] = to_lower(string_view->data[i]);
}
}
@@ -83,7 +95,7 @@ static int is_void_tag(HtmlStringView *tag_name) {
if(tag_name->size > 0 && tag_name->data[0] == '!')
return 1;
while(tag_iter->data) {
- if(string_view_equals(tag_name, tag_iter))
+ if(string_view_equals_case_insensitive(tag_name, tag_iter))
return 1;
++tag_iter;
}
@@ -352,7 +364,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
if(tag_name_found) {
/* attribute name */
self->attribute_key = identifier;
- html_string_view_to_lowercase(&self->attribute_key);
+ /*html_string_view_to_lowercase(&self->attribute_key);*/
self->attribute_value.data = NULL;
self->attribute_value.size = 0;
@@ -375,7 +387,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
/* tag name */
HtmlStringView prev_tag_name = self->tag_name;
self->tag_name = identifier;
- html_string_view_to_lowercase(&self->tag_name);
+ /*html_string_view_to_lowercase(&self->tag_name);*/
tag_name_found = 1;
if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) {
html_parser_goto_comment_end(self);
@@ -386,7 +398,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
self->tag_before_void_tag = prev_tag_name;
} else {
html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size);
- self->inside_script_tag = string_view_equals(&self->tag_name, &script_tag);
+ self->inside_script_tag = string_view_equals_case_insensitive(&self->tag_name, &script_tag);
}
self->parse_callback(self, HTML_PARSE_TAG_START, self->callback_userdata);
}
@@ -429,7 +441,7 @@ static void html_parser_parse_tag_end(HtmlParser *self) {
found_start_tag_index = -1;
for(i = self->unclosed_tags_offset - 1; i >= 0; --i) {
- if(string_view_equals(&self->unclosed_tags[i], &tag_end_name)) {
+ if(string_view_equals_case_insensitive(&self->unclosed_tags[i], &tag_end_name)) {
found_start_tag_index = i;
break;
}