diff options
author | dec05eba <dec05eba@protonmail.com> | 2020-06-06 02:50:51 +0200 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2020-06-06 02:50:51 +0200 |
commit | 9b45f8fe806b78300109274e4c951f894ffbac70 (patch) | |
tree | 14a14cafd00ee2420881a05958f5cfed0e562f42 /src | |
parent | f62af612c6e6217d7b2a1b064cdf10383524302b (diff) |
Convert uppercase to lowercase for tag and attribute name, fix missing is_identifier_char
Diffstat (limited to 'src')
-rw-r--r-- | src/HtmlParser.c | 51 |
1 files changed, 34 insertions, 17 deletions
diff --git a/src/HtmlParser.c b/src/HtmlParser.c index 965368e..8b27d6d 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -46,7 +46,7 @@ static int is_newline(int c) { return c == '\n' || c == '\r'; } -static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { +static void lstrip(char *str, size_t size, char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { size_t i = 0; while(i < size && strip_filter_func(str[i])) { ++i; @@ -55,7 +55,7 @@ static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size = size - i; } -static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { +static void rstrip(char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { ssize_t i = size - 1; while(i >= 0 && strip_filter_func(str[i])) { --i; @@ -63,11 +63,20 @@ static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip *output_size = i + 1; } -static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { +static void strip(char *str, size_t size, char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { lstrip(str, size, output_str, output_size, strip_filter_func); rstrip(*output_str, *output_size, output_size, strip_filter_func); } +static void html_string_view_to_lowercase(HtmlStringView *string_view) { + size_t i = 0; + for(; i < string_view->size; ++i) { + char c = string_view->data[i]; + if(c >= 'A' && c <= 'Z') + string_view->data[i] += 32; + } +} + static int is_void_tag(HtmlStringView *tag_name) { HtmlStringView *tag_iter = &void_tags[0]; /* !DOCTYPE, !--, etc.... */ @@ -98,7 +107,7 @@ static void html_parser_reset(HtmlParser *self) { self->unclosed_tags_offset = 0; } -void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) { +void html_parser_init(HtmlParser *self, char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) { self->source = html_source; self->source_len = len; self->parse_callback = parse_callback; @@ -131,19 +140,7 @@ static void html_parser_advance_char(HtmlParser *self) { ++self->offset; } -static int is_alpha(char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - -static int is_digit(char c) { - return c >= '0' && c <= '9'; -} - -static int is_identifier_char(char c) { - return is_alpha(c) || is_digit(c) || c == '-' || c == '_' || c == '!' || c == ':'; -} - -static void html_parser_try_append_unclosed_tag(HtmlParser *self, const char *data, size_t size) { +static void html_parser_try_append_unclosed_tag(HtmlParser *self, char *data, size_t size) { if(self->unclosed_tags_offset == UNCLOSED_TAGS_SIZE) { fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n"); return; @@ -196,6 +193,24 @@ static int is_attribute_value_char(char c) { } } +static int is_identifier_char(char c) { + switch(c) { + case ' ': + case '\t': + case '\n': + case '\v': + case '"': + case '\'': + case '<': + case '>': + case '/': + case '=': + return 0; + default: + return 1; + } +} + /* TODO: Unescape html characters in attribute value */ static void html_parser_parse_attribute_value_quoted(HtmlParser *self, char quote_symbol) { self->attribute_value.data = self->source + self->offset; @@ -330,6 +345,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) { if(tag_name_found) { /* attribute name */ self->attribute_key = identifier; + html_string_view_to_lowercase(&self->attribute_key); self->attribute_value.data = NULL; self->attribute_value.size = 0; @@ -351,6 +367,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) { } else { /* tag name */ self->tag_name = identifier; + html_string_view_to_lowercase(&self->tag_name); tag_name_found = 1; if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) { html_parser_goto_comment_end(self); |