diff options
-rw-r--r-- | include/HtmlParser.h | 3 | ||||
-rw-r--r-- | src/HtmlParser.c | 37 |
2 files changed, 26 insertions, 14 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h index 1e23d0d..87a3aa3 100644 --- a/include/HtmlParser.h +++ b/include/HtmlParser.h @@ -65,6 +65,9 @@ struct HtmlParser { */ int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata); +/* Returns 1 if equals */ +int html_string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other); + #ifdef __cplusplus } #endif diff --git a/src/HtmlParser.c b/src/HtmlParser.c index a5cde1c..fafcf5a 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -1,6 +1,4 @@ #include "../include/HtmlParser.h" -#include <stdio.h> -#include <string.h> #include <assert.h> static HtmlStringView void_tags[] = { @@ -33,7 +31,7 @@ static char to_upper(char c) { return c; } -static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other) { +int html_string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other) { size_t i = 0; if(self->size != other->size) return 0; for(; i < self->size; ++i) { @@ -43,6 +41,17 @@ static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringV return 1; } +static int memeql(const void *m1, const void *m2, size_t size) { + size_t i = 0; + for(; i < size; ++i) { + const char c1 = ((const char*)m1)[i]; + const char c2 = ((const char*)m2)[i]; + if(c1 != c2) + return 0; + } + return 1; +} + static int is_whitespace(int c) { switch(c) { case ' ': @@ -70,7 +79,7 @@ static void lstrip(const char *str, size_t size, const char **output_str, size_t } static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { - ssize_t i = size - 1; + long i = size - 1; while(i >= 0 && strip_filter_func(str[i])) { --i; } @@ -95,7 +104,7 @@ static int is_void_tag(HtmlStringView *tag_name) { if(tag_name->size > 0 && tag_name->data[0] == '!') return 1; while(tag_iter->data) { - if(string_view_equals_case_insensitive(tag_name, tag_iter)) + if(html_string_view_equals_case_insensitive(tag_name, tag_iter)) return 1; ++tag_iter; } @@ -104,7 +113,7 @@ static int is_void_tag(HtmlStringView *tag_name) { static void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) { /* Utf8 BOM */ - if(len >= 3 && memcmp(html_source, "\xef\xbb\xbf", 3) == 0) { + if(len >= 3 && memeql(html_source, "\xef\xbb\xbf", 3)) { html_source += 3; len -= 3; } @@ -156,7 +165,7 @@ static void html_parser_advance_char(HtmlParser *self) { static int html_parser_try_append_unclosed_tag(HtmlParser *self, const char *data, size_t size) { if(self->unclosed_tags_offset == UNCLOSED_TAGS_SIZE) { - fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n"); + /*fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n");*/ return 1; } self->unclosed_tags[self->unclosed_tags_offset].data = data; @@ -263,7 +272,7 @@ static int html_parser_goto_script_end_tag(HtmlParser *self) { self->text.size = 0; for(;;) { char c = html_parser_peek_char(self); - if(c == '<' && self->offset + 7 < self->source_len && memcmp(self->source + self->offset + 1, "/script", 7) == 0) { + if(c == '<' && self->offset + 7 < self->source_len && memeql(self->source + self->offset + 1, "/script", 7)) { self->text.size = (self->source + self->offset) - self->text.data; strip(self->text.data, self->text.size, &self->text_stripped.data, &self->text_stripped.size, is_whitespace); self->offset += 7; @@ -304,7 +313,7 @@ static int html_parser_goto_script_end_tag(HtmlParser *self) { static void html_parser_goto_comment_end(HtmlParser *self) { for(;;) { - if(self->source_len - self->offset >= 3 && memcmp(self->source + self->offset, "-->", 3) == 0) { + if(self->source_len - self->offset >= 3 && memeql(self->source + self->offset, "-->", 3)) { self->offset += 3; break; } @@ -390,7 +399,7 @@ static int html_parser_parse_tag_start(HtmlParser *self) { self->tag_name = identifier; /*html_string_view_to_lowercase(&self->tag_name);*/ tag_name_found = 1; - if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) { + if(self->tag_name.size == 3 && memeql(self->tag_name.data, "!--", 3)) { html_parser_goto_comment_end(self); return 0; } @@ -401,7 +410,7 @@ static int html_parser_parse_tag_start(HtmlParser *self) { res = html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size); if(res != 0) return res; - self->inside_script_tag = string_view_equals_case_insensitive(&self->tag_name, &script_tag); + self->inside_script_tag = html_string_view_equals_case_insensitive(&self->tag_name, &script_tag); } res = self->parse_callback(self, HTML_PARSE_TAG_START, self->callback_userdata); if(res != 0) @@ -422,8 +431,8 @@ static int html_parser_parse_tag_end(HtmlParser *self) { return 0; } else if(!tag_name_found && is_identifier_char(c)) { HtmlStringView tag_end_name; - ssize_t found_start_tag_index; - ssize_t i; + long found_start_tag_index; + long i; tag_end_name.data = self->source + self->offset; html_parser_advance_char(self); @@ -446,7 +455,7 @@ static int html_parser_parse_tag_end(HtmlParser *self) { found_start_tag_index = -1; for(i = self->unclosed_tags_offset - 1; i >= 0; --i) { - if(string_view_equals_case_insensitive(&self->unclosed_tags[i], &tag_end_name)) { + if(html_string_view_equals_case_insensitive(&self->unclosed_tags[i], &tag_end_name)) { found_start_tag_index = i; break; } |