about summary refs log tree commit diff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-09-15 17:18:51 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-09-15 17:18:51 +0200
commit fe3993c221a604f5fb9f7ef1ba6179740cbf9173 (patch)
tree 4c8e4d0cc12feb52609b0fa294425896675a5223
parent 2666c5618f0ba2cb885e484523d2261d938d119c (diff)
Remove dependency on string.h; make the case-insensitive string view equality function public
-rw-r--r-- include/HtmlParser.h 3
-rw-r--r-- src/HtmlParser.c 37
2 files changed, 26 insertions, 14 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index 1e23d0d..87a3aa3 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -65,6 +65,9 @@ struct HtmlParser {
*/
int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
+/* Returns 1 if the two string views are equal (ignoring case), 0 otherwise */
+int html_string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index a5cde1c..fafcf5a 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -1,6 +1,4 @@
#include "../include/HtmlParser.h"
-#include <stdio.h>
-#include <string.h>
#include <assert.h>
static HtmlStringView void_tags[] = {
@@ -33,7 +31,7 @@ static char to_upper(char c) {
return c;
}
-static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other) {
+int html_string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringView *other) {
size_t i = 0;
if(self->size != other->size) return 0;
for(; i < self->size; ++i) {
@@ -43,6 +41,17 @@ static int string_view_equals_case_insensitive(HtmlStringView *self, HtmlStringV
return 1;
}
+static int memeql(const void *m1, const void *m2, size_t size) { /* Returns 1 if the first size bytes of m1 and m2 are identical, otherwise 0 */
+ size_t i = 0;
+ for(; i < size; ++i) { /* a size of 0 skips the loop, so empty ranges compare equal */
+ const char c1 = ((const char*)m1)[i];
+ const char c2 = ((const char*)m2)[i];
+ if(c1 != c2) /* stop at the first mismatching byte */
+ return 0;
+ }
+ return 1;
+}
+
static int is_whitespace(int c) {
switch(c) {
case ' ':
@@ -70,7 +79,7 @@ static void lstrip(const char *str, size_t size, const char **output_str, size_t
}
static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) {
- ssize_t i = size - 1;
+ long i = size - 1;
while(i >= 0 && strip_filter_func(str[i])) {
--i;
}
@@ -95,7 +104,7 @@ static int is_void_tag(HtmlStringView *tag_name) {
if(tag_name->size > 0 && tag_name->data[0] == '!')
return 1;
while(tag_iter->data) {
- if(string_view_equals_case_insensitive(tag_name, tag_iter))
+ if(html_string_view_equals_case_insensitive(tag_name, tag_iter))
return 1;
++tag_iter;
}
@@ -104,7 +113,7 @@ static int is_void_tag(HtmlStringView *tag_name) {
static void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
/* Utf8 BOM */
- if(len >= 3 && memcmp(html_source, "\xef\xbb\xbf", 3) == 0) {
+ if(len >= 3 && memeql(html_source, "\xef\xbb\xbf", 3)) {
html_source += 3;
len -= 3;
}
@@ -156,7 +165,7 @@ static void html_parser_advance_char(HtmlParser *self) {
static int html_parser_try_append_unclosed_tag(HtmlParser *self, const char *data, size_t size) {
if(self->unclosed_tags_offset == UNCLOSED_TAGS_SIZE) {
- fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n");
+ /*fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n");*/
return 1;
}
self->unclosed_tags[self->unclosed_tags_offset].data = data;
@@ -263,7 +272,7 @@ static int html_parser_goto_script_end_tag(HtmlParser *self) {
self->text.size = 0;
for(;;) {
char c = html_parser_peek_char(self);
- if(c == '<' && self->offset + 7 < self->source_len && memcmp(self->source + self->offset + 1, "/script", 7) == 0) {
+ if(c == '<' && self->offset + 7 < self->source_len && memeql(self->source + self->offset + 1, "/script", 7)) {
self->text.size = (self->source + self->offset) - self->text.data;
strip(self->text.data, self->text.size, &self->text_stripped.data, &self->text_stripped.size, is_whitespace);
self->offset += 7;
@@ -304,7 +313,7 @@ static int html_parser_goto_script_end_tag(HtmlParser *self) {
static void html_parser_goto_comment_end(HtmlParser *self) {
for(;;) {
- if(self->source_len - self->offset >= 3 && memcmp(self->source + self->offset, "-->", 3) == 0) {
+ if(self->source_len - self->offset >= 3 && memeql(self->source + self->offset, "-->", 3)) {
self->offset += 3;
break;
}
@@ -390,7 +399,7 @@ static int html_parser_parse_tag_start(HtmlParser *self) {
self->tag_name = identifier;
/*html_string_view_to_lowercase(&self->tag_name);*/
tag_name_found = 1;
- if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) {
+ if(self->tag_name.size == 3 && memeql(self->tag_name.data, "!--", 3)) {
html_parser_goto_comment_end(self);
return 0;
}
@@ -401,7 +410,7 @@ static int html_parser_parse_tag_start(HtmlParser *self) {
res = html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size);
if(res != 0)
return res;
- self->inside_script_tag = string_view_equals_case_insensitive(&self->tag_name, &script_tag);
+ self->inside_script_tag = html_string_view_equals_case_insensitive(&self->tag_name, &script_tag);
}
res = self->parse_callback(self, HTML_PARSE_TAG_START, self->callback_userdata);
if(res != 0)
@@ -422,8 +431,8 @@ static int html_parser_parse_tag_end(HtmlParser *self) {
return 0;
} else if(!tag_name_found && is_identifier_char(c)) {
HtmlStringView tag_end_name;
- ssize_t found_start_tag_index;
- ssize_t i;
+ long found_start_tag_index;
+ long i;
tag_end_name.data = self->source + self->offset;
html_parser_advance_char(self);
@@ -446,7 +455,7 @@ static int html_parser_parse_tag_end(HtmlParser *self) {
found_start_tag_index = -1;
for(i = self->unclosed_tags_offset - 1; i >= 0; --i) {
- if(string_view_equals_case_insensitive(&self->unclosed_tags[i], &tag_end_name)) {
+ if(html_string_view_equals_case_insensitive(&self->unclosed_tags[i], &tag_end_name)) {
found_start_tag_index = i;
break;
}