aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-07-02 17:42:37 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-07-02 17:42:37 +0200
commit 2b1a4ac7ad5743400fa875a91ee6869fcd94a9ed (patch)
tree 136f282c5fcd53e37d053f39f97acd607529380b
parent 78f204d79c9ed61815a7eb206f2c1a4f4df289a0 (diff)
Return non-0 value from callback to cancel parsing (and return the value in html_parser_parse)
-rw-r--r-- include/HtmlParser.h 10
-rw-r--r-- src/HtmlParser.c 66
-rw-r--r-- tests/main.c 3
3 files changed, 54 insertions, 25 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index e6f0c3b..a7cdb4f 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -27,7 +27,8 @@ typedef enum{
HTML_PARSE_JAVASCRIPT_CODE
} HtmlParseType;
-typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata);
+/* Return 0 to continue parsing. Any non-zero return value cancels parsing and is returned from html_parser_parse. */
+typedef int (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata);
#define UNCLOSED_TAGS_SIZE 2048
@@ -53,8 +54,11 @@ struct HtmlParser {
HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
};
-/* Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END */
-void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
+/*
+ Returns 0 on success. If |parse_callback| returns a non-zero value, parsing is canceled and that value is returned.
+ Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END
+*/
+int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
#ifdef __cplusplus
}
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index 7171d1f..aadf2a8 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -266,7 +266,7 @@ static void html_parser_goto_end_of_js_string(HtmlParser *self, char quote_symbo
}
}
-static void html_parser_goto_script_end_tag(HtmlParser *self) {
+static int html_parser_goto_script_end_tag(HtmlParser *self) {
self->text.data = self->source + self->offset;
self->text.size = 0;
for(;;) {
@@ -298,7 +298,7 @@ static void html_parser_goto_script_end_tag(HtmlParser *self) {
html_parser_advance_char(self);
}
}
- self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata);
+ return self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata);
}
static void html_parser_goto_comment_end(HtmlParser *self) {
@@ -311,13 +311,15 @@ static void html_parser_goto_comment_end(HtmlParser *self) {
}
}
-static void html_parser_parse_tag_start(HtmlParser *self) {
+static int html_parser_parse_tag_start(HtmlParser *self) {
int tag_name_found = 0;
for(;;) {
char c = html_parser_next_char(self);
if(c == '>') {
if(tag_name_found && self->is_tag_void) {
- self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ int res = self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ if(res != 0)
+ return res;
self->tag_name = self->tag_before_void_tag;
}
self->is_tag_void = 0;
@@ -325,15 +327,17 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
if(self->inside_script_tag) {
self->inside_script_tag = 0;
/* <script> tags require special handling since they can have </script> inside a javascript string */
- html_parser_goto_script_end_tag(self);
+ return html_parser_goto_script_end_tag(self);
}
- return;
+ return 0;
} else if(c == '/') {
html_parser_skip_whitespace(self);
if(html_parser_peek_char(self) == '>') {
html_parser_advance_char(self);
if(tag_name_found) {
- self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ int res = self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ if(res != 0)
+ return res;
if(self->is_tag_void)
self->tag_name = self->tag_before_void_tag;
else
@@ -341,7 +345,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
}
self->is_tag_void = 0;
self->inside_script_tag = 0;
- return;
+ return 0;
}
} else if(is_identifier_char(c)) {
HtmlStringView identifier;
@@ -356,6 +360,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
}
identifier.size = (self->source + self->offset) - identifier.data;
if(tag_name_found) {
+ int res = 0;
/* attribute name */
self->attribute_key = identifier;
/*html_string_view_to_lowercase(&self->attribute_key);*/
@@ -376,8 +381,11 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
html_parser_parse_attribute_value(self);
}
}
- self->parse_callback(self, HTML_PARSE_ATTRIBUTE, self->callback_userdata);
+ res = self->parse_callback(self, HTML_PARSE_ATTRIBUTE, self->callback_userdata);
+ if(res != 0)
+ return res;
} else {
+ int res = 0;
/* tag name */
HtmlStringView prev_tag_name = self->tag_name;
self->tag_name = identifier;
@@ -385,7 +393,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
tag_name_found = 1;
if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) {
html_parser_goto_comment_end(self);
- return;
+ return 0;
}
self->is_tag_void = is_void_tag(&self->tag_name);
if(self->is_tag_void) {
@@ -394,21 +402,23 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size);
self->inside_script_tag = string_view_equals_case_insensitive(&self->tag_name, &script_tag);
}
- self->parse_callback(self, HTML_PARSE_TAG_START, self->callback_userdata);
+ res = self->parse_callback(self, HTML_PARSE_TAG_START, self->callback_userdata);
+ if(res != 0)
+ return res;
}
} else if(c == '\0') {
- return;
+ return 0;
}
}
}
-static void html_parser_parse_tag_end(HtmlParser *self) {
+static int html_parser_parse_tag_end(HtmlParser *self) {
int tag_name_found = 0;
for(;;) {
char c = html_parser_peek_char(self);
if(c == '>') {
html_parser_advance_char(self);
- return;
+ return 0;
} else if(!tag_name_found && is_identifier_char(c)) {
HtmlStringView tag_end_name;
ssize_t found_start_tag_index;
@@ -443,37 +453,44 @@ static void html_parser_parse_tag_end(HtmlParser *self) {
if(found_start_tag_index != -1) {
for(; self->unclosed_tags_offset > (size_t)found_start_tag_index; --self->unclosed_tags_offset) {
+ int res = 0;
self->tag_name = self->unclosed_tags[self->unclosed_tags_offset - 1];
- self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ res = self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
+ if(res != 0)
+ return res;
}
} else {
/*fprintf(stderr, "Warning: start tag not found for end tag '%.*s'\n", (int)tag_end_name.size, tag_end_name.data);*/
}
} else if(c == '\0') {
- return;
+ return 0;
} else {
html_parser_advance_char(self);
}
}
}
-void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
+int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
HtmlStringView top_unclosed_tag;
HtmlParser self;
html_parser_init(&self, html_source, len, parse_callback, userdata);
for(;;) {
char c = html_parser_next_char(&self);
if(c == '<') {
+ int res = 0;
html_parser_skip_whitespace(&self);
if(html_parser_peek_char(&self) == '/') {
html_parser_advance_char(&self);
- html_parser_parse_tag_end(&self);
+ res = html_parser_parse_tag_end(&self);
} else {
- html_parser_parse_tag_start(&self);
+ res = html_parser_parse_tag_start(&self);
}
+ if(res != 0)
+ return res;
} else if(c == '\0') {
break;
} else {
+ int res = 0;
self.text.data = (self.source + self.offset) - 1;
for(;;) {
c = html_parser_peek_char(&self);
@@ -484,13 +501,20 @@ void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback pa
}
self.text.size = (self.source + self.offset) - self.text.data;
strip(self.text.data, self.text.size, &self.text_stripped.data, &self.text_stripped.size, is_whitespace);
- self.parse_callback(&self, HTML_PARSE_TEXT, self.callback_userdata);
+ res = self.parse_callback(&self, HTML_PARSE_TEXT, self.callback_userdata);
+ if(res != 0)
+ return res;
}
}
while(html_parser_try_get_top_unclosed_tag(&self, &top_unclosed_tag)) {
+ int res = 0;
self.tag_name = top_unclosed_tag;
- self.parse_callback(&self, HTML_PARSE_TAG_END, self.callback_userdata);
+ res = self.parse_callback(&self, HTML_PARSE_TAG_END, self.callback_userdata);
+ if(res != 0)
+ return res;
html_parser_pop_unclosed_tag(&self);
}
+
+ return 0;
}
diff --git a/tests/main.c b/tests/main.c
index de37c9a..b3b6d5c 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -20,7 +20,7 @@ char* file_get_content(const char *path, long *filesize) {
return data;
}
-static void html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata_any) {
+static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata_any) {
switch(parse_type) {
case HTML_PARSE_TAG_START:
printf("tag start: %.*s\n", (int)html_parser->tag_name.size, html_parser->tag_name.data);
@@ -29,6 +29,7 @@ static void html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_typ
printf("tag end: %.*s\n", (int)html_parser->tag_name.size, html_parser->tag_name.data);
break;
}
+ return 0;
}
int main() {