aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: DEC05EBA <dec05eba@protonmail.com> 2020-01-04 20:03:46 +0100
committer: DEC05EBA <dec05eba@protonmail.com> 2020-01-04 20:03:46 +0100
commit: fd5527bade00b4692eedb14a801527ceeb342d7c (patch)
tree: 02f0def263f68f5271db932814e2bd6dce9d551f
parent: f0535cffe4e159e378ee8fe8201c0ec306d9f824 (diff)
Call the parse callback even for empty text/javascript. This makes tools like xpath easier to implement.
-rw-r--r-- include/HtmlParser.h | 3
-rw-r--r-- src/HtmlParser.c | 18
2 files changed, 7 insertions, 14 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index a60e6d5..fc3ff3a 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -38,10 +38,11 @@ struct HtmlParser {
HtmlStringView attribute_key;
HtmlStringView attribute_value;
HtmlStringView text;
+ /* Only used when parse_type is HTML_PARSE_TEXT */
+ HtmlStringView text_stripped;
int is_tag_void;
int inside_script_tag;
- int pre_tag_depth;
size_t unclosed_tags_offset;
HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index 90c1149..8972730 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -91,9 +91,10 @@ static void html_parser_reset(HtmlParser *self) {
self->attribute_value.size = 0;
self->text.data = NULL;
self->text.size = 0;
+ self->text_stripped.data = NULL;
+ self->text_stripped.size = 0;
self->is_tag_void = 0;
self->inside_script_tag = 0;
- self->pre_tag_depth = 0;
self->unclosed_tags_offset = 0;
}
@@ -274,8 +275,7 @@ static void html_parser_goto_script_end_tag(HtmlParser *self) {
html_parser_advance_char(self);
}
}
- if(self->text.size > 0)
- self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata);
+ self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata);
}
static void html_parser_goto_comment_end(HtmlParser *self) {
@@ -355,8 +355,6 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) {
html_parser_goto_comment_end(self);
return;
- } else if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "pre", 3) == 0) {
- ++self->pre_tag_depth;
}
self->is_tag_void = is_void_tag(&self->tag_name);
if(!self->is_tag_void) {
@@ -412,10 +410,6 @@ static void html_parser_parse_tag_end(HtmlParser *self) {
self->tag_name = self->unclosed_tags[self->unclosed_tags_offset - 1];
self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
}
-
- if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "pre", 3) == 0) {
- --self->pre_tag_depth;
- }
} else {
fprintf(stderr, "Warning: start tag not found for end tag '%.*s'\n", (int)tag_end_name.size, tag_end_name.data);
}
@@ -450,10 +444,8 @@ void html_parser_parse(HtmlParser *self) {
html_parser_advance_char(self);
}
self->text.size = (self->source + self->offset) - self->text.data;
- if(self->pre_tag_depth == 0)
- strip(self->text.data, self->text.size, &self->text.data, &self->text.size, is_whitespace);
- if(self->text.size > 0)
- self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata);
+ strip(self->text.data, self->text.size, &self->text_stripped.data, &self->text_stripped.size, is_whitespace);
+ self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata);
}
}