From fd5527bade00b4692eedb14a801527ceeb342d7c Mon Sep 17 00:00:00 2001 From: DEC05EBA Date: Sat, 4 Jan 2020 20:03:46 +0100 Subject: Call parse callback even for empty text/javascript. Makes tools like xpath easier --- include/HtmlParser.h | 3 ++- src/HtmlParser.c | 18 +++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/include/HtmlParser.h b/include/HtmlParser.h index a60e6d5..fc3ff3a 100644 --- a/include/HtmlParser.h +++ b/include/HtmlParser.h @@ -38,10 +38,11 @@ struct HtmlParser { HtmlStringView attribute_key; HtmlStringView attribute_value; HtmlStringView text; + /* Only used when parse_type is HTML_PARSE_TEXT */ + HtmlStringView text_stripped; int is_tag_void; int inside_script_tag; - int pre_tag_depth; size_t unclosed_tags_offset; HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE]; diff --git a/src/HtmlParser.c b/src/HtmlParser.c index 90c1149..8972730 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -91,9 +91,10 @@ static void html_parser_reset(HtmlParser *self) { self->attribute_value.size = 0; self->text.data = NULL; self->text.size = 0; + self->text_stripped.data = NULL; + self->text_stripped.size = 0; self->is_tag_void = 0; self->inside_script_tag = 0; - self->pre_tag_depth = 0; self->unclosed_tags_offset = 0; } @@ -274,8 +275,7 @@ static void html_parser_goto_script_end_tag(HtmlParser *self) { html_parser_advance_char(self); } } - if(self->text.size > 0) - self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata); + self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata); } static void html_parser_goto_comment_end(HtmlParser *self) { @@ -355,8 +355,6 @@ static void html_parser_parse_tag_start(HtmlParser *self) { if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) { html_parser_goto_comment_end(self); return; - } else if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "pre", 3) == 0) { - ++self->pre_tag_depth; } self->is_tag_void = is_void_tag(&self->tag_name); if(!self->is_tag_void) { @@ -412,10 +410,6 @@ static void html_parser_parse_tag_end(HtmlParser *self) { self->tag_name = self->unclosed_tags[self->unclosed_tags_offset - 1]; self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata); } - - if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "pre", 3) == 0) { - --self->pre_tag_depth; - } } else { fprintf(stderr, "Warning: start tag not found for end tag '%.*s'\n", (int)tag_end_name.size, tag_end_name.data); } @@ -450,10 +444,8 @@ void html_parser_parse(HtmlParser *self) { html_parser_advance_char(self); } self->text.size = (self->source + self->offset) - self->text.data; - if(self->pre_tag_depth == 0) - strip(self->text.data, self->text.size, &self->text.data, &self->text.size, is_whitespace); - if(self->text.size > 0) - self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata); + strip(self->text.data, self->text.size, &self->text_stripped.data, &self->text_stripped.size, is_whitespace); + self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata); } } -- cgit v1.2.3