From fdfdf20d085a7c705477d878c11ed208577facb1 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Mon, 19 Oct 2020 00:16:11 +0200 Subject: Revert tag name to previous after void tag end (such as <br>
) --- include/HtmlParser.h | 2 ++ src/HtmlParser.c | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/HtmlParser.h b/include/HtmlParser.h index 7536777..6a295aa 100644 --- a/include/HtmlParser.h +++ b/include/HtmlParser.h @@ -34,12 +34,14 @@ struct HtmlParser { HtmlParseCallback parse_callback; void *callback_userdata; + /* The name of the current enclosing tag */ HtmlStringView tag_name; HtmlStringView attribute_key; HtmlStringView attribute_value; HtmlStringView text; HtmlStringView text_stripped; + HtmlStringView tag_before_void_tag; int is_tag_void; int inside_script_tag; diff --git a/src/HtmlParser.c b/src/HtmlParser.c index 8b27d6d..861168e 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -102,6 +102,8 @@ static void html_parser_reset(HtmlParser *self) { self->text.size = 0; self->text_stripped.data = NULL; self->text_stripped.size = 0; + self->tag_before_void_tag.data = NULL; + self->tag_before_void_tag.size = 0; self->is_tag_void = 0; self->inside_script_tag = 0; self->unclosed_tags_offset = 0; @@ -308,8 +310,10 @@ static void html_parser_parse_tag_start(HtmlParser *self) { for(;;) { char c = html_parser_next_char(self); if(c == '>') { - if(tag_name_found && self->is_tag_void) + if(tag_name_found && self->is_tag_void) { self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata); + self->tag_name = self->tag_before_void_tag; + } self->is_tag_void = 0; if(self->inside_script_tag) { @@ -323,7 +327,9 @@ static void html_parser_parse_tag_start(HtmlParser *self) { html_parser_advance_char(self); if(tag_name_found) { self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata); - if(!self->is_tag_void) + if(self->is_tag_void) + self->tag_name = self->tag_before_void_tag; + else html_parser_try_pop_unclosed_tag(self); } self->is_tag_void = 0; @@ -366,6 +372,7 @@ static void html_parser_parse_tag_start(HtmlParser *self) { self->parse_callback(self, HTML_PARSE_ATTRIBUTE, self->callback_userdata); 
} else { /* tag name */ + HtmlStringView prev_tag_name = self->tag_name; self->tag_name = identifier; html_string_view_to_lowercase(&self->tag_name); tag_name_found = 1; @@ -374,7 +381,9 @@ static void html_parser_parse_tag_start(HtmlParser *self) { return; } self->is_tag_void = is_void_tag(&self->tag_name); - if(!self->is_tag_void) { + if(self->is_tag_void) { + self->tag_before_void_tag = prev_tag_name; + } else { html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size); self->inside_script_tag = string_view_equals(&self->tag_name, &script_tag); } -- cgit v1.2.3