aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDEC05EBA <dec05eba@protonmail.com>2020-01-01 18:14:32 +0100
committerDEC05EBA <dec05eba@protonmail.com>2020-01-01 18:14:32 +0100
commit667a9e8f466f5c97b962b6885d575e6b01405542 (patch)
tree7757a93990565ee759aef6133faa28924d52753c
parent8d8e23320e48f1d8fd98c3c914696f6fe0f7161e (diff)
Do not strip whitespace from text inside pre tags
-rw-r--r--include/HtmlParser.h2
-rw-r--r--src/HtmlParser.c12
2 files changed, 12 insertions, 2 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index b01a59e..a60e6d5 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -41,11 +41,13 @@ struct HtmlParser {
int is_tag_void;
int inside_script_tag;
+ int pre_tag_depth;
size_t unclosed_tags_offset;
HtmlStringView unclosed_tags[UNCLOSED_TAGS_SIZE];
};
+/* Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END */
void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
void html_parser_deinit(HtmlParser *self);
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index 7c91a77..90c1149 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -93,6 +93,7 @@ static void html_parser_reset(HtmlParser *self) {
self->text.size = 0;
self->is_tag_void = 0;
self->inside_script_tag = 0;
+ self->pre_tag_depth = 0;
self->unclosed_tags_offset = 0;
}
@@ -104,7 +105,7 @@ void html_parser_init(HtmlParser *self, const char *html_source, size_t len, Htm
}
void html_parser_deinit(HtmlParser *self) {
-
+ (void)self;
}
static char html_parser_next_char(HtmlParser *self) {
@@ -354,6 +355,8 @@ static void html_parser_parse_tag_start(HtmlParser *self) {
if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) {
html_parser_goto_comment_end(self);
return;
+ } else if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "pre", 3) == 0) {
+ ++self->pre_tag_depth;
}
self->is_tag_void = is_void_tag(&self->tag_name);
if(!self->is_tag_void) {
@@ -409,6 +412,10 @@ static void html_parser_parse_tag_end(HtmlParser *self) {
self->tag_name = self->unclosed_tags[self->unclosed_tags_offset - 1];
self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
}
+
+ if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "pre", 3) == 0) {
+ --self->pre_tag_depth;
+ }
} else {
fprintf(stderr, "Warning: start tag not found for end tag '%.*s'\n", (int)tag_end_name.size, tag_end_name.data);
}
@@ -443,7 +450,8 @@ void html_parser_parse(HtmlParser *self) {
html_parser_advance_char(self);
}
self->text.size = (self->source + self->offset) - self->text.data;
- strip(self->text.data, self->text.size, &self->text.data, &self->text.size, is_whitespace);
+ if(self->pre_tag_depth == 0)
+ strip(self->text.data, self->text.size, &self->text.data, &self->text.size, is_whitespace);
if(self->text.size > 0)
self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata);
}