From efdd24c40d9d6ffa5207ddc369b03eba86e9e22e Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 28 Apr 2021 19:46:29 +0200 Subject: Simplify api, use const char* for input string --- include/HtmlParser.h | 9 +++---- src/HtmlParser.c | 68 ++++++++++++++++++++++++---------------------------- tests/main.c | 9 +++---- 3 files changed, 37 insertions(+), 49 deletions(-) diff --git a/include/HtmlParser.h b/include/HtmlParser.h index 6a295aa..0bd7203 100644 --- a/include/HtmlParser.h +++ b/include/HtmlParser.h @@ -9,7 +9,7 @@ #include typedef struct { - char *data; + const char *data; size_t size; } HtmlStringView; @@ -28,7 +28,7 @@ typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_t #define UNCLOSED_TAGS_SIZE 2048 struct HtmlParser { - char *source; + const char *source; size_t source_len; size_t offset; HtmlParseCallback parse_callback; @@ -50,9 +50,6 @@ struct HtmlParser { }; /* Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END */ -void html_parser_init(HtmlParser *self, char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata); -void html_parser_deinit(HtmlParser *self); - -void html_parser_parse(HtmlParser *self); +void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata); #endif /* HTML_PARSER_H */ diff --git a/src/HtmlParser.c b/src/HtmlParser.c index f85a633..0eb1275 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -60,7 +60,7 @@ static int is_newline(int c) { return c == '\n' || c == '\r'; } -static void lstrip(char *str, size_t size, char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { +static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { size_t i = 0; while(i < size && strip_filter_func(str[i])) { ++i; @@ -69,7 +69,7 @@ static void lstrip(char *str, size_t size, char **output_str, size_t *output_siz *output_size = size - i; } -static void rstrip(char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { +static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { ssize_t i = size - 1; while(i >= 0 && strip_filter_func(str[i])) { --i; @@ -77,17 +77,17 @@ static void rstrip(char *str, size_t size, size_t *output_size, int(*strip_filte *output_size = i + 1; } -static void strip(char *str, size_t size, char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { +static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { lstrip(str, size, output_str, output_size, strip_filter_func); rstrip(*output_str, *output_size, output_size, strip_filter_func); } -static void html_string_view_to_lowercase(HtmlStringView *string_view) { +/*static void html_string_view_to_lowercase(HtmlStringView *string_view) { size_t i = 0; for(; i < string_view->size; ++i) { string_view->data[i] = to_lower(string_view->data[i]); } -} +}*/ static int is_void_tag(HtmlStringView *tag_name) { HtmlStringView *tag_iter = &void_tags[0]; @@ -102,7 +102,12 @@ static int is_void_tag(HtmlStringView *tag_name) { return 0; } -static void html_parser_reset(HtmlParser *self) { +static void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) { + self->source = html_source; + self->source_len = len; + self->parse_callback = parse_callback; + self->callback_userdata = userdata; + self->offset = 0; self->tag_name.data = NULL; self->tag_name.size = 0; @@ -121,17 +126,6 @@ static void html_parser_reset(HtmlParser *self) { self->unclosed_tags_offset = 0; } -void html_parser_init(HtmlParser *self, char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) { - self->source = html_source; - self->source_len = len; - self->parse_callback = parse_callback; - self->callback_userdata = userdata; -} - -void html_parser_deinit(HtmlParser *self) { - (void)self; -} - static char html_parser_next_char(HtmlParser *self) { if(self->offset < self->source_len) { char c = self->source[self->offset]; @@ -154,7 +148,7 @@ static void html_parser_advance_char(HtmlParser *self) { ++self->offset; } -static void html_parser_try_append_unclosed_tag(HtmlParser *self, char *data, size_t size) { +static void html_parser_try_append_unclosed_tag(HtmlParser *self, const char *data, size_t size) { if(self->unclosed_tags_offset == UNCLOSED_TAGS_SIZE) { fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n"); return; @@ -463,40 +457,40 @@ static void html_parser_parse_tag_end(HtmlParser *self) { } } -void html_parser_parse(HtmlParser *self) { +void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) { HtmlStringView top_unclosed_tag; - - html_parser_reset(self); + HtmlParser self; + html_parser_init(&self, html_source, len, parse_callback, userdata); for(;;) { - char c = html_parser_next_char(self); + char c = html_parser_next_char(&self); if(c == '<') { - html_parser_skip_whitespace(self); - if(html_parser_peek_char(self) == '/') { - html_parser_advance_char(self); - html_parser_parse_tag_end(self); + html_parser_skip_whitespace(&self); + if(html_parser_peek_char(&self) == '/') { + html_parser_advance_char(&self); + html_parser_parse_tag_end(&self); } else { - html_parser_parse_tag_start(self); + html_parser_parse_tag_start(&self); } } else if(c == '\0') { break; } else { - self->text.data = (self->source + self->offset) - 1; + self.text.data = (self.source + self.offset) - 1; for(;;) { - c = html_parser_peek_char(self); + c = html_parser_peek_char(&self); if(c == '<' || c == '\0') break; else - html_parser_advance_char(self); + html_parser_advance_char(&self); } - self->text.size = (self->source + self->offset) - self->text.data; - strip(self->text.data, self->text.size, &self->text_stripped.data, &self->text_stripped.size, is_whitespace); - self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata); + self.text.size = (self.source + self.offset) - self.text.data; + strip(self.text.data, self.text.size, &self.text_stripped.data, &self.text_stripped.size, is_whitespace); + self.parse_callback(&self, HTML_PARSE_TEXT, self.callback_userdata); } } - while(html_parser_try_get_top_unclosed_tag(self, &top_unclosed_tag)) { - self->tag_name = top_unclosed_tag; - self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata); - html_parser_pop_unclosed_tag(self); + while(html_parser_try_get_top_unclosed_tag(&self, &top_unclosed_tag)) { + self.tag_name = top_unclosed_tag; + self.parse_callback(&self, HTML_PARSE_TAG_END, self.callback_userdata); + html_parser_pop_unclosed_tag(&self); } } diff --git a/tests/main.c b/tests/main.c index 6d84cfa..de37c9a 100644 --- a/tests/main.c +++ b/tests/main.c @@ -23,10 +23,10 @@ char* file_get_content(const char *path, long *filesize) { static void html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata_any) { switch(parse_type) { case HTML_PARSE_TAG_START: - printf("tag start: %.*s\n", html_parser->tag_name.size, html_parser->tag_name.data); + printf("tag start: %.*s\n", (int)html_parser->tag_name.size, html_parser->tag_name.data); break; case HTML_PARSE_TAG_END: - printf("tag end: %.*s\n", html_parser->tag_name.size, html_parser->tag_name.data); + printf("tag end: %.*s\n", (int)html_parser->tag_name.size, html_parser->tag_name.data); break; } } @@ -39,10 +39,7 @@ int main() { return 1; } - HtmlParser html_parser; - html_parser_init(&html_parser, file_data, filesize, html_parse_callback, NULL); - html_parser_parse(&html_parser); - html_parser_deinit(&html_parser); + html_parser_parse(file_data, filesize, html_parse_callback, NULL); free(file_data); return 0; } -- cgit v1.2.3