about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/HtmlParser.h 9
-rw-r--r-- src/HtmlParser.c 68
-rw-r--r-- tests/main.c 9
3 files changed, 37 insertions, 49 deletions
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index 6a295aa..0bd7203 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -9,7 +9,7 @@
#include <stddef.h>
typedef struct {
- char *data;
+ const char *data;
size_t size;
} HtmlStringView;
@@ -28,7 +28,7 @@ typedef void (*HtmlParseCallback)(HtmlParser *html_parser, HtmlParseType parse_t
#define UNCLOSED_TAGS_SIZE 2048
struct HtmlParser {
- char *source;
+ const char *source;
size_t source_len;
size_t offset;
HtmlParseCallback parse_callback;
@@ -50,9 +50,6 @@ struct HtmlParser {
};
/* Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END */
-void html_parser_init(HtmlParser *self, char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
-void html_parser_deinit(HtmlParser *self);
-
-void html_parser_parse(HtmlParser *self);
+void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
#endif /* HTML_PARSER_H */
diff --git a/src/HtmlParser.c b/src/HtmlParser.c
index f85a633..0eb1275 100644
--- a/src/HtmlParser.c
+++ b/src/HtmlParser.c
@@ -60,7 +60,7 @@ static int is_newline(int c) {
return c == '\n' || c == '\r';
}
-static void lstrip(char *str, size_t size, char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
+static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
size_t i = 0;
while(i < size && strip_filter_func(str[i])) {
++i;
@@ -69,7 +69,7 @@ static void lstrip(char *str, size_t size, char **output_str, size_t *output_siz
*output_size = size - i;
}
-static void rstrip(char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) {
+static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) {
ssize_t i = size - 1;
while(i >= 0 && strip_filter_func(str[i])) {
--i;
@@ -77,17 +77,17 @@ static void rstrip(char *str, size_t size, size_t *output_size, int(*strip_filte
*output_size = i + 1;
}
-static void strip(char *str, size_t size, char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
+static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
lstrip(str, size, output_str, output_size, strip_filter_func);
rstrip(*output_str, *output_size, output_size, strip_filter_func);
}
-static void html_string_view_to_lowercase(HtmlStringView *string_view) {
+/*static void html_string_view_to_lowercase(HtmlStringView *string_view) {
size_t i = 0;
for(; i < string_view->size; ++i) {
string_view->data[i] = to_lower(string_view->data[i]);
}
-}
+}*/
static int is_void_tag(HtmlStringView *tag_name) {
HtmlStringView *tag_iter = &void_tags[0];
@@ -102,7 +102,12 @@ static int is_void_tag(HtmlStringView *tag_name) {
return 0;
}
-static void html_parser_reset(HtmlParser *self) {
+static void html_parser_init(HtmlParser *self, const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
+ self->source = html_source;
+ self->source_len = len;
+ self->parse_callback = parse_callback;
+ self->callback_userdata = userdata;
+
self->offset = 0;
self->tag_name.data = NULL;
self->tag_name.size = 0;
@@ -121,17 +126,6 @@ static void html_parser_reset(HtmlParser *self) {
self->unclosed_tags_offset = 0;
}
-void html_parser_init(HtmlParser *self, char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
- self->source = html_source;
- self->source_len = len;
- self->parse_callback = parse_callback;
- self->callback_userdata = userdata;
-}
-
-void html_parser_deinit(HtmlParser *self) {
- (void)self;
-}
-
static char html_parser_next_char(HtmlParser *self) {
if(self->offset < self->source_len) {
char c = self->source[self->offset];
@@ -154,7 +148,7 @@ static void html_parser_advance_char(HtmlParser *self) {
++self->offset;
}
-static void html_parser_try_append_unclosed_tag(HtmlParser *self, char *data, size_t size) {
+static void html_parser_try_append_unclosed_tag(HtmlParser *self, const char *data, size_t size) {
if(self->unclosed_tags_offset == UNCLOSED_TAGS_SIZE) {
fprintf(stderr, "Reached the maximum number of unclosed tags! the html source is too broken\n");
return;
@@ -463,40 +457,40 @@ static void html_parser_parse_tag_end(HtmlParser *self) {
}
}
-void html_parser_parse(HtmlParser *self) {
+void html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata) {
HtmlStringView top_unclosed_tag;
-
- html_parser_reset(self);
+ HtmlParser self;
+ html_parser_init(&self, html_source, len, parse_callback, userdata);
for(;;) {
- char c = html_parser_next_char(self);
+ char c = html_parser_next_char(&self);
if(c == '<') {
- html_parser_skip_whitespace(self);
- if(html_parser_peek_char(self) == '/') {
- html_parser_advance_char(self);
- html_parser_parse_tag_end(self);
+ html_parser_skip_whitespace(&self);
+ if(html_parser_peek_char(&self) == '/') {
+ html_parser_advance_char(&self);
+ html_parser_parse_tag_end(&self);
} else {
- html_parser_parse_tag_start(self);
+ html_parser_parse_tag_start(&self);
}
} else if(c == '\0') {
break;
} else {
- self->text.data = (self->source + self->offset) - 1;
+ self.text.data = (self.source + self.offset) - 1;
for(;;) {
- c = html_parser_peek_char(self);
+ c = html_parser_peek_char(&self);
if(c == '<' || c == '\0')
break;
else
- html_parser_advance_char(self);
+ html_parser_advance_char(&self);
}
- self->text.size = (self->source + self->offset) - self->text.data;
- strip(self->text.data, self->text.size, &self->text_stripped.data, &self->text_stripped.size, is_whitespace);
- self->parse_callback(self, HTML_PARSE_TEXT, self->callback_userdata);
+ self.text.size = (self.source + self.offset) - self.text.data;
+ strip(self.text.data, self.text.size, &self.text_stripped.data, &self.text_stripped.size, is_whitespace);
+ self.parse_callback(&self, HTML_PARSE_TEXT, self.callback_userdata);
}
}
- while(html_parser_try_get_top_unclosed_tag(self, &top_unclosed_tag)) {
- self->tag_name = top_unclosed_tag;
- self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata);
- html_parser_pop_unclosed_tag(self);
+ while(html_parser_try_get_top_unclosed_tag(&self, &top_unclosed_tag)) {
+ self.tag_name = top_unclosed_tag;
+ self.parse_callback(&self, HTML_PARSE_TAG_END, self.callback_userdata);
+ html_parser_pop_unclosed_tag(&self);
}
}
diff --git a/tests/main.c b/tests/main.c
index 6d84cfa..de37c9a 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -23,10 +23,10 @@ char* file_get_content(const char *path, long *filesize) {
static void html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata_any) {
switch(parse_type) {
case HTML_PARSE_TAG_START:
- printf("tag start: %.*s\n", html_parser->tag_name.size, html_parser->tag_name.data);
+ printf("tag start: %.*s\n", (int)html_parser->tag_name.size, html_parser->tag_name.data);
break;
case HTML_PARSE_TAG_END:
- printf("tag end: %.*s\n", html_parser->tag_name.size, html_parser->tag_name.data);
+ printf("tag end: %.*s\n", (int)html_parser->tag_name.size, html_parser->tag_name.data);
break;
}
}
@@ -39,10 +39,7 @@ int main() {
return 1;
}
- HtmlParser html_parser;
- html_parser_init(&html_parser, file_data, filesize, html_parse_callback, NULL);
- html_parser_parse(&html_parser);
- html_parser_deinit(&html_parser);
+ html_parser_parse(file_data, filesize, html_parse_callback, NULL);
free(file_data);
return 0;
}