diff options
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | include/HtmlParser.h | 5 | ||||
-rw-r--r-- | project.conf | 2 |
3 files changed, 8 insertions, 5 deletions
@@ -1,5 +1,5 @@
-A small html parser written in C. The parser fixes broken html (missing end tags). The parser doesn't perform any dynamic allocations (heap) and neither copies any text, and only outputs the parsing result to a callback function rather than a dom tree.
+A small html parser written in ansi c (c89). The parser fixes broken html (missing end tags). The parser doesn't perform any dynamic allocations (heap) and neither copies any text, and only outputs the parsing result to a callback function rather than a dom tree.
 This html parser can also be used to parse xml files with namespaces, such as rss feeds.
 
-# TODO
-Unescape html sequences in text and attribute values
+# Note
+This library does not decode html sequences in text and attribute values
diff --git a/include/HtmlParser.h b/include/HtmlParser.h
index a7cdb4f..5c1f4c1 100644
--- a/include/HtmlParser.h
+++ b/include/HtmlParser.h
@@ -56,7 +56,10 @@ struct HtmlParser {
 
 /*
     Returns the value returned from |parse_callback|. 0 meaning success.
-    Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END
+    Note: HTML_PARSE_TAG_START is guaranteed to be called for a tag before HTML_PARSE_TAG_END.
+    Note: HTML_PARSE_TEXT may be called multiple times for a tag. For example if a tag has multiple text items split between child tags
+    like this: <div>hello<h1>text</h1>world</div>.
+    In this case, HTML_PARSE_TEXT will be called twice for the div tag. First with "hello" and then with "world".
 */
 int html_parser_parse(const char *html_source, size_t len, HtmlParseCallback parse_callback, void *userdata);
 
diff --git a/project.conf b/project.conf
index c83623b..f24212f 100644
--- a/project.conf
+++ b/project.conf
@@ -1,7 +1,7 @@
 [package]
 name = "html-parser"
 type = "static"
-version = "0.1.0"
+version = "1.0.0"
 platforms = ["any"]
 
 [lang.c]