From 8d8e23320e48f1d8fd98c3c914696f6fe0f7161e Mon Sep 17 00:00:00 2001 From: DEC05EBA Date: Wed, 1 Jan 2020 09:13:18 +0100 Subject: Ignore comments, ignore end tags without a start tag. Fixes tags closing too soon --- include/HtmlParser.h | 2 +- src/HtmlParser.c | 42 +- tests/github.html | 1592 ++++++++++++++++++++++++++++++++++++++++++++++++++ tests/main.c | 46 +- 4 files changed, 1670 insertions(+), 12 deletions(-) create mode 100644 tests/github.html diff --git a/include/HtmlParser.h b/include/HtmlParser.h index 48660da..b01a59e 100644 --- a/include/HtmlParser.h +++ b/include/HtmlParser.h @@ -51,4 +51,4 @@ void html_parser_deinit(HtmlParser *self); void html_parser_parse(HtmlParser *self); -#endif /* HTML_PARSER_H */ \ No newline at end of file +#endif /* HTML_PARSER_H */ diff --git a/src/HtmlParser.c b/src/HtmlParser.c index 81104b7..7c91a77 100644 --- a/src/HtmlParser.c +++ b/src/HtmlParser.c @@ -125,7 +125,8 @@ static char html_parser_peek_char(HtmlParser *self) { } static void html_parser_advance_char(HtmlParser *self) { - ++self->offset; + if(self->offset < self->source_len) + ++self->offset; } static int is_alpha(char c) { @@ -248,7 +249,7 @@ static void html_parser_goto_script_end_tag(HtmlParser *self) { if(c == '"' || c == '\'') { html_parser_advance_char(self); html_parser_goto_end_of_js_string(self, c); - } else if(c == '<' && self->offset + 7 < self->source_len && strncmp(self->source + self->offset + 1, "/script", 7) == 0) { + } else if(c == '<' && self->offset + 7 < self->source_len && memcmp(self->source + self->offset + 1, "/script", 7) == 0) { self->text.size = (self->source + self->offset) - self->text.data; strip(self->text.data, self->text.size, &self->text.data, &self->text.size, is_whitespace); self->offset += 7; @@ -276,6 +277,16 @@ static void html_parser_goto_script_end_tag(HtmlParser *self) { self->parse_callback(self, HTML_PARSE_JAVASCRIPT_CODE, self->callback_userdata); } +static void html_parser_goto_comment_end(HtmlParser *self) { + 
for(;;) { + if(self->source_len - self->offset >= 3 && memcmp(self->source + self->offset, "-->", 3) == 0) { + self->offset += 3; + break; + } + html_parser_advance_char(self); + } +} + static void html_parser_parse_tag_start(HtmlParser *self) { int tag_name_found = 0; for(;;) { @@ -340,6 +351,10 @@ static void html_parser_parse_tag_start(HtmlParser *self) { /* tag name */ self->tag_name = identifier; tag_name_found = 1; + if(self->tag_name.size == 3 && memcmp(self->tag_name.data, "!--", 3) == 0) { + html_parser_goto_comment_end(self); + return; + } self->is_tag_void = is_void_tag(&self->tag_name); if(!self->is_tag_void) { html_parser_try_append_unclosed_tag(self, self->tag_name.data, self->tag_name.size); @@ -373,20 +388,29 @@ static void html_parser_parse_tag_end(HtmlParser *self) { } } tag_end_name.size = (self->source + self->offset) - tag_end_name.data; + tag_name_found = 1; /* void tags close themselves, this is probably invalid html but we choose to ignore it silently */ if(is_void_tag(&tag_end_name)) { - fprintf(stderr, "Warning: got end tag for void tag '%.*s'\n", tag_end_name.size, tag_end_name.data); + fprintf(stderr, "Warning: got end tag for void tag '%.*s'\n", (int)tag_end_name.size, tag_end_name.data); continue; } - HtmlStringView top_unclosed_tag; - while(html_parser_try_get_top_unclosed_tag(self, &top_unclosed_tag)) { - self->tag_name = top_unclosed_tag; - self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata); - html_parser_pop_unclosed_tag(self); - if(string_view_equals(&top_unclosed_tag, &tag_end_name)) + ssize_t found_start_tag_index = -1; + for(ssize_t i = self->unclosed_tags_offset - 1; i >= 0; --i) { + if(string_view_equals(&self->unclosed_tags[i], &tag_end_name)) { + found_start_tag_index = i; break; + } + } + + if(found_start_tag_index != -1) { + for(; self->unclosed_tags_offset > (size_t)found_start_tag_index; --self->unclosed_tags_offset) { + self->tag_name = self->unclosed_tags[self->unclosed_tags_offset - 1]; + 
self->parse_callback(self, HTML_PARSE_TAG_END, self->callback_userdata); + } + } else { + fprintf(stderr, "Warning: start tag not found for end tag '%.*s'\n", (int)tag_end_name.size, tag_end_name.data); } } else if(c == '\0') { return; diff --git a/tests/github.html b/tests/github.html new file mode 100644 index 0000000..b0f8bfb --- /dev/null +++ b/tests/github.html @@ -0,0 +1,1592 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GitHub - DEC05EBA/sibs: Simple build system for native languages. Similar to rusts cargo, but for c, c++ and zig . Mirror of https://gitlab.com/DEC05EBA/sibs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Skip to content + + + + + + + + + + + +
+ +
+ + +
+ +
+ + + +
+
+
+ + + + + + + + + + + + + +
+
+ + + + + + +
+ + Simple build system for native languages. Similar to rusts cargo, but for c, c++ and zig . Mirror of https://gitlab.com/DEC05EBA/sibs + + +
+
+ + + + + + + +
+ +
+ C++ + Python + Shell + CMake +
+
+ +
+ + + + + + +
+ +
+ + Branch: + master + + + + + + + +
+ + + + + + +
+ + Find file +
+ + + + + + +
+ + Clone or download + +
+ +
+
+ +
+ + + + + +
+ +
+
+ + @DEC05EBA +
+
+ + +
+ + + + Latest commit + + 00007a7 + + Dec 31, 2019 +
+
+ + + +
+ Permalink + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeNameLatest commit messageCommit time
Failed to load latest commit information.
+ + + + backend + + + Add error_on_warning option to allow turning compiler warnings to errors + + + Dec 31, 2019 +
+ + + + cmake + + + Add OpenBSD support + + + Sep 30, 2018 +
+ + + + depends + + + Temporary always build binaries + + + Nov 25, 2018 +
+ + + + examples + + + Move build files into platform specific location + + + Nov 9, 2018 +
+ + + + external + + + Use packages list to find packages + + + Jan 4, 2018 +
+ + + + include + + + Add error_on_warning option to allow turning compiler warnings to errors + + + Dec 31, 2019 +
+ + + + msvc + + + Add more include paths for msvc + + + Oct 27, 2018 +
+ + + + scripts + + + Compile cmake sub project as cmake + + + Jun 8, 2019 +
+ + + + src + + + Add error_on_warning option to allow turning compiler warnings to errors + + + Dec 31, 2019 +
+ + + + static/windows + + + Fix bundle for non system libraries + + + Oct 2, 2018 +
+ + + + tests + + + Fix version range not ending at next major version when not defining end + + + Oct 18, 2018 +
+ + + + .gitignore + + + Fix for windows & mingw + + + Oct 5, 2018 +
+ + + + .gitmodules + + + Use ninja library to generate build instead of raw string + + + Sep 20, 2018 +
+ + + + CMakeLists.txt + + + Use ranges for dependency version + + + Oct 18, 2018 +
+ + + + LICENSE + + + Initial commit + + + Dec 7, 2017 +
+ + + + README.md + + + Add error_on_warning option to allow turning compiler warnings to errors + + + Dec 31, 2019 +
+ + + + install.sh + + + Move build files into platform specific location + + + Nov 9, 2018 +
+ + + + preview.png + + + Update README with info about IDE support + + + Oct 10, 2018 +
+ + + + project.conf + + + Update version + + + Apr 9, 2019 +
+ + + + sibs_multilib.kdev4 + + + Add sanitize build/test option, currently ignored if gcc is not used + + + May 24, 2018 +
+ +
+ + + + +
+
+

+ + README.md +

+
+ + +
+

Simple Build System for Native Languages

+

Sibs is inspired by Cargo, you can think of it like a C/C++/Zig version of Cargo. Sibs can build cmake projects as well, so you can use sibs with existing cmake projects with minimal work. +List of packages can be found at https://gitlab.com/DEC05EBA/sibs_packages/raw/master/packages.json

+

Usage

+
Usage: sibs COMMAND
+
+Simple Build System for Native Languages
+
+Commands:
+  build         Build a project that contains a project.conf file
+  new           Create a new project
+  init          Initialize project in an existing directory
+  test          Build and run tests for a sibs project
+  package       Create a redistributable package from a sibs project. Note: Redistributable packages can't use system packages to build
+  platform      Print name of platform (to stdout) and exit
+  platforms     Print list of supported platforms (to stdout) and exit
+
+

Examples

+

Here is a minimal config file:

+
[package]
+name = "hello_world"
+type = "executable"
+version = "0.1.0"
+platforms = ["any"]
+
+

There are full project examples with dependencies in the examples directory. +Use sibs init to create a project, which includes a config file to get started and then build with sibs build +and run the binary under sibs-build/<platform>/debug/<executable_name>.

+

Supported platforms

+ + + + + + + + + + + + + + + + + + + + + +
LinuxWindows(1)MacOSOpenBSDHaiku...
TBD(2)
+

(1). Msvc, mingw-w64 and cygwin are supported. Cygwin is defined as a linux platform while mingw-w64 is defined as a windows system.
+(2). Sibs is intended to work on as many platforms as possible, you can help by porting sibs to another platform. Should only be minor changes if the platform is unix-like.

+

Linux is the primary platform, the platform which master branch is guaranteed to compile on.

+

Dependencies

+

libcurl, libarchive, libgit2, ninja, cmake

+

Installation

+

Posix (Linux, MacOS, OpenBSD, Haiku)

+

./cmake/install.sh

+

Windows

+

Use vcpkg to install the required dependencies and then generate visual studio (or another system) build files using CMakeLists.txt

+

Usage

+

After you have installed sibs, execute sibs without any arguments and you will get a list of commands and description for them. For debug builds, the created binary/library files will be located under sibs-build/<platform>/debug. For example on linux x86_64, the path for binaries would be: sibs-build/linux_x86_64/debug.

+

Quirks

+

Zig support has not been tested properly yet and currently always links to c library. +You can run zig tests with sibs test --file filepath or sibs test --all-files. +Currently zig tests are cached because ninja build system is used, which means tests are not re-run if source files do not change between runs. +Currently zig files generate header files and include exported functions into sibs-build/<platform>/generated-headers/zig and the generated headers +are usable from c/c++ by including: #include <zig/INSERT_ZIG_HEADER_FILE_NAME_HERE>. +If your project contains zig files then it will currently only run on Linux, Windows and MacOS as zig doesn't support more platforms at the moment.

+

Package

+

Sibs supports creating a redistributable packages of projects (currently only on Linux, run sibs package --bundle). Packaging is in testing phase and may not work for all projects. Currently you need to have python3 and ldd installed and also set the environment variable SIBS_SCRIPT_DIR to scripts sub directory which is in sibs root directory (the directory that contains package.py). +Currently a script file is generated which should be used to run the project. The name of the script file is the same as project. This script file will most likely to be removed later. Do NOT run the executable called "program". +Because creating a package is currently done by copying c/c++ libraries and precompiled shared libraries on Linux usually depend on gcc runtime libraries which are very large, the distributable package becomes very large; a hello world application extracted from its archive is 6 megabytes... +If you want to reduce the size of your package then you will have to compile your project and each dependency from source with clang/musl (gcc c++ runtime is 14mb while clang c++ runtime is 800kb!).

+

The package command also comes with --bundle-install option which reduces the size of the distributable package by removing libraries in the package that can be downloaded online, and instead the user will download missing libraries when launching the application for the first time (the libraries are cached). This option is good because if the user already has the libraries installed on their system with a package manager then the user doesn't have to download the libraries and if the user has other software that was distributed using sibs, then their libraries will be shared with your projects; meaning if one project has a library of one version then it's shared with all software that uses the same version of the library.

+

Users are required to manually install some libraries as they can't be included in a distributed package (install with their package manager). These libraries are commonly gpu driver libraries, which vary even if you have the same cpu architecture. +This requirement might be removed later, if the gpu driver libraries required can somehow be detected and downloaded cross platform. +Libraries that are downloaded are available at: https://github.com/DEC05EBA/libraries

+

Cross compilation

+

Automatic cross compilation (sibs build --platform <platform>)currently only works from linux_x86_64 to win64 by using mingw-w64. You need to install mingw-w64-gcc and optionally mingw-w64-pkg-config if you want to use mingw-w64 system installed packages. +Cross compilation does currently not work if you have zig files as zig doesn't support libc when cross compiling at the moment. +You can run scripts/mingw_package.py to automatically copy dynamic library dependencies of your executable to the same directory as the executable, so the library can be found when running the executable on windows; this also allows you to bundle your application and distribute it without external dependencies. To run scripts/mingw_package.py you need to install pefile python library sudo pip install pefile.

+

Manual cross compilation can be done by replacing c, c++ compilers and linker (ar) using the environment variable CC, CXX and AR.

+

IDE support

+

Sibs generates a compile_commands.json in the project root directory when executing sibs build and tools that support clang completion can be used, such as YouCompleteMe or cquery. +There are several editors that support YouCompleteMe, including Vim, Emacs and Visual Studio Code. Visual studio code now also supports clang completion with C/C++ extension by Microsoft. I recommend using Visual Studio Code along with cquery (https://github.com/cquery-project/cquery/wiki), which gives you very good IDE support for your C/C++ projects: +Image of cquery extension in Visual Studio Code +If you are using Visual Studio Code then you should add .vscode/ to .gitignore or Visual Studio Code will lag a lot (because cquery adds a lot of files in .vscode directory).

+

Tests

+

If your project contains a sub directory called "tests" then that directory will be used as a test project. The test directory may contain a project.conf file which can contain a [dependencies] block for specifying test only dependencies. The test automatically includes the parent project as a dependency.

+

Project configuration template

+
[package]
+name = "packageName"
+type = "library"
+version = "0.1.0"
+platforms = ["any"]
+authors = ["DEC05EBA <0xdec05eba@gmail.com>"]
+
+[dependencies]
+catch2 = "0.1.0"
+xxhash = "0.1.0"
+cisb = { git = "https://github.com/DEC05EBA/cisb.git", branch = "master", revision = "c0c46a4" }
+
+[lang.c]
+version = "c11"
+
+[lang.cpp]
+version = "c++14"
+
+[define]
+BOOST_ASIO_SEPERATE_COMPILATION = "1"
+
+[define.static]
+BOOST_COMPILE_STATIC = "1"
+
+[define.dynamic]
+BOOST_COMPILE_DYNAMIC = "1"
+
+[config]
+include_dirs = ["include"]
+ignore_dirs = ["examples"]
+expose_include_dirs = ["include"]
+error_on_warning = "true"
+
+[config.win32.static.debug]
+lib = "windows/x86/static/debug"
+
+[config.win32.static.release]
+lib = "windows/x86/static/release"
+
+[config.win64.static.debug]
+lib = "windows/x64/static/debug"
+
+[cmake]
+dir = "."
+args = ["ENTITYX_RUN_BENCHMARKS=0"]
+
+[cmake.static]
+args = ["ENTITYX_BUILD_SHARED=0"]
+
+[cmake.dynamic]
+args = ["ENTITYX_BUILD_SHARED=1"]
+

package

+

name

+

Required

+

type

+

Required. Should be one of: "executable", "static", "dynamic", "library"

+

version

+

Required. Version string has to be in the format of "xxx.yyy.zzz" where xxx is major, yyy is minor and zzz is patch. Version format is based on semver 2.0.0

+

platforms

+

Required. A list of platforms the package supports. Run sibs platforms to view a list of supported platforms. +If platforms contains "any" then there is no need to specify other platforms

+

authors

+

Optional. A list of authors

+

dependencies

+

Optional. A list of dependencies which are specified in name-value pairs where the name is the name of the dependency, which should match the dependency name under the packages name specified in its project.conf file. +The value should be a version string, which specified the range of versions that you want to accept as a dependency to only allow dependency version that has the features you need and the version which hasn't changed its interface. +These are examples of the version string format:

+
# Version 1.0.0 or above and less than 2.0.0, same as >=1.0.0 and <2.0.0
+1.0.0
+# Version 1.0.0 or above
+>=1.0.0
+# Version above 1.0.0
+>1.0.0
+# Version exactly 1.0.0
+=1.0.0
+# Version less than 1.0.0
+<1.0.0
+# Version 1.0 or above but less than 2.0
+1.0 and <2.0
+# Version above 1.0 but less or equal to 1.3.2
+>1 and <=1.3.2
+
+

Dependencies are automatically chosen from the system (linux, mac) or, if no package manager exists, they are downloaded from a URL (see https://gitlab.com/DEC05EBA/sibs_packages). +The dependency can also be a git project, in which case it will have the fields 'git' and optionally 'branch' and 'revision'. +'git' specifies the url to the git repository, 'branch' is the git branch that should be used - defaults to 'master'. +'revision' is the git revision to checkout, defaults to 'HEAD' (latest commit).

+

Dependencies can also be added to a project by adding sub directories with a project.conf file. +The best way to do this is to create another git project for the dependency and then add that git project as a git submodule. +Using sub projects allows you to modify dependency and propagate changes to dependant project without pushing changes to remote git repository (faster development).

+

lang.*

+

Optional. Allows you to change language specific configuration. [lang.c] is for C and [lang.cpp] is for C++. +Version specifies the language version, for [lang.c] the version can be ansi, c89, c99 or c11 - if not set, c11 will be used. +For [lang.cpp] the version can be c++11, c++14 or c++17 - if not set, c++14 will be used

+

define

+

Optional. A list of definitions which are specified in name-value pairs where the name is the preprocessor to define (in c: #define name value)

+

define.static

+

Works like [define], but these definitions are only used when building static project. If a definition with the same exists in [define], then it's overwritten

+

define.dynamic

+

Works like [define], but these definitions are only used when building dynamic project. If a definition with the same exists in [define], then it's overwritten

+

config

+

include_dirs

+

Optional. A list of directories which should be specified as global include directories when compiling. This means that instead of using relative paths to header files, you can include the directory with headers and then you only have to specify the header name when using #include

+

ignore_dirs

+

Optional. A list of directories to ignore. This means that if the ignored directory contains source files, then they wont be included in the build

+

expose_include_dirs

+

Optional. A list of directories which contains (header) files which should be exposed to dependencies as directories to include globally. This means that dependencies can include (header) files from the dependency without specifying path to the dependency

+

error_on_warning

+

Optional. This option should be either "true" or "false" and specifies if compiler warnings for the project (and not its dependencies) should be treated as errors. +Default value is "false".

+

config.*

+

Optional. The name is structured in the following way: config.platform.libraryType.optimizationLevel +where platform is any of the platforms specified under [package] (or if package contains "any", then it can be any other platform). LibraryType is either "static" or "dynamic" - different configurations depending on if the package is included as a static or dynamic library by a dependant package. OptimizationLevel is either "debug" or "release", depending on which optimization level the "root" package was built with ("root" package is usually the project which is an executable)

+

lib

+

Optional. A directory which contains .lib or .dll files which should be included in dependant projects that uses this project

+

cmake

+

Optional. Using this allows you to build cmake projects. If a project contains cmake in the project.conf file, then sibs wont build the project itself +and will use cmake instead. Sibs will put the built executable and library files into the same location they would be if sibs build them, +meaning you can have dependency to a cmake project from a sibs project and it will automatically use the dependency library files

+

dir

+

Optional. Directory that contains CMakeLists.txt. If this is not specified, the project root will be used (same location where project.conf is located)

+

args

+

Optional. List of arguments to cmake. The arguments should be in the same format as "-D" arguments (options) in cmake, except they should exclude "-D". +Do not use CMAKE_BUILD_TYPE as sibs will automatically use it depending on the optimization level the user specifies when building project.

+

cmake.*

+

Optional. The name is structured in the following way: config.libraryType +where libraryType is either "static" or "dynamic" - different configurations depending on if the package is included as a static or dynamic library by a dependant package. +Args specified under [cmake.static] or [cmake.dynamic] are appended to the args specified under [cmake]

+

TODO

+

Make shell scripts portable. Currently they only work with bash... Use shellcheck to find the issues.

+
+
+
+ + + +
+
+ +
+
+ + +
+ + + + + + +
+ + + You can’t perform that action at this time. +
+ + + + + + + + + + + + + + +
+ + + + diff --git a/tests/main.c b/tests/main.c index ff1570b..6d84cfa 100644 --- a/tests/main.c +++ b/tests/main.c @@ -1,6 +1,48 @@ +#include +#include #include +#include -int main(int argc, char **argv) { - printf("hello, world!\n"); +char* file_get_content(const char *path, long *filesize) { + FILE *file = fopen(path, "rb"); + if(!file) { + perror(path); + return NULL; + } + + fseek(file, 0, SEEK_END); + *filesize = ftell(file); + fseek(file, 0, SEEK_SET); + + char *data = malloc(*filesize); + fread(data, 1, *filesize, file); + fclose(file); + return data; +} + +static void html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata_any) { + switch(parse_type) { + case HTML_PARSE_TAG_START: + printf("tag start: %.*s\n", html_parser->tag_name.size, html_parser->tag_name.data); + break; + case HTML_PARSE_TAG_END: + printf("tag end: %.*s\n", html_parser->tag_name.size, html_parser->tag_name.data); + break; + } +} + +int main() { + long filesize; + char *file_data = file_get_content("tests/github.html", &filesize); + if(!file_data) { + fprintf(stderr, "Failed to read from file: tests/github.html\n"); + return 1; + } + + HtmlParser html_parser; + html_parser_init(&html_parser, file_data, filesize, html_parse_callback, NULL); + html_parser_parse(&html_parser); + html_parser_deinit(&html_parser); + free(file_data); return 0; } -- cgit v1.2.3