From bbc88f60df2f22eaa4678f6d7f581229f7905679 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 3 Jul 2021 16:57:48 +0200 Subject: Strip attribute key and value, memcmp if attribute value search doesn't contain glob --- src/HtmlSearch.c | 95 +++++++++++++++++++++++++++++++------------------------ src/NodeSearch.c | 1 + src/XpathParser.c | 9 ++++++ 3 files changed, 63 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/HtmlSearch.c b/src/HtmlSearch.c index 03d7e3a..267ec6a 100644 --- a/src/HtmlSearch.c +++ b/src/HtmlSearch.c @@ -68,10 +68,17 @@ static char string_view_char_or(const QuickMediaStringView *str, size_t index, c } /* Returns 0 on match */ -static int str_glob_match(const QuickMediaStringView str, const QuickMediaStringView glob) { +static int str_glob_match(const QuickMediaStringView str, const QuickMediaStringView glob, int is_glob) { size_t str_index = 0; size_t glob_index = 0; + if(!is_glob) { + if(glob.size == str.size && memcmp(str.data, glob.data, str.size) == 0) + return 0; + else + return 1; + } + if(str.size == 0) { /* TODO: What about glob = **** (more than one asterix) */ if(glob.size == 0 || (glob.size == 1 && glob.data[0] == '*')) @@ -167,7 +174,7 @@ static int find_child_nodes(QuickMediaHtmlChildNode *node, const QuickMediaNodeS assert(search_data->param.value.size > 0); /* If the param value matches what we want to search for */ - if(str_glob_match(child_attr->value, search_data->param.value) == 0) { + if(str_glob_match(child_attr->value, search_data->param.value, search_data->param.value_is_glob) == 0) { on_match(); continue; } @@ -287,6 +294,45 @@ void html_node_child_deinit(QuickMediaHtmlChildNode *self) { html_node_deinit(&self->node); } +static int is_whitespace(int c) { + switch(c) { + case ' ': + case '\n': + case '\r': + case '\t': + case '\v': + return 1; + default: + return 0; + } +} + +static int is_newline(int c) { + return c == '\n' || c == '\r'; +} + +static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { + size_t i = 0; + while(i < size && strip_filter_func(str[i])) { + ++i; + } + *output_str = str + i; + *output_size = size - i; +} + +static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { + ssize_t i = size - 1; + while(i >= 0 && strip_filter_func(str[i])) { + --i; + } + *output_size = i + 1; +} + +static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { + lstrip(str, size, output_str, output_size, strip_filter_func); + rstrip(*output_str, *output_size, output_size, strip_filter_func); +} + static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) { QuickMediaHtmlNode **html_node_p = userdata; QuickMediaHtmlNode *html_node = *html_node_p; @@ -308,7 +354,11 @@ static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type break; } case HTML_PARSE_ATTRIBUTE: { - if(html_node_add_attribute(html_node, html_parser->attribute_key, html_parser->attribute_value) != 0) + HtmlStringView attr_key = html_parser->attribute_key; + HtmlStringView attr_value = html_parser->attribute_value; + strip(attr_key.data, attr_key.size, &attr_key.data, &attr_key.size, is_whitespace); + strip(attr_value.data, attr_value.size, &attr_value.data, &attr_value.size, is_whitespace); + if(html_node_add_attribute(html_node, attr_key, attr_value) != 0) return 1; break; } @@ -329,45 +379,6 @@ static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type return 0; } -static int is_whitespace(int c) { - switch(c) { - case ' ': - case '\n': - case '\r': - case '\t': - case '\v': - return 1; - default: - return 0; - } -} - -static int is_newline(int c) { - return c == '\n' || c == '\r'; -} - -static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { - size_t i = 0; - while(i < size && strip_filter_func(str[i])) { - ++i; - } - *output_str = str + i; - *output_size = size - i; -} - -static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) { - ssize_t i = size - 1; - while(i >= 0 && strip_filter_func(str[i])) { - --i; - } - *output_size = i + 1; -} - -static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) { - lstrip(str, size, output_str, output_size, strip_filter_func); - rstrip(*output_str, *output_size, output_size, strip_filter_func); -} - QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name) { QuickMediaStringView attr_name; attr_name.data = attribute_name; diff --git a/src/NodeSearch.c b/src/NodeSearch.c index bddb26c..376c801 100644 --- a/src/NodeSearch.c +++ b/src/NodeSearch.c @@ -7,6 +7,7 @@ void quickmedia_node_search_param_init(QuickMediaNodeSearchParam *self) { self->value.data = NULL; self->value.size = 0; self->defined = 0; + self->value_is_glob = 0; } static void quickmedia_node_search_param_deinit(QuickMediaNodeSearchParam *self) { diff --git a/src/XpathParser.c b/src/XpathParser.c index 0dbe270..f3248eb 100644 --- a/src/XpathParser.c +++ b/src/XpathParser.c @@ -6,6 +6,14 @@ typedef struct { QuickMediaXpathTokenizer tokenizer; } QuickMediaXpathParser; +static int contains_glob_char(QuickMediaStringView str) { + for(size_t i = 0; i < str.size; ++i) { + if(str.data[i] == '*') + return 1; + } + return 0; +} + static void quickmedia_xpath_parser_init(QuickMediaXpathParser *self, const char *xpath) { quickmedia_xpath_tokenizer_init(&self->tokenizer, xpath); } @@ -36,6 +44,7 @@ static int xpath_parse_param(QuickMediaXpathParser *self, QuickMediaNodeSearchPa return -4; result->defined = 1; + result->value_is_glob = contains_glob_char(result->value); return 0; } -- cgit v1.2.3