about summary refs log tree commit diff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-07-03 16:57:48 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-07-03 16:57:48 +0200
commit bbc88f60df2f22eaa4678f6d7f581229f7905679 (patch)
tree 2aefc8c058a06aff8d40c1b61eb8b6b204a54b41
parent 9f81d962e9683d946d7ee704d9cf418fc96d55b1 (diff)
Strip attribute key and value, memcmp if attribute value search doesn't contain glob
-rw-r--r-- include/quickmedia/NodeSearch.h 1
-rw-r--r-- src/HtmlSearch.c 95
-rw-r--r-- src/NodeSearch.c 1
-rw-r--r-- src/XpathParser.c 9
4 files changed, 64 insertions, 42 deletions
diff --git a/include/quickmedia/NodeSearch.h b/include/quickmedia/NodeSearch.h
index 9e7fd0c..0e3b3f1 100644
--- a/include/quickmedia/NodeSearch.h
+++ b/include/quickmedia/NodeSearch.h
@@ -16,6 +16,7 @@ typedef struct {
QuickMediaStringView name;
QuickMediaStringView value;
int defined;
+ int value_is_glob;
} QuickMediaNodeSearchParam;
typedef struct QuickMediaNodeSearch QuickMediaNodeSearch;
diff --git a/src/HtmlSearch.c b/src/HtmlSearch.c
index 03d7e3a..267ec6a 100644
--- a/src/HtmlSearch.c
+++ b/src/HtmlSearch.c
@@ -68,10 +68,17 @@ static char string_view_char_or(const QuickMediaStringView *str, size_t index, c
}
/* Returns 0 on match */
-static int str_glob_match(const QuickMediaStringView str, const QuickMediaStringView glob) {
+static int str_glob_match(const QuickMediaStringView str, const QuickMediaStringView glob, int is_glob) {
size_t str_index = 0;
size_t glob_index = 0;
+ if(!is_glob) {
+ if(glob.size == str.size && memcmp(str.data, glob.data, str.size) == 0)
+ return 0;
+ else
+ return 1;
+ }
+
if(str.size == 0) {
/* TODO: What about glob = **** (more than one asterix) */
if(glob.size == 0 || (glob.size == 1 && glob.data[0] == '*'))
@@ -167,7 +174,7 @@ static int find_child_nodes(QuickMediaHtmlChildNode *node, const QuickMediaNodeS
assert(search_data->param.value.size > 0);
/* If the param value matches what we want to search for */
- if(str_glob_match(child_attr->value, search_data->param.value) == 0) {
+ if(str_glob_match(child_attr->value, search_data->param.value, search_data->param.value_is_glob) == 0) {
on_match();
continue;
}
@@ -287,6 +294,45 @@ void html_node_child_deinit(QuickMediaHtmlChildNode *self) {
html_node_deinit(&self->node);
}
+static int is_whitespace(int c) {
+ switch(c) {
+ case ' ':
+ case '\n':
+ case '\r':
+ case '\t':
+ case '\v':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int is_newline(int c) {
+ return c == '\n' || c == '\r';
+}
+
+static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
+ size_t i = 0;
+ while(i < size && strip_filter_func(str[i])) {
+ ++i;
+ }
+ *output_str = str + i;
+ *output_size = size - i;
+}
+
+static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) {
+ ssize_t i = size - 1;
+ while(i >= 0 && strip_filter_func(str[i])) {
+ --i;
+ }
+ *output_size = i + 1;
+}
+
+static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
+ lstrip(str, size, output_str, output_size, strip_filter_func);
+ rstrip(*output_str, *output_size, output_size, strip_filter_func);
+}
+
static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
QuickMediaHtmlNode **html_node_p = userdata;
QuickMediaHtmlNode *html_node = *html_node_p;
@@ -308,7 +354,11 @@ static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type
break;
}
case HTML_PARSE_ATTRIBUTE: {
- if(html_node_add_attribute(html_node, html_parser->attribute_key, html_parser->attribute_value) != 0)
+ HtmlStringView attr_key = html_parser->attribute_key;
+ HtmlStringView attr_value = html_parser->attribute_value;
+ strip(attr_key.data, attr_key.size, &attr_key.data, &attr_key.size, is_whitespace);
+ strip(attr_value.data, attr_value.size, &attr_value.data, &attr_value.size, is_whitespace);
+ if(html_node_add_attribute(html_node, attr_key, attr_value) != 0)
return 1;
break;
}
@@ -329,45 +379,6 @@ static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type
return 0;
}
-static int is_whitespace(int c) {
- switch(c) {
- case ' ':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
- return 1;
- default:
- return 0;
- }
-}
-
-static int is_newline(int c) {
- return c == '\n' || c == '\r';
-}
-
-static void lstrip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
- size_t i = 0;
- while(i < size && strip_filter_func(str[i])) {
- ++i;
- }
- *output_str = str + i;
- *output_size = size - i;
-}
-
-static void rstrip(const char *str, size_t size, size_t *output_size, int(*strip_filter_func)(int)) {
- ssize_t i = size - 1;
- while(i >= 0 && strip_filter_func(str[i])) {
- --i;
- }
- *output_size = i + 1;
-}
-
-static void strip(const char *str, size_t size, const char **output_str, size_t *output_size, int(*strip_filter_func)(int)) {
- lstrip(str, size, output_str, output_size, strip_filter_func);
- rstrip(*output_str, *output_size, output_size, strip_filter_func);
-}
-
QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name) {
QuickMediaStringView attr_name;
attr_name.data = attribute_name;
diff --git a/src/NodeSearch.c b/src/NodeSearch.c
index bddb26c..376c801 100644
--- a/src/NodeSearch.c
+++ b/src/NodeSearch.c
@@ -7,6 +7,7 @@ void quickmedia_node_search_param_init(QuickMediaNodeSearchParam *self) {
self->value.data = NULL;
self->value.size = 0;
self->defined = 0;
+ self->value_is_glob = 0;
}
static void quickmedia_node_search_param_deinit(QuickMediaNodeSearchParam *self) {
diff --git a/src/XpathParser.c b/src/XpathParser.c
index 0dbe270..f3248eb 100644
--- a/src/XpathParser.c
+++ b/src/XpathParser.c
@@ -6,6 +6,14 @@ typedef struct {
QuickMediaXpathTokenizer tokenizer;
} QuickMediaXpathParser;
+static int contains_glob_char(QuickMediaStringView str) {
+ for(size_t i = 0; i < str.size; ++i) {
+ if(str.data[i] == '*')
+ return 1;
+ }
+ return 0;
+}
+
static void quickmedia_xpath_parser_init(QuickMediaXpathParser *self, const char *xpath) {
quickmedia_xpath_tokenizer_init(&self->tokenizer, xpath);
}
@@ -36,6 +44,7 @@ static int xpath_parse_param(QuickMediaXpathParser *self, QuickMediaNodeSearchPa
return -4;
result->defined = 1;
+ result->value_is_glob = contains_glob_char(result->value);
return 0;
}