aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2021-08-16 13:41:20 +0200
committerdec05eba <dec05eba@protonmail.com>2021-08-16 13:41:20 +0200
commit95c189f7445e6deca85130b7b8fa25dc76fabe12 (patch)
tree7d56d918402c4de74bd8983ff1c81888c3a6b371 /src
parent4a0955b851d087d9e97634ae2b7d2f76b0fe0503 (diff)
Add indexing and example
Diffstat (limited to 'src')
-rw-r--r--src/HtmlSearch.c17
-rw-r--r--src/NodeSearch.c1
-rw-r--r--src/XpathParser.c51
-rw-r--r--src/XpathTokenizer.c13
4 files changed, 60 insertions, 22 deletions
diff --git a/src/HtmlSearch.c b/src/HtmlSearch.c
index edb2a1c..45d8aa4 100644
--- a/src/HtmlSearch.c
+++ b/src/HtmlSearch.c
@@ -168,6 +168,8 @@ static int find_child_nodes(QuickMediaHtmlChildNode *node, const QuickMediaNodeS
if(!node)
return 0;
+ int match_index = 0;
+
/* We use two loops because we want to find children before grandchildren */
for(QuickMediaHtmlChildNode *child = node; child; child = child->next) {
/* A text node doesn't have a name */
@@ -194,7 +196,9 @@ static int find_child_nodes(QuickMediaHtmlChildNode *node, const QuickMediaNodeS
/* If we search without param, then it's a match */
if(!search_data->param.defined) {
- on_match();
+ if(search_data->param.index == -1 || search_data->param.index == match_index)
+ on_match();
+ ++match_index;
continue;
}
@@ -206,7 +210,9 @@ static int find_child_nodes(QuickMediaHtmlChildNode *node, const QuickMediaNodeS
assert(search_data->param.value.size > 0);
/* If the param value matches what we want to search for */
if(str_glob_match(child_attr->value, search_data->param.value, search_data->param.value_is_glob) == 0) {
- on_match();
+ if(search_data->param.index == -1 || search_data->param.index == match_index)
+ on_match();
+ ++match_index;
continue;
}
}
@@ -508,19 +514,12 @@ QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self) {
}
int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size) {
- /* Utf8 BOM */
- if(size >= 3 && memcmp(html_source, "\xef\xbb\xbf", 3) == 0) {
- html_source += 3;
- size -= 3;
- }
-
QuickMediaHtmlNode *html_node = &self->root_node;
html_node_init(html_node);
if(html_parser_parse(html_source, size, html_parse_callback, &html_node) != 0) {
quickmedia_html_search_deinit(self);
return 1;
}
-
return 0;
}
diff --git a/src/NodeSearch.c b/src/NodeSearch.c
index 376c801..633cf65 100644
--- a/src/NodeSearch.c
+++ b/src/NodeSearch.c
@@ -8,6 +8,7 @@ void quickmedia_node_search_param_init(QuickMediaNodeSearchParam *self) {
self->value.size = 0;
self->defined = 0;
self->value_is_glob = 0;
+ self->index = -1;
}
static void quickmedia_node_search_param_deinit(QuickMediaNodeSearchParam *self) {
diff --git a/src/XpathParser.c b/src/XpathParser.c
index f3248eb..b79fe11 100644
--- a/src/XpathParser.c
+++ b/src/XpathParser.c
@@ -18,26 +18,29 @@ static void quickmedia_xpath_parser_init(QuickMediaXpathParser *self, const char
quickmedia_xpath_tokenizer_init(&self->tokenizer, xpath);
}
-/* ('[' IDENTIFIER '=' '"' STRING '"' ']')? */
+/* (('[' IDENTIFIER '=' '"' STRING '"' ']') | ('[' NUMBER ']'))? */
static int xpath_parse_param(QuickMediaXpathParser *self, QuickMediaNodeSearchParam *result) {
if(quickmedia_xpath_tokenizer_next_if(&self->tokenizer, QUICKMEDIA_XPATH_TOKEN_OPEN_BRACKET) != 0)
return 1;
QuickMediaXpathToken token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
- if(token != QUICKMEDIA_XPATH_TOKEN_IDENTIFIER)
- return -1;
-
- result->name = self->tokenizer.identifier;
+ if(token == QUICKMEDIA_XPATH_TOKEN_IDENTIFIER) {
+ result->name = self->tokenizer.identifier;
- token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
- if(token != QUICKMEDIA_XPATH_TOKEN_EQUAL)
- return -2;
+ token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
+ if(token != QUICKMEDIA_XPATH_TOKEN_EQUAL)
+ return -2;
- token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
- if(token != QUICKMEDIA_XPATH_TOKEN_STRING)
- return -3;
-
- result->value = self->tokenizer.string;
+ token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
+ if(token != QUICKMEDIA_XPATH_TOKEN_STRING)
+ return -3;
+
+ result->value = self->tokenizer.string;
+ } else if(token == QUICKMEDIA_XPATH_TOKEN_NUMBER) {
+ result->index = self->tokenizer.number;
+ } else {
+ return -1;
+ }
token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
if(token != QUICKMEDIA_XPATH_TOKEN_CLOSING_BRACKET)
@@ -48,6 +51,24 @@ static int xpath_parse_param(QuickMediaXpathParser *self, QuickMediaNodeSearchPa
return 0;
}
+/* ('[' NUMBER ']')? */
+static int xpath_parse_index(QuickMediaXpathParser *self, QuickMediaNodeSearchParam *result) {
+ if(quickmedia_xpath_tokenizer_next_if(&self->tokenizer, QUICKMEDIA_XPATH_TOKEN_OPEN_BRACKET) != 0)
+ return 1;
+
+ QuickMediaXpathToken token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
+ if(token != QUICKMEDIA_XPATH_TOKEN_NUMBER)
+ return -1;
+
+ result->index = self->tokenizer.number;
+
+ token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
+ if(token != QUICKMEDIA_XPATH_TOKEN_CLOSING_BRACKET)
+ return -4;
+
+ return 0;
+}
+
static int xpath_parse_node(QuickMediaXpathParser *self, QuickMediaNodeSearch *result) {
quickmedia_node_search_init(result);
QuickMediaXpathToken token = quickmedia_xpath_tokenizer_next(&self->tokenizer);
@@ -65,6 +86,10 @@ static int xpath_parse_node(QuickMediaXpathParser *self, QuickMediaNodeSearch *r
if(param_result < 0)
return param_result;
+ int index_result = xpath_parse_index(self, &result->param);
+ if(index_result < 0)
+ return index_result;
+
result->child = malloc(sizeof(QuickMediaNodeSearch));
if(!result->child)
return -1;
diff --git a/src/XpathTokenizer.c b/src/XpathTokenizer.c
index ae17939..f81f2d7 100644
--- a/src/XpathTokenizer.c
+++ b/src/XpathTokenizer.c
@@ -16,6 +16,10 @@ static int is_num(char c) {
return c >= '0' && c <= '9';
}
+static int c_to_num(char c) {
+ return c - '0';
+}
+
static int is_alphanum(char c) {
return is_alpha(c) || is_num(c);
}
@@ -55,6 +59,15 @@ QuickMediaXpathToken quickmedia_xpath_tokenizer_next(QuickMediaXpathTokenizer *s
}
self->identifier.size = self->code - self->identifier.data;
return QUICKMEDIA_XPATH_TOKEN_IDENTIFIER;
+ } else if(is_num(c)) {
+ int number = c_to_num(c);
+ ++self->code;
+ while(is_num(*self->code)) {
+ number = number * 10 + c_to_num(*self->code);
+ ++self->code;
+ }
+ self->number = number;
+ return QUICKMEDIA_XPATH_TOKEN_NUMBER;
} else if(c == '[') {
++self->code;
return QUICKMEDIA_XPATH_TOKEN_OPEN_BRACKET;