aboutsummaryrefslogtreecommitdiff
path: root/src/XpathTokenizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/XpathTokenizer.c')
-rw-r--r--src/XpathTokenizer.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/src/XpathTokenizer.c b/src/XpathTokenizer.c
new file mode 100644
index 0000000..32bede9
--- /dev/null
+++ b/src/XpathTokenizer.c
@@ -0,0 +1,104 @@
+#include "../include/quickmedia/XpathTokenizer.h"
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Prepares @self for tokenizing @xpath.
+ * Only the pointer is stored; the text itself is not copied here.
+ */
+void quickmedia_xpath_tokenizer_init(QuickMediaXpathTokenizer *self, const char *xpath) {
+    /* Nothing has been scanned yet, so the identifier view starts out empty */
+    self->identifier.size = 0;
+    self->identifier.data = NULL;
+    /* Scanning begins at the first character of @xpath */
+    self->code = xpath;
+}
+
+/* Returns 1 if @c lies in 'a'..'z' or 'A'..'Z', otherwise 0 */
+static int is_alpha(char c) {
+    if(c >= 'a' && c <= 'z')
+        return 1;
+    if(c >= 'A' && c <= 'Z')
+        return 1;
+    return 0;
+}
+
+/* Returns 1 if @c is one of the decimal digits '0'..'9', otherwise 0 */
+static int is_num(char c) {
+    return !(c < '0' || c > '9');
+}
+
+/* Returns 1 if @c is accepted by is_alpha or by is_num, otherwise 0 */
+static int is_alphanum(char c) {
+    if(is_alpha(c))
+        return 1;
+    return is_num(c) != 0;
+}
+
+/*
+ * Locates the end of a quoted string.
+ * @str points at the first character after the opening quote and
+ * @escape_symbol is the quote character that terminates the string.
+ * A backslash escapes exactly one following character, so \" does not
+ * terminate the string and \\ is an escaped backslash.
+ * Returns a pointer to the first unescaped @escape_symbol, or a pointer to
+ * the terminating '\0' of @str if there is no such character.
+ */
+static const char* find_end_of_string(const char *str, char escape_symbol) {
+    int escape = 0;
+    for(; *str != '\0'; ++str) {
+        const char c = *str;
+        if(escape) {
+            /*
+             * This character was escaped by the preceding backslash. Always drop the
+             * flag here, also for an escaped quote, otherwise a closing quote that
+             * directly follows an escaped quote would be skipped as well.
+             */
+            escape = 0;
+        } else if(c == '\\') {
+            escape = 1;
+        } else if(c == escape_symbol) {
+            return str;
+        }
+    }
+    return str;
+}
+
+/*
+ * Scans one token at self->code and returns its kind.
+ * On success self->code is moved past the token. For an identifier the
+ * matched text is exposed through self->identifier (data, size); for a quoted
+ * string the text between the quotes is exposed through self->string.
+ * Returns QUICKMEDIA_XPATH_TOKEN_END_OF_FILE at the terminating '\0' and
+ * QUICKMEDIA_XPATH_TOKEN_INVALID for an unexpected character (self->code is
+ * left on it) or for a string without a closing quote (self->code is left at
+ * the terminating '\0').
+ */
+QuickMediaXpathToken quickmedia_xpath_tokenizer_next(QuickMediaXpathTokenizer *self) {
+    const char first = *self->code;
+    switch(first) {
+        case '\0':
+            return QUICKMEDIA_XPATH_TOKEN_END_OF_FILE;
+        case '/':
+            ++self->code;
+            /* A single slash selects a child, a double slash selects recursively */
+            if(*self->code != '/')
+                return QUICKMEDIA_XPATH_TOKEN_CHILD;
+            ++self->code;
+            return QUICKMEDIA_XPATH_TOKEN_CHILD_RECURSIVE;
+        case '[':
+            ++self->code;
+            return QUICKMEDIA_XPATH_TOKEN_OPEN_BRACKET;
+        case ']':
+            ++self->code;
+            return QUICKMEDIA_XPATH_TOKEN_CLOSING_BRACKET;
+        case '=':
+            ++self->code;
+            return QUICKMEDIA_XPATH_TOKEN_EQUAL;
+        case '"':
+        case '\'': {
+            /* The string must be closed by the same quote character that opened it */
+            ++self->code;
+            self->string.data = self->code;
+            self->code = find_end_of_string(self->string.data, first);
+            if(*self->code == '\0') {
+                /* Reached end of xpath before end of string */
+                return QUICKMEDIA_XPATH_TOKEN_INVALID;
+            }
+            self->string.size = self->code - self->string.data;
+            /* Step over the closing quote */
+            ++self->code;
+            return QUICKMEDIA_XPATH_TOKEN_STRING;
+        }
+        default:
+            break;
+    }
+    if(!is_alpha(first)) {
+        /* Invalid symbol @first */
+        return QUICKMEDIA_XPATH_TOKEN_INVALID;
+    }
+    /* Identifier: a letter followed by any number of letters, digits, '_' or '-' */
+    self->identifier.data = self->code;
+    do {
+        ++self->code;
+    } while(is_alphanum(*self->code) || *self->code == '_' || *self->code == '-');
+    self->identifier.size = self->code - self->identifier.data;
+    return QUICKMEDIA_XPATH_TOKEN_IDENTIFIER;
+}
+
+/*
+ * Consumes the next token only if it is @token.
+ * Returns 0 when the next token matched and was consumed. Otherwise returns -1
+ * and rewinds self->code to where it was before the call. Only self->code is
+ * rewound; an identifier/string view written by the scan is left as it is.
+ */
+int quickmedia_xpath_tokenizer_next_if(QuickMediaXpathTokenizer *self, QuickMediaXpathToken token) {
+    const char *saved_position = self->code;
+    QuickMediaXpathToken scanned = quickmedia_xpath_tokenizer_next(self);
+    if(scanned != token) {
+        self->code = saved_position;
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Returns a newly allocated, null-terminated copy of the text described by
+ * self->identifier (data, size).
+ * The result is allocated with malloc; the caller is responsible for passing
+ * it to free(). Returns NULL if the allocation fails.
+ */
+char* quickmedia_xpath_tokenizer_copy_identifier(QuickMediaXpathTokenizer *self) {
+    char *result = malloc(self->identifier.size + 1);
+    if(!result)
+        return NULL;
+    /*
+     * identifier.data is NULL until an identifier has been scanned, and memcpy
+     * must not be given a null pointer even when copying 0 bytes.
+     */
+    if(self->identifier.size > 0)
+        memcpy(result, self->identifier.data, self->identifier.size);
+    result[self->identifier.size] = '\0';
+    return result;
+}
+
+/*
+ * Returns a newly allocated copy of the most recently scanned string by
+ * delegating to quickmedia_xpath_tokenizer_copy_identifier.
+ * NOTE(review): this reads self->identifier, not self->string, so it presumably
+ * relies on the two fields sharing storage in QuickMediaXpathTokenizer - confirm
+ * against the header.
+ */
+char* quickmedia_xpath_tokenizer_copy_string(QuickMediaXpathTokenizer *self) {
+    char *copy = quickmedia_xpath_tokenizer_copy_identifier(self);
+    return copy;
+}