From c17412cce925ce226d3835a2e59b4d9f31b5b3ed Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 25 May 2019 02:17:15 +0200 Subject: Initial commit --- src/XpathTokenizer.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 src/XpathTokenizer.c (limited to 'src/XpathTokenizer.c') diff --git a/src/XpathTokenizer.c b/src/XpathTokenizer.c new file mode 100644 index 0000000..32bede9 --- /dev/null +++ b/src/XpathTokenizer.c @@ -0,0 +1,104 @@ +#include "../include/quickmedia/XpathTokenizer.h" +#include +#include + +void quickmedia_xpath_tokenizer_init(QuickMediaXpathTokenizer *self, const char *xpath) { + self->code = xpath; + self->identifier.data = NULL; + self->identifier.size = 0; +} + +static int is_alpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static int is_num(char c) { + return c >= '0' && c <= '9'; +} + +static int is_alphanum(char c) { + return is_alpha(c) || is_num(c); +} + +static const char* find_end_of_string(const char *str, char escape_symbol) { + int escape = 0; + while(*str != '\0') { + char c = *str; + if(c == '\\') { + escape = !escape; + } else if(c == escape_symbol) { + if(!escape) + return str; + } else { + escape = 0; + } + ++str; + } + return str; +} + +QuickMediaXpathToken quickmedia_xpath_tokenizer_next(QuickMediaXpathTokenizer *self) { + char c = *self->code; + if(c == '/') { + ++self->code; + c = *self->code; + if(c == '/') { + ++self->code; + return QUICKMEDIA_XPATH_TOKEN_CHILD_RECURSIVE; + } + return QUICKMEDIA_XPATH_TOKEN_CHILD; + } else if(is_alpha(c)) { + self->identifier.data = self->code; + ++self->code; + while(is_alphanum(*self->code) || *self->code == '_' || *self->code == '-') { + ++self->code; + } + self->identifier.size = self->code - self->identifier.data; + return QUICKMEDIA_XPATH_TOKEN_IDENTIFIER; + } else if(c == '[') { + ++self->code; + return QUICKMEDIA_XPATH_TOKEN_OPEN_BRACKET; + } else if(c == ']') { + ++self->code; + return QUICKMEDIA_XPATH_TOKEN_CLOSING_BRACKET; + } else if(c == '=') { + ++self->code; + return QUICKMEDIA_XPATH_TOKEN_EQUAL; + } else if(c == '"' || c == '\'') { + char escape_symbol = c; + ++self->code; + self->string.data = self->code; + self->code = find_end_of_string(self->string.data, escape_symbol); + if(*self->code == '\0') { + /* Reached end of xpath before end of string */ + return QUICKMEDIA_XPATH_TOKEN_INVALID; + } + self->string.size = self->code - self->string.data; + ++self->code; + return QUICKMEDIA_XPATH_TOKEN_STRING; + } else if(c == '\0') { + return QUICKMEDIA_XPATH_TOKEN_END_OF_FILE; + } else { + /* Invalid symbol @c */ + return QUICKMEDIA_XPATH_TOKEN_INVALID; + } +} + +int quickmedia_xpath_tokenizer_next_if(QuickMediaXpathTokenizer *self, QuickMediaXpathToken token) { + const char *restore_point = self->code; + if(quickmedia_xpath_tokenizer_next(self) == token) + return 0; + self->code = restore_point; + return -1; +} + +char* quickmedia_xpath_tokenizer_copy_identifier(QuickMediaXpathTokenizer *self) { + char *result = malloc(self->identifier.size + 1); + result[self->identifier.size] = '\0'; + memcpy(result, self->identifier.data, self->identifier.size); + return result; +} + +char* quickmedia_xpath_tokenizer_copy_string(QuickMediaXpathTokenizer *self) { + return quickmedia_xpath_tokenizer_copy_identifier(self); +} -- cgit v1.2.3