aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2020-01-14 08:40:26 +0100
committer dec05eba <dec05eba@protonmail.com> 2020-01-14 08:40:26 +0100
commit 44b41bcab276f85c96bce88609865ffa4232f7e2 (patch)
tree 224a20e53b65c2226c54d1b4ed2ee894c2bc9b63
parent f5fc07765303055922e1dfcc35f2f2f620c1e66b (diff)
add parsing of rhs and map
-rw-r--r--include/parser.h1
-rw-r--r--include/tokenizer.h6
-rw-r--r--src/parser.c72
-rw-r--r--src/tokenizer.c49
4 files changed, 120 insertions, 8 deletions
diff --git a/include/parser.h b/include/parser.h
index 3523127..d130597 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -3,6 +3,7 @@
#include "tokenizer.h"
+/* Returns 0 on success */
int tsl_parse(const char *code, size_t code_size);
#endif /* TSL_PARSER_H */
diff --git a/include/tokenizer.h b/include/tokenizer.h
index a1d0932..8b511ed 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -13,10 +13,13 @@ typedef enum {
TSL_TOKEN_END_OF_FILE,
TSL_TOKEN_UNEXPECTED_SYMBOL,
TSL_TOKEN_IDENTIFIER,
+ TSL_TOKEN_STRING,
TSL_TOKEN_NUM,
TSL_TOKEN_BOOL,
TSL_TOKEN_NULL,
- TSL_TOKEN_EQUAL
+ TSL_TOKEN_EQUAL,
+ TSL_TOKEN_LBRACE,
+ TSL_TOKEN_RBRACE
} TslToken;
typedef struct {
@@ -25,6 +28,7 @@ typedef struct {
size_t code_index;
TslStringView identifier;
+ TslStringView string;
int bool_value;
int64_t number_value;
} TslTokenizer;
diff --git a/src/parser.c b/src/parser.c
index 35a4abb..90c5a24 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -9,7 +9,74 @@ static void tsl_parser_init(TslParser *self, const char *code, size_t code_size)
tsl_tokenizer_init(&self->tokenizer, code, code_size);
}
+/*
+ * Kind of elements seen so far while parsing a brace-delimited map.
+ * Used to reject input that mixes plain list elements with key/value elements.
+ */
+typedef enum {
+    MAP_TYPE_UNKNOWN = 0, /* Nothing parsed yet has decided the kind */
+    MAP_TYPE_LIST = 1,    /* Elements separated by ',' */
+    MAP_TYPE_MAP = 2      /* Elements that use ':' */
+} MapType;
+
+/*
+ * Prints the literal the tokenizer produced for |token|.
+ * Returns 1 if |token| is a number, bool, null or string.
+ * Returns 0 (and prints nothing) for any other token.
+ */
+static int tsl_parser_print_literal(const TslParser *self, TslToken token) {
+    if(token == TSL_TOKEN_NUM) {
+        /* number_value is int64_t; cast so the argument matches the conversion on every platform */
+        printf("rhs num: %lld\n", (long long)self->tokenizer.number_value);
+    } else if(token == TSL_TOKEN_BOOL) {
+        printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
+    } else if(token == TSL_TOKEN_NULL) {
+        printf("rhs null\n");
+    } else if(token == TSL_TOKEN_STRING) {
+        /* The precision argument of %.*s must be an int */
+        printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
+    } else {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Parses the elements of a map up to and including the closing '}'.
+ * The caller is expected to have consumed the opening '{' already.
+ *
+ * Elements are either plain literals (a list) or string-key ':' literal pairs (a map);
+ * the two forms can't be mixed. Elements are separated by ',' and a '}' is accepted
+ * wherever an element could start (empty map, or directly after a ',').
+ *
+ * Returns 0 on success, -1 on a syntax error (an error message is written to stderr).
+ *
+ * NOTE(review): nested maps are not accepted as elements or values here -- confirm whether
+ * that is intended to be added later.
+ */
+static int tsl_parser_parse_map(TslParser *self) {
+    MapType map_type = MAP_TYPE_UNKNOWN;
+    for(;;) {
+        int is_string;
+        TslToken token = tsl_tokenizer_next(&self->tokenizer);
+        if(token == TSL_TOKEN_RBRACE) {
+            return 0;
+        }
+        if(!tsl_parser_print_literal(self, token)) {
+            fprintf(stderr, "Error: Expected '}', got TODO\n");
+            return -1;
+        }
+        /* Only a string can be followed by ':' and act as a key */
+        is_string = (token == TSL_TOKEN_STRING);
+
+        token = tsl_tokenizer_next(&self->tokenizer);
+        if(is_string && token == TSL_TOKEN_COLON) {
+            if(map_type == MAP_TYPE_LIST) {
+                /* TODO: Show which lines have mismatching element types */
+                fprintf(stderr, "Error: You can't mix list elements and map elements in the same map\n");
+                return -1;
+            }
+            map_type = MAP_TYPE_MAP;
+
+            /* The value that belongs to the key */
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(!tsl_parser_print_literal(self, token)) {
+                fprintf(stderr, "Error: Expected number, bool, null or string, got TODO\n");
+                return -1;
+            }
+
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(token != TSL_TOKEN_COMMA && token != TSL_TOKEN_RBRACE) {
+                fprintf(stderr, "Error: Expected ',' or '}', got TODO\n");
+                return -1;
+            }
+        } else if(token == TSL_TOKEN_COMMA || token == TSL_TOKEN_RBRACE) {
+            /* A literal without ':' is a list element */
+            if(map_type == MAP_TYPE_MAP) {
+                if(token == TSL_TOKEN_RBRACE) {
+                    /* TODO: Show why ':' is needed (because other elements in the map are map types) */
+                    fprintf(stderr, "Error: Expected ':', got '}'\n");
+                } else {
+                    /* TODO: Show which lines have mismatching element types */
+                    fprintf(stderr, "Error: You can't mix list elements and map elements in the same map\n");
+                }
+                return -1;
+            }
+            map_type = MAP_TYPE_LIST;
+        } else {
+            /* TODO: Show only "expects '}' when not using key-value pair" */
+            if(is_string) {
+                fprintf(stderr, "Error: Expected ',', ':' or '}', got TODO\n");
+            } else {
+                fprintf(stderr, "Error: Expected ',' or '}', got TODO\n");
+            }
+            return -1;
+        }
+
+        if(token == TSL_TOKEN_RBRACE) {
+            return 0;
+        }
+        /* token is ',' here: continue with the next element */
+    }
+}
+
+/*
+ * Parses the right-hand side of an assignment: a single number, bool, null or string.
+ * The parsed value is printed to stdout.
+ * Returns 0 on success, -1 if the next token is not one of those (an error is written to stderr).
+ */
static int tsl_parser_parse_rhs(TslParser *self) {
+    TslToken token = tsl_tokenizer_next(&self->tokenizer);
+    if(token == TSL_TOKEN_NUM) {
+        /* number_value is int64_t; cast so the argument matches the conversion on every platform */
+        printf("rhs num: %lld\n", (long long)self->tokenizer.number_value);
+    } else if(token == TSL_TOKEN_BOOL) {
+        printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
+    } else if(token == TSL_TOKEN_NULL) {
+        printf("rhs null\n");
+    } else if(token == TSL_TOKEN_STRING) {
+        /* The precision argument of %.*s must be an int */
+        printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
+    } else {
+        fprintf(stderr, "Error: Expected number, bool, null or string, got TODO\n");
+        return -1;
+    }
return 0;
}
@@ -18,10 +85,13 @@ static int tsl_parser_parse(TslParser *self) {
TslToken token = tsl_tokenizer_next(&self->tokenizer);
if(token == TSL_TOKEN_IDENTIFIER) {
TslStringView identifier = self->tokenizer.identifier;
+ printf("identifier: %.*s\n", identifier.size, identifier.data);
if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_EQUAL)) {
return -1;
}
- tsl_parser_parse_rhs(self);
+ if(tsl_parser_parse_rhs(self) != 0) {
+ return -1;
+ }
} else if(token == TSL_TOKEN_END_OF_FILE) {
break;
} else {
diff --git a/src/tokenizer.c b/src/tokenizer.c
index 97673f5..efc9eee 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -9,6 +9,8 @@ void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size)
self->identifier.data = NULL;
self->identifier.size = 0;
+ self->string.data = NULL;
+ self->string.size = 0;
self->bool_value = 0;
self->number_value = 0;
}
@@ -84,6 +86,27 @@ static int64_t string_to_int(TslStringView *str) {
return num;
}
+/*
+ * Advances the tokenizer past the end of a string literal whose opening quote has
+ * already been consumed. A quote preceded by an unescaped backslash does not end the string.
+ * Returns 1 when the closing |string_start_symbol| was found (code_index is then one past it),
+ * or 0 if a '\0' character was reached first (code_index is left at that character).
+ */
+static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_start_symbol) {
+    int escaped = 0;
+    for(;;) {
+        const char ch = tsl_tokenizer_get_char(self);
+        const int is_quote = (ch == string_start_symbol);
+        if(!is_quote && ch == '\0') {
+            return 0;
+        }
+
+        /* Every character other than the terminating '\0' is consumed */
+        ++self->code_index;
+        if(is_quote) {
+            if(!escaped) {
+                return 1;
+            }
+            escaped = 0;
+        } else {
+            /* A backslash toggles the escape state, any other character clears it */
+            escaped = (ch == '\\') ? !escaped : 0;
+        }
+    }
+}
+
TslToken tsl_tokenizer_next(TslTokenizer *self) {
char c;
tsl_tokenizer_skip_whitespace(self);
@@ -102,16 +125,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
self->identifier.size = self->code_index - identifier_start;
switch(self->identifier.size) {
- case 3: {
- if(memcmp(self->identifier.data, "null", 3) == 0) {
- return TSL_TOKEN_NULL;
- }
- break;
- }
case 4: {
if(memcmp(self->identifier.data, "true", 4) == 0) {
self->bool_value = 1;
return TSL_TOKEN_BOOL;
+ } else if(memcmp(self->identifier.data, "null", 4) == 0) {
+ return TSL_TOKEN_NULL;
}
break;
}
@@ -137,9 +156,27 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
self->identifier.size = self->code_index - num_start;
self->number_value = string_to_int(&self->identifier);
return TSL_TOKEN_NUM;
+ } else if(c == '"') {
+ char string_start_symbol = c;
+ size_t string_start;
+ ++self->code_index;
+ string_start = self->code_index;
+ if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) {
+ self->string.data = self->code + string_start;
+ self->string.size = self->code_index - 1 - string_start;
+ return TSL_TOKEN_STRING;
+ } else {
+ return TSL_TOKEN_END_OF_FILE;
+ }
} else if(c == '=') {
++self->code_index;
return TSL_TOKEN_EQUAL;
+ } else if(c == '{') {
+ ++self->code_index;
+ return TSL_TOKEN_LBRACE;
+ } else if(c == '}') {
+ ++self->code_index;
+ return TSL_TOKEN_RBRACE;
} else if(c == '\0') {
return TSL_TOKEN_END_OF_FILE;
} else {