From 44b41bcab276f85c96bce88609865ffa4232f7e2 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Tue, 14 Jan 2020 08:40:26 +0100
Subject: add parsing of rhs and map

Tokenizer: add string, '{' and '}' tokens, track the last string in
TslTokenizer.string, and match "null" as a 4-byte keyword next to "true"
(the old 3-byte comparison lived under `case 3` of the size switch).

Parser: tsl_parser_parse_rhs now reads one token after '=' and prints it
(number, bool, null or string); its result is checked by the caller.
tsl_parser_parse_map is added but is not called by anything in this patch.

Review notes for this revision:
- tsl_parser_parse_map: a ':' after a string now records MAP_TYPE_MAP.
  It previously recorded MAP_TYPE_LIST, so MAP_TYPE_MAP was never set and
  the "can't mix list elements and map elements" check could not trigger
  for a key-value entry followed by a list entry.
- printf("%.*s"): the precision argument must be an int; the string view
  sizes are now cast explicitly.
- printf of number_value (int64_t): cast to long long and printed with
  %lld instead of %ld.
- tsl_parser_parse_rhs: the error message now lists string as an accepted
  value.
---
 include/parser.h    |  1 +
 include/tokenizer.h |  6 ++++-
 src/parser.c        | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/tokenizer.c     | 49 +++++++++++++++++++++++++++++++-----
 4 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/include/parser.h b/include/parser.h
index 3523127..d130597 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -3,6 +3,7 @@
 
 #include "tokenizer.h"
 
+/* Returns 0 on success */
 int tsl_parse(const char *code, size_t code_size);
 
 #endif /* TSL_PARSER_H */
diff --git a/include/tokenizer.h b/include/tokenizer.h
index a1d0932..8b511ed 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -13,10 +13,13 @@ typedef enum {
     TSL_TOKEN_END_OF_FILE,
     TSL_TOKEN_UNEXPECTED_SYMBOL,
     TSL_TOKEN_IDENTIFIER,
+    TSL_TOKEN_STRING,
     TSL_TOKEN_NUM,
     TSL_TOKEN_BOOL,
     TSL_TOKEN_NULL,
-    TSL_TOKEN_EQUAL
+    TSL_TOKEN_EQUAL,
+    TSL_TOKEN_LBRACE,
+    TSL_TOKEN_RBRACE
 } TslToken;
 
 typedef struct {
@@ -25,6 +28,7 @@ typedef struct {
     size_t code_index;
 
     TslStringView identifier;
+    TslStringView string;
     int bool_value;
     int64_t number_value;
 } TslTokenizer;
diff --git a/src/parser.c b/src/parser.c
index 35a4abb..90c5a24 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -9,7 +9,74 @@ static void tsl_parser_init(TslParser *self, const char *code, size_t code_size)
     tsl_tokenizer_init(&self->tokenizer, code, code_size);
 }
 
+typedef enum {
+    MAP_TYPE_UNKNOWN,
+    MAP_TYPE_LIST,
+    MAP_TYPE_MAP
+} MapType;
+
+static int tsl_parser_parse_map(TslParser *self) {
+    MapType map_type = MAP_TYPE_UNKNOWN;
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&self->tokenizer);
+        if(token == TSL_TOKEN_NUM) {
+            printf("rhs num: %lld\n", (long long)self->tokenizer.number_value);
+        } else if(token == TSL_TOKEN_BOOL) {
+            printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
+        } else if(token == TSL_TOKEN_NULL) {
+            printf("rhs null\n");
+        } else if(token == TSL_TOKEN_STRING) {
+            printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(token == TSL_TOKEN_COMMA) {
+                if(map_type == MAP_TYPE_MAP) {
+                    /* TODO: Show which lines have mismatching element types */
+                    fprintf(stderr, "Error: You can't mix list elements and map elements in the same map\n");
+                    return -1;
+                }
+                map_type = MAP_TYPE_LIST;
+            } else if(token == TSL_TOKEN_COLON) {
+                if(map_type == MAP_TYPE_LIST) {
+                    /* TODO: Show which lines have mismatching element types */
+                    fprintf(stderr, "Error: You can't mix list elements and map elements in the same map\n");
+                    return -1;
+                }
+                map_type = MAP_TYPE_MAP;
+            } else if(token == TSL_TOKEN_RBRACE) {
+                if(map_type == MAP_TYPE_MAP) {
+                    /* TODO: Show why ':' is needed (because other elements in the map are map types) */
+                    fprintf(stderr, "Error: Expected ':', got '}'\n");
+                    return -1;
+                }
+                return 0;
+            } else {
+                /* TODO: Show only "expects '}' when not using key-value pair" */
+                fprintf(stderr, "Error: Expected ',', ':' or '}', got TODO\n");
+                return -1;
+            }
+        } else if(token == TSL_TOKEN_RBRACE) {
+            return 0;
+        } else {
+            fprintf(stderr, "Error: Expected '}', got TODO\n");
+            return -1;
+        }
+    }
+}
+
 static int tsl_parser_parse_rhs(TslParser *self) {
+    TslToken token = tsl_tokenizer_next(&self->tokenizer);
+    if(token == TSL_TOKEN_NUM) {
+        printf("rhs num: %lld\n", (long long)self->tokenizer.number_value);
+    } else if(token == TSL_TOKEN_BOOL) {
+        printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
+    } else if(token == TSL_TOKEN_NULL) {
+        printf("rhs null\n");
+    } else if(token == TSL_TOKEN_STRING) {
+        printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
+    } else {
+        fprintf(stderr, "Error: Expected number, bool, null or string, got TODO\n");
+        return -1;
+    }
     return 0;
 }
 
@@ -18,10 +85,13 @@ static int tsl_parser_parse(TslParser *self) {
         TslToken token = tsl_tokenizer_next(&self->tokenizer);
         if(token == TSL_TOKEN_IDENTIFIER) {
             TslStringView identifier = self->tokenizer.identifier;
+            printf("identifier: %.*s\n", (int)identifier.size, identifier.data);
             if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_EQUAL)) {
                 return -1;
             }
-            tsl_parser_parse_rhs(self);
+            if(tsl_parser_parse_rhs(self) != 0) {
+                return -1;
+            }
         } else if(token == TSL_TOKEN_END_OF_FILE) {
             break;
         } else {
diff --git a/src/tokenizer.c b/src/tokenizer.c
index 97673f5..efc9eee 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -9,6 +9,8 @@ void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size)
 
     self->identifier.data = NULL;
     self->identifier.size = 0;
+    self->string.data = NULL;
+    self->string.size = 0;
     self->bool_value = 0;
     self->number_value = 0;
 }
@@ -84,6 +86,27 @@ static int64_t string_to_int(TslStringView *str) {
     return num;
 }
 
+static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_start_symbol) {
+    int string_escape = 0;
+    for(;;) {
+        char c = tsl_tokenizer_get_char(self);
+        if(c == string_start_symbol) {
+            ++self->code_index;
+            if(!string_escape)
+                return 1;
+            string_escape = 0;
+        } else if(c == '\\') {
+            ++self->code_index;
+            string_escape = !string_escape;
+        } else if(c == '\0') {
+            return 0;
+        } else {
+            ++self->code_index;
+            string_escape = 0;
+        }
+    }
+}
+
 TslToken tsl_tokenizer_next(TslTokenizer *self) {
     char c;
     tsl_tokenizer_skip_whitespace(self);
@@ -102,16 +125,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
         self->identifier.size = self->code_index - identifier_start;
 
         switch(self->identifier.size) {
-            case 3: {
-                if(memcmp(self->identifier.data, "null", 3) == 0) {
-                    return TSL_TOKEN_NULL;
-                }
-                break;
-            }
             case 4: {
                 if(memcmp(self->identifier.data, "true", 4) == 0) {
                     self->bool_value = 1;
                     return TSL_TOKEN_BOOL;
+                } else if(memcmp(self->identifier.data, "null", 4) == 0) {
+                    return TSL_TOKEN_NULL;
                 }
                 break;
             }
@@ -137,9 +156,27 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
         self->identifier.size = self->code_index - num_start;
         self->number_value = string_to_int(&self->identifier);
         return TSL_TOKEN_NUM;
+    } else if(c == '"') {
+        char string_start_symbol = c;
+        size_t string_start;
+        ++self->code_index;
+        string_start = self->code_index;
+        if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) {
+            self->string.data = self->code + string_start;
+            self->string.size = self->code_index - 1 - string_start;
+            return TSL_TOKEN_STRING;
+        } else {
+            return TSL_TOKEN_END_OF_FILE;
+        }
     } else if(c == '=') {
         ++self->code_index;
         return TSL_TOKEN_EQUAL;
+    } else if(c == '{') {
+        ++self->code_index;
+        return TSL_TOKEN_LBRACE;
+    } else if(c == '}') {
+        ++self->code_index;
+        return TSL_TOKEN_RBRACE;
     } else if(c == '\0') {
         return TSL_TOKEN_END_OF_FILE;
     } else {
-- 
cgit v1.2.3