add parsing of rhs and map

author: dec05eba <dec05eba@protonmail.com> 2020-01-14 08:40:26 +0100
committer: dec05eba <dec05eba@protonmail.com> 2020-01-14 08:40:26 +0100
commit: 44b41bcab276f85c96bce88609865ffa4232f7e2 (patch)
tree: 224a20e53b65c2226c54d1b4ed2ee894c2bc9b63
parent: f5fc07765303055922e1dfcc35f2f2f620c1e66b (diff)
4 files changed, 120 insertions, 8 deletions
diff --git a/include/parser.h b/include/parser.h
index 3523127..d130597 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -3,6 +3,7 @@
 
 #include "tokenizer.h"
 
+/* Returns 0 on success */
 int tsl_parse(const char *code, size_t code_size);
 
 #endif /* TSL_PARSER_H */
diff --git a/include/tokenizer.h b/include/tokenizer.h
index a1d0932..8b511ed 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -13,10 +13,13 @@ typedef enum {
     TSL_TOKEN_END_OF_FILE,
     TSL_TOKEN_UNEXPECTED_SYMBOL,
     TSL_TOKEN_IDENTIFIER,
+    TSL_TOKEN_STRING,
     TSL_TOKEN_NUM,
     TSL_TOKEN_BOOL,
     TSL_TOKEN_NULL,
-    TSL_TOKEN_EQUAL
+    TSL_TOKEN_EQUAL,
+    TSL_TOKEN_LBRACE,
+    TSL_TOKEN_RBRACE
 } TslToken;
 
 typedef struct {
@@ -25,6 +28,7 @@ typedef struct {
     size_t code_index;
 
     TslStringView identifier;
+    TslStringView string;
     int bool_value;
     int64_t number_value;
 } TslTokenizer;
diff --git a/src/parser.c b/src/parser.c
index 35a4abb..90c5a24 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -9,7 +9,74 @@ static void tsl_parser_init(TslParser *self, const char *code, size_t code_size)
     tsl_tokenizer_init(&self->tokenizer, code, code_size);
 }
 
+typedef enum {
+    MAP_TYPE_UNKNOWN, /* initial state: no element has decided the kind yet */
+    MAP_TYPE_LIST,    /* elements are plain values separated by ',' */
+    MAP_TYPE_MAP      /* elements are key-value pairs written with ':' */
+} MapType; /* element kind tracked while parsing a '{ ... }' block */
+
+/*
+ * Parses elements up to and including the closing '}', printing each value to stdout.
+ * A string followed by ',' marks a list element, one followed by ':' a map key; mixing both is an error.
+ * Returns 0 on success, -1 on a syntax error (a message is written to stderr).
+ */
+static int tsl_parser_parse_map(TslParser *self) {
+    MapType map_type = MAP_TYPE_UNKNOWN;
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&self->tokenizer);
+        if(token == TSL_TOKEN_NUM) {
+            printf("rhs num: %lld\n", (long long)self->tokenizer.number_value);
+        } else if(token == TSL_TOKEN_BOOL) {
+            printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
+        } else if(token == TSL_TOKEN_NULL) {
+            printf("rhs null\n");
+        } else if(token == TSL_TOKEN_STRING) {
+            printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data);
+            token = tsl_tokenizer_next(&self->tokenizer);
+            if(token == TSL_TOKEN_COMMA || token == TSL_TOKEN_COLON) {
+                /* The separator decides the element kind; MAP_TYPE_UNKNOWN accepts either */
+                MapType element_type = (token == TSL_TOKEN_COLON) ? MAP_TYPE_MAP : MAP_TYPE_LIST;
+                if(map_type != MAP_TYPE_UNKNOWN && map_type != element_type) {
+                    /* TODO: Show which lines have mismatching element types */
+                    fprintf(stderr, "Error: You can't mix list elements and map elements in the same map\n");
+                    return -1;
+                }
+                map_type = element_type;
+            } else if(token == TSL_TOKEN_RBRACE) {
+                if(map_type == MAP_TYPE_MAP) {
+                    /* TODO: Show why ':' is needed (because other elements in the map are map types) */
+                    fprintf(stderr, "Error: Expected ':', got '}'\n");
+                    return -1;
+                }
+                return 0;
+            } else {
+                /* TODO: Show only "expects '}' when not using key-value pair" */
+                fprintf(stderr, "Error: Expected ',', ':' or '}', got TODO\n");
+                return -1;
+            }
+        } else if(token == TSL_TOKEN_RBRACE) {
+            return 0;
+        } else {
+            fprintf(stderr, "Error: Expected '}', got TODO\n");
+            return -1;
+        }
+    }
+}
+
 static int tsl_parser_parse_rhs(TslParser *self) {
+    TslToken token = tsl_tokenizer_next(&self->tokenizer); /* the value that follows '=' */
+    if(token == TSL_TOKEN_NUM) {
+        printf("rhs num: %lld\n", (long long)self->tokenizer.number_value); /* %ld is wrong where long is 32-bit */
+    } else if(token == TSL_TOKEN_BOOL) {
+        printf("rhs bool: %s\n", self->tokenizer.bool_value ? "true" : "false");
+    } else if(token == TSL_TOKEN_NULL) {
+        printf("rhs null\n");
+    } else if(token == TSL_TOKEN_STRING) {
+        printf("rhs string: |%.*s|\n", (int)self->tokenizer.string.size, self->tokenizer.string.data); /* '*' takes an int */
+    } else {
+        fprintf(stderr, "Error: Expected number, bool, null or string, got TODO\n");
+        return -1; /* the caller aborts parsing on any non-zero result */
+    }
     return 0;
 }
 
@@ -18,10 +85,13 @@ static int tsl_parser_parse(TslParser *self) {
         TslToken token = tsl_tokenizer_next(&self->tokenizer);
         if(token == TSL_TOKEN_IDENTIFIER) {
             TslStringView identifier = self->tokenizer.identifier;
+            printf("identifier: %.*s\n", identifier.size, identifier.data);
             if(!tsl_tokenizer_accept(&self->tokenizer, TSL_TOKEN_EQUAL)) {
                 return -1;
             }
-            tsl_parser_parse_rhs(self);
+            if(tsl_parser_parse_rhs(self) != 0) {
+                return -1;
+            }
         } else if(token == TSL_TOKEN_END_OF_FILE) {
             break;
         } else {
diff --git a/src/tokenizer.c b/src/tokenizer.c
index 97673f5..efc9eee 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -9,6 +9,8 @@ void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size)
 
     self->identifier.data = NULL;
     self->identifier.size = 0;
+    self->string.data = NULL;
+    self->string.size = 0;
     self->bool_value = 0;
     self->number_value = 0;
 }
@@ -84,6 +86,27 @@ static int64_t string_to_int(TslStringView *str) {
     return num;
 }
 
+/*
+ * Moves code_index past the remainder of a string literal whose opening quote
+ * has already been consumed. A quote preceded by an unescaped backslash does
+ * not end the string. Returns 1 with code_index just past the closing quote,
+ * or 0 if a '\0' is read before the string is closed.
+ */
+static int tsl_tokenizer_goto_end_of_string(TslTokenizer *self, char string_start_symbol) {
+    int escaped = 0; /* non-zero when the previous character was an unescaped backslash */
+    for(;;) {
+        char c = tsl_tokenizer_get_char(self);
+        int is_quote = (c == string_start_symbol);
+        if(!is_quote && c == '\0')
+            return 0;
+        ++self->code_index;
+        if(is_quote && !escaped)
+            return 1;
+        /* A backslash flips the escape state (so "\\" is a literal backslash); anything else clears it */
+        escaped = (!is_quote && c == '\\') ? !escaped : 0;
+    }
+}
+
 TslToken tsl_tokenizer_next(TslTokenizer *self) {
     char c;
     tsl_tokenizer_skip_whitespace(self);
@@ -102,16 +125,12 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
         self->identifier.size = self->code_index - identifier_start;
 
         switch(self->identifier.size) {
-            case 3: {
-                if(memcmp(self->identifier.data, "null", 3) == 0) {
-                    return TSL_TOKEN_NULL;
-                }
-                break;
-            }
             case 4: {
                 if(memcmp(self->identifier.data, "true", 4) == 0) {
                     self->bool_value = 1;
                     return TSL_TOKEN_BOOL;
+                } else if(memcmp(self->identifier.data, "null", 4) == 0) {
+                    return TSL_TOKEN_NULL;
                 }
                 break;
             }
@@ -137,9 +156,27 @@ TslToken tsl_tokenizer_next(TslTokenizer *self) {
         self->identifier.size = self->code_index - num_start;
         self->number_value = string_to_int(&self->identifier);
         return TSL_TOKEN_NUM;
+    } else if(c == '"') {
+        char string_start_symbol = c;
+        size_t string_start;
+        ++self->code_index;
+        string_start = self->code_index;
+        if(tsl_tokenizer_goto_end_of_string(self, string_start_symbol)) {
+            self->string.data = self->code + string_start;
+            self->string.size = self->code_index - 1 - string_start;
+            return TSL_TOKEN_STRING;
+        } else {
+            return TSL_TOKEN_END_OF_FILE;
+        }
     } else if(c == '=') {
         ++self->code_index;
         return TSL_TOKEN_EQUAL;
+    } else if(c == '{') {
+        ++self->code_index;
+        return TSL_TOKEN_LBRACE;
+    } else if(c == '}') {
+        ++self->code_index;
+        return TSL_TOKEN_RBRACE;
     } else if(c == '\0') {
         return TSL_TOKEN_END_OF_FILE;
     } else {
author	dec05eba <dec05eba@protonmail.com>	2020-01-14 08:40:26 +0100
committer	dec05eba <dec05eba@protonmail.com>	2020-01-14 08:40:26 +0100
commit	44b41bcab276f85c96bce88609865ffa4232f7e2 (patch)
tree	224a20e53b65c2226c54d1b4ed2ee894c2bc9b63
parent	f5fc07765303055922e1dfcc35f2f2f620c1e66b (diff)