start

author: dec05eba <dec05eba@protonmail.com> 2020-01-14 07:27:47 +0100
committer: dec05eba <dec05eba@protonmail.com> 2020-01-14 07:27:47 +0100
commit: e27bd78c8211532bf0d39d87d2051222f7e86e26 (patch)
tree: 631c050014070e52519798b228770f578b4c0a59 /src
2 files changed, 189 insertions, 0 deletions
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..6653ba2
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,41 @@
+#include "../include/tokenizer.h"
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Tokenizer smoke test: feeds an embedded sample script to the tokenizer and
+ * prints every identifier and number token it yields.
+ *
+ * Returns 0 once the tokenizer reports the end of the input, or 1 as soon as
+ * the tokenizer returns any token type other than identifier or number.
+ */
+int main(void) {
+    TslTokenizer tokenizer;
+    const char *code =
+"value1 = 1\n"
+"value2 = true\n"
+"value3 = null\n"
+"value4 = \"hello world\"\n"
+"value5 = {\"hello\", \"world\", 5}\n"
+"value6 = {\"hello\": \"world\", \"value\": 23}\n"
+"value7 = fn () {}\n"
+"value8 = fn (value) {}\n"
+"value9 = {\n"
+"    \"hello\": \"world\",\n"
+"    \"sayHello\": fn() {\n"
+"        \n"
+"    }\n"
+"}\n"
+"\n"
+"str = value9[\"hello\"]\n"
+"value9[\"sayHello\"]()";
+    tsl_tokenizer_init(&tokenizer, code, strlen(code));
+
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&tokenizer);
+        if(token == TSL_TOKEN_END_OF_FILE) {
+            break;
+        } else if(token == TSL_TOKEN_IDENTIFIER) {
+            /* The identifier is a view into |code| and is not NUL-terminated, hence %.*s */
+            printf("identifier: %.*s\n", (int)tokenizer.identifier.size, tokenizer.identifier.data);
+        } else if(token == TSL_TOKEN_NUM) {
+            /* Cast so the argument always matches %lld, whichever builtin type backs number_value */
+            printf("num: %lld\n", (long long)tokenizer.number_value);
+        } else {
+            /* Every other token type is not handled by this driver */
+            return 1;
+        }
+    }
+
+    return 0;
+}
+\ No newline at end of file
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..1ab34db
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,148 @@
+#include "../include/tokenizer.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Prepares |self| for tokenizing the |code_size| bytes starting at |code|.
+ * Only the pointer is stored; the source text is not copied.
+ */
+void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
+    /* Per-token results start out cleared */
+    self->number_value = 0;
+    self->bool_value = 0;
+    self->identifier.size = 0;
+    self->identifier.data = NULL;
+
+    /* Source text and the read position within it */
+    self->code_index = 0;
+    self->code_size = code_size;
+    self->code = code;
+}
+
+/*
+ * Returns the character at the current read position without advancing,
+ * or '\0' when the read position is at or past the end of the code.
+ */
+static char tsl_tokenizer_get_char(TslTokenizer *self) {
+    return self->code_index < self->code_size ? self->code[self->code_index] : '\0';
+}
+
+/*
+ * Advances the read position past any run of spaces, newlines, tabs and
+ * carriage returns. Stops at the first other character (including the '\0'
+ * that tsl_tokenizer_get_char reports at the end of the code).
+ */
+static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
+    char current = tsl_tokenizer_get_char(self);
+    while(current == ' ' || current == '\n' || current == '\t' || current == '\r') {
+        ++self->code_index;
+        current = tsl_tokenizer_get_char(self);
+    }
+}
+
+/* Returns 1 if |c| is an ASCII letter (a-z or A-Z), otherwise 0 */
+static int is_alpha(char c) {
+    if(c >= 'A' && c <= 'Z')
+        return 1;
+    if(c >= 'a' && c <= 'z')
+        return 1;
+    return 0;
+}
+
+/* Returns 1 if |c| is an ASCII decimal digit (0-9), otherwise 0 */
+static int is_num(char c) {
+    return !(c < '0' || c > '9');
+}
+
+/* Returns 1 if |c| may begin an identifier: an underscore or an ASCII letter */
+static int is_identifier_start(char c) {
+    return c == '_' || is_alpha(c);
+}
+
+/*
+ * Returns 1 if |c| may appear after the first character of an identifier:
+ * an underscore, an ASCII digit or an ASCII letter.
+ */
+static int is_identifier_continue(char c) {
+    if(c == '_')
+        return 1;
+    return is_num(c) || is_alpha(c);
+}
+
+/*
+ * Powers of ten from 10^0 through 10^18; the entry at index i is 10^i.
+ * The table has 19 entries because 10^19 does not fit in an int64_t.
+ */
+const int64_t num_multipliers[] = {
+    1,
+    10,
+    100,
+    1000,
+    10000,
+    100000,
+    1000000,
+    10000000,
+    100000000,
+    1000000000,
+    10000000000,
+    100000000000,
+    1000000000000,
+    10000000000000,
+    100000000000000,
+    1000000000000000,
+    10000000000000000,
+    100000000000000000,
+    1000000000000000000
+};
+
+/*
+ * Converts the decimal digits in |str| to an integer.
+ *
+ * |str| must contain only the characters '0'-'9'; the caller is responsible
+ * for that (no validation is done here). An empty view yields 0.
+ * A value that does not fit in an int64_t is clamped to INT64_MAX instead of
+ * overflowing, so arbitrarily long digit runs are safe to pass in.
+ */
+static int64_t string_to_int(TslStringView *str) {
+    int64_t num = 0;
+    for(size_t i = 0; i < str->size; ++i) {
+        int digit = str->data[i] - '0';
+        /* Would num * 10 + digit exceed INT64_MAX? Check before doing the arithmetic */
+        if(num > (INT64_MAX - digit) / 10)
+            return INT64_MAX;
+        num = num * 10 + digit;
+    }
+    return num;
+}
+
+/*
+ * Skips leading whitespace and reads the next token from the code.
+ *
+ * Returns:
+ *   TSL_TOKEN_NULL              for the keyword "null".
+ *   TSL_TOKEN_BOOL              for "true"/"false"; self->bool_value is set to 1/0.
+ *   TSL_TOKEN_IDENTIFIER        for any other [A-Za-z_][A-Za-z0-9_]* word;
+ *                               self->identifier views the word inside the code.
+ *   TSL_TOKEN_NUM               for a run of decimal digits; self->number_value holds
+ *                               the value and self->identifier views the digits.
+ *   TSL_TOKEN_EQUAL             for '='.
+ *   TSL_TOKEN_END_OF_FILE       at the end of the code (or at a '\0' byte in it).
+ *   TSL_TOKEN_UNEXPECTED_SYMBOL for anything else. A message is written to stderr
+ *                               and the read position is NOT advanced, so a
+ *                               repeated call reports the same symbol again.
+ */
+TslToken tsl_tokenizer_next(TslTokenizer *self) {
+    char c;
+    tsl_tokenizer_skip_whitespace(self);
+
+    c = tsl_tokenizer_get_char(self);
+    if(is_identifier_start(c)) {
+        size_t identifier_start = self->code_index;
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(!is_identifier_continue(c))
+                break;
+            ++self->code_index;
+        }
+        self->identifier.data = self->code + identifier_start;
+        self->identifier.size = self->code_index - identifier_start;
+
+        /* Keywords are told apart by length first so that memcmp never reads past the word */
+        switch(self->identifier.size) {
+            case 4: {
+                if(memcmp(self->identifier.data, "null", 4) == 0) {
+                    return TSL_TOKEN_NULL;
+                }
+                if(memcmp(self->identifier.data, "true", 4) == 0) {
+                    self->bool_value = 1;
+                    return TSL_TOKEN_BOOL;
+                }
+                break;
+            }
+            case 5: {
+                if(memcmp(self->identifier.data, "false", 5) == 0) {
+                    self->bool_value = 0;
+                    return TSL_TOKEN_BOOL;
+                }
+                break;
+            }
+            default:
+                break;
+        }
+        return TSL_TOKEN_IDENTIFIER;
+    } else if(is_num(c)) {
+        size_t num_start = self->code_index;
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(!is_num(c))
+                break;
+            ++self->code_index;
+        }
+        /* The identifier view is reused to hand the digit run to string_to_int */
+        self->identifier.data = self->code + num_start;
+        self->identifier.size = self->code_index - num_start;
+        self->number_value = string_to_int(&self->identifier);
+        return TSL_TOKEN_NUM;
+    } else if(c == '=') {
+        ++self->code_index;
+        return TSL_TOKEN_EQUAL;
+    } else if(c == '\0') {
+        return TSL_TOKEN_END_OF_FILE;
+    } else {
+        fprintf(stderr, "Unexpected symbol '%c'\n", c);
+        return TSL_TOKEN_UNEXPECTED_SYMBOL;
+    }
+}
author	dec05eba <dec05eba@protonmail.com>	2020-01-14 07:27:47 +0100
committer	dec05eba <dec05eba@protonmail.com>	2020-01-14 07:27:47 +0100
commit	e27bd78c8211532bf0d39d87d2051222f7e86e26 (patch)
tree	631c050014070e52519798b228770f578b4c0a59 /src