start

author: dec05eba <dec05eba@protonmail.com> 2020-01-14 07:27:47 +0100
committer: dec05eba <dec05eba@protonmail.com> 2020-01-14 07:27:47 +0100
commit: e27bd78c8211532bf0d39d87d2051222f7e86e26 (patch)
tree: 631c050014070e52519798b228770f578b4c0a59
6 files changed, 257 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c6275a9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Object files produced by the compile rules in the Makefile
+*.o
+# The linked tsl executable
+tsl
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9176922
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,10 @@
+# Flags used when compiling every object file: common warnings plus full debug info.
+CFLAGS=-Wall -Wextra -g3
+OBJECTS=main.o tokenizer.o
+
+# `all` does not name a file, so it is declared phony and delegates to the real
+# `tsl` target. make then relinks only when an object file has changed instead
+# of on every invocation.
+.PHONY: all
+all: tsl
+
+tsl: $(OBJECTS)
+	$(CC) -o tsl $(OBJECTS) -fPIE
+
+main.o: src/main.c include/tokenizer.h
+	$(CC) -c src/main.c $(CFLAGS)
+
+tokenizer.o: src/tokenizer.c include/tokenizer.h
+	$(CC) -c src/tokenizer.c $(CFLAGS)
diff --git a/SYNTAX.md b/SYNTAX.md
new file mode 100644
index 0000000..149633e
--- /dev/null
+++ b/SYNTAX.md
@@ -0,0 +1,19 @@
+```
+value1 = 1
+value2 = true
+value3 = null
+value4 = "hello world"
+value5 = {"hello", "world", 5}
+value6 = {"hello": "world", "value": 23}
+value7 = fn () {}
+value8 = fn (value) {}
+value9 = {
+    "hello": "world",
+    "sayHello": fn() {
+        
+    }
+}
+
+str = value9["hello"]
+value9["sayHello"]()
+```
diff --git a/include/tokenizer.h b/include/tokenizer.h
new file mode 100644
index 0000000..decdae4
--- /dev/null
+++ b/include/tokenizer.h
@@ -0,0 +1,37 @@
+#ifndef TSL_TOKENIZER_H
+#define TSL_TOKENIZER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Non-owning view of a run of bytes.
+ * The tokenizer points `data` into the source buffer it was given, so the
+ * bytes are not NUL-terminated at `size`; always use `data` together with `size`.
+ */
+typedef struct {
+    const char *data;
+    size_t size;
+} TslStringView;
+
+/* Kind of token reported by tsl_tokenizer_next(). */
+typedef enum {
+    TSL_TOKEN_END_OF_FILE,       /* no more input to read */
+    TSL_TOKEN_UNEXPECTED_SYMBOL, /* a character that starts no known token */
+    TSL_TOKEN_IDENTIFIER,        /* a name; its text is in TslTokenizer.identifier */
+    TSL_TOKEN_NUM,               /* a run of decimal digits; its value is in TslTokenizer.number_value */
+    TSL_TOKEN_BOOL,              /* `true` or `false`; its value is in TslTokenizer.bool_value */
+    TSL_TOKEN_NULL,              /* the null literal keyword */
+    TSL_TOKEN_EQUAL              /* the `=` character */
+} TslToken;
+
+/*
+ * Tokenizer state.
+ * The first three fields track the input; the remaining fields carry the
+ * payload of the token most recently returned by tsl_tokenizer_next().
+ */
+typedef struct {
+    const char *code;  /* source text being tokenized; the pointer is stored, the text is not copied */
+    size_t code_size;  /* number of bytes in `code` */
+    size_t code_index; /* offset of the next unread byte in `code` */
+
+    TslStringView identifier; /* text of the last identifier, keyword or number that was read */
+    int bool_value;           /* 1 for `true`, 0 for `false`; written when TSL_TOKEN_BOOL is returned */
+    int64_t number_value;     /* written when TSL_TOKEN_NUM is returned */
+} TslTokenizer;
+
+/*
+ * Initializes `self` to read the `code_size` bytes starting at `code`.
+ * Only the pointer is stored, so `code` must stay valid for as long as the
+ * tokenizer (and any identifier view it produced) is in use.
+ */
+void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size);
+
+/*
+ * Skips whitespace, reads the next token and returns its kind; the payload,
+ * if any, is stored in `self`.
+ * Returns TSL_TOKEN_END_OF_FILE when no input is left.
+ * On TSL_TOKEN_UNEXPECTED_SYMBOL the offending character is not consumed.
+ */
+TslToken tsl_tokenizer_next(TslTokenizer *self);
+
+#endif /* TSL_TOKENIZER_H */
+
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..6653ba2
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,41 @@
+#include "../include/tokenizer.h"
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Demo driver: tokenizes the sample script embedded below and prints every
+ * token the tokenizer recognizes.
+ *
+ * Returns 0 when the whole input was tokenized, or 1 as soon as the tokenizer
+ * reports a symbol it does not understand.
+ */
+int main(void) {
+    TslTokenizer tokenizer;
+    const char *code =
+"value1 = 1\n"
+"value2 = true\n"
+"value3 = null\n"
+"value4 = \"hello world\"\n"
+"value5 = {\"hello\", \"world\", 5}\n"
+"value6 = {\"hello\": \"world\", \"value\": 23}\n"
+"value7 = fn () {}\n"
+"value8 = fn (value) {}\n"
+"value9 = {\n"
+"    \"hello\": \"world\",\n"
+"    \"sayHello\": fn() {\n"
+"        \n"
+"    }\n"
+"}\n"
+"\n"
+"str = value9[\"hello\"]\n"
+"value9[\"sayHello\"]()";
+    tsl_tokenizer_init(&tokenizer, code, strlen(code));
+
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&tokenizer);
+        switch(token) {
+            case TSL_TOKEN_END_OF_FILE:
+                return 0;
+            case TSL_TOKEN_IDENTIFIER:
+                printf("identifier: %.*s\n", (int)tokenizer.identifier.size, tokenizer.identifier.data);
+                break;
+            case TSL_TOKEN_NUM:
+                /* int64_t is not `long` on every platform, so print it as long long */
+                printf("num: %lld\n", (long long)tokenizer.number_value);
+                break;
+            case TSL_TOKEN_BOOL:
+                printf("bool: %s\n", tokenizer.bool_value ? "true" : "false");
+                break;
+            case TSL_TOKEN_NULL:
+                printf("null\n");
+                break;
+            case TSL_TOKEN_EQUAL:
+                printf("equal\n");
+                break;
+            /* Only a symbol the tokenizer rejected is an error; valid tokens are handled above */
+            case TSL_TOKEN_UNEXPECTED_SYMBOL:
+            default:
+                return 1;
+        }
+    }
+}
+\ No newline at end of file
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..1ab34db
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,148 @@
+#include "../include/tokenizer.h"
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Resets `self` so that it reads the `code_size` bytes starting at `code`
+ * from the beginning. The token payload fields are cleared.
+ * Only the `code` pointer is stored; the text itself is not copied.
+ */
+void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
+    *self = (TslTokenizer){
+        .code = code,
+        .code_size = code_size,
+        .code_index = 0,
+        .identifier = { .data = NULL, .size = 0 },
+        .bool_value = 0,
+        .number_value = 0
+    };
+}
+
+/*
+ * Peeks at the byte at code_index without consuming it.
+ * Yields '\0' once code_index has reached code_size.
+ */
+static char tsl_tokenizer_get_char(TslTokenizer *self) {
+    const int at_end = self->code_index >= self->code_size;
+    return at_end ? '\0' : self->code[self->code_index];
+}
+
+/*
+ * Advances code_index past any run of spaces, tabs, carriage returns and
+ * newlines, stopping at the first other character (or at the end of input).
+ */
+static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
+    for(;;) {
+        const char next = tsl_tokenizer_get_char(self);
+        if(next != ' ' && next != '\n' && next != '\t' && next != '\r')
+            return;
+        ++self->code_index;
+    }
+}
+
+/* Returns 1 when `c` is an ASCII letter (a-z or A-Z), otherwise 0. */
+static int is_alpha(char c) {
+    const int is_lower = 'a' <= c && c <= 'z';
+    const int is_upper = 'A' <= c && c <= 'Z';
+    return is_lower || is_upper;
+}
+
+/* Returns 1 when `c` is an ASCII decimal digit (0-9), otherwise 0. */
+static int is_num(char c) {
+    /* Characters below '0' wrap to a huge unsigned value, so one comparison covers both bounds */
+    return (unsigned)(c - '0') < 10u;
+}
+
+/* Returns 1 when `c` may begin an identifier: an underscore or an ASCII letter. */
+static int is_identifier_start(char c) {
+    return c == '_' || is_alpha(c);
+}
+
+/* Returns 1 when `c` may follow the first character of an identifier: a letter, an underscore or a digit. */
+static int is_identifier_continue(char c) {
+    return is_identifier_start(c) || is_num(c);
+}
+
+/* Powers of ten: entry i holds 10^i, from 10^0 up to 10^18 (the largest power of ten that fits in int64_t). */
+const int64_t num_multipliers[] = {
+    /* 10^0  .. 10^2  */ INT64_C(1), INT64_C(10), INT64_C(100),
+    /* 10^3  .. 10^5  */ INT64_C(1000), INT64_C(10000), INT64_C(100000),
+    /* 10^6  .. 10^8  */ INT64_C(1000000), INT64_C(10000000), INT64_C(100000000),
+    /* 10^9  .. 10^11 */ INT64_C(1000000000), INT64_C(10000000000), INT64_C(100000000000),
+    /* 10^12 .. 10^14 */ INT64_C(1000000000000), INT64_C(10000000000000), INT64_C(100000000000000),
+    /* 10^15 .. 10^17 */ INT64_C(1000000000000000), INT64_C(10000000000000000), INT64_C(100000000000000000),
+    /* 10^18          */ INT64_C(1000000000000000000)
+};
+
+/*
+ * Converts a run of ASCII decimal digits to an integer.
+ *
+ * `str` must contain only the characters '0'..'9'; the tokenizer only passes
+ * digit runs. An empty view yields 0.
+ * A value that does not fit in int64_t saturates to INT64_MAX. Without that
+ * check the loop would index past the end of num_multipliers and overflow a
+ * signed integer, both of which are undefined behavior.
+ */
+static int64_t string_to_int(TslStringView *str) {
+    const size_t max_digits = sizeof(num_multipliers) / sizeof(num_multipliers[0]);
+    const char *digits = str->data;
+    size_t num_digits = str->size;
+    int64_t num = 0;
+
+    /* Leading zeros carry no value; dropping them keeps long zero-padded numbers in range */
+    while(num_digits > 0 && *digits == '0') {
+        ++digits;
+        --num_digits;
+    }
+
+    if(num_digits > max_digits)
+        return INT64_MAX;
+
+    /* Walk from the least significant digit; the digit at distance i from the end is worth 10^i */
+    for(size_t i = 0; i < num_digits; ++i) {
+        /* At most 9 * 10^18, which still fits in int64_t */
+        int64_t term = (int64_t)(digits[num_digits - 1 - i] - '0') * num_multipliers[i];
+        if(term > INT64_MAX - num)
+            return INT64_MAX;
+        num += term;
+    }
+    return num;
+}
+
+/* Returns non-zero when `view` holds exactly the bytes of the C string `keyword`. */
+static int tsl_string_view_equals(const TslStringView *view, const char *keyword) {
+    size_t keyword_size = strlen(keyword);
+    return view->size == keyword_size && memcmp(view->data, keyword, keyword_size) == 0;
+}
+
+/*
+ * Skips leading whitespace and reads one token starting at code_index.
+ *
+ * Returns the token kind and stores its payload in `self`:
+ *   - TSL_TOKEN_IDENTIFIER: `identifier` views the name.
+ *   - TSL_TOKEN_NULL, TSL_TOKEN_BOOL: the keywords `null`, `true` and `false`;
+ *     `identifier` views the keyword and `bool_value` is set for the booleans.
+ *   - TSL_TOKEN_NUM: `identifier` views the digits, `number_value` holds their value.
+ *   - TSL_TOKEN_EQUAL: a single `=`.
+ *   - TSL_TOKEN_END_OF_FILE: no input is left. A NUL byte inside the input is
+ *     reported the same way, because the end of input is signalled as '\0'.
+ *   - TSL_TOKEN_UNEXPECTED_SYMBOL: any other character. It is reported on
+ *     stderr and is NOT consumed, so calling again yields the same result.
+ */
+TslToken tsl_tokenizer_next(TslTokenizer *self) {
+    char c;
+    tsl_tokenizer_skip_whitespace(self);
+
+    c = tsl_tokenizer_get_char(self);
+    if(is_identifier_start(c)) {
+        size_t identifier_start = self->code_index;
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(!is_identifier_continue(c))
+                break;
+            ++self->code_index;
+        }
+        self->identifier.data = self->code + identifier_start;
+        self->identifier.size = self->code_index - identifier_start;
+
+        /*
+         * Keywords are matched on their full text and length, so `null`
+         * (four bytes) is recognized and a shorter prefix such as `nul`
+         * remains an ordinary identifier.
+         */
+        if(tsl_string_view_equals(&self->identifier, "null")) {
+            return TSL_TOKEN_NULL;
+        } else if(tsl_string_view_equals(&self->identifier, "true")) {
+            self->bool_value = 1;
+            return TSL_TOKEN_BOOL;
+        } else if(tsl_string_view_equals(&self->identifier, "false")) {
+            self->bool_value = 0;
+            return TSL_TOKEN_BOOL;
+        }
+        return TSL_TOKEN_IDENTIFIER;
+    } else if(is_num(c)) {
+        size_t num_start = self->code_index;
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(!is_num(c))
+                break;
+            ++self->code_index;
+        }
+        /* The digit run is exposed through `identifier` as well as parsed into number_value */
+        self->identifier.data = self->code + num_start;
+        self->identifier.size = self->code_index - num_start;
+        self->number_value = string_to_int(&self->identifier);
+        return TSL_TOKEN_NUM;
+    } else if(c == '=') {
+        ++self->code_index;
+        return TSL_TOKEN_EQUAL;
+    } else if(c == '\0') {
+        return TSL_TOKEN_END_OF_FILE;
+    } else {
+        fprintf(stderr, "Unexpected symbol '%c'\n", c);
+        return TSL_TOKEN_UNEXPECTED_SYMBOL;
+    }
+}
author	dec05eba <dec05eba@protonmail.com>	2020-01-14 07:27:47 +0100
committer	dec05eba <dec05eba@protonmail.com>	2020-01-14 07:27:47 +0100
commit	e27bd78c8211532bf0d39d87d2051222f7e86e26 (patch)
tree	631c050014070e52519798b228770f578b4c0a59