aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2020-01-14 07:27:47 +0100
committerdec05eba <dec05eba@protonmail.com>2020-01-14 07:27:47 +0100
commite27bd78c8211532bf0d39d87d2051222f7e86e26 (patch)
tree631c050014070e52519798b228770f578b4c0a59 /src
start
Diffstat (limited to 'src')
-rw-r--r--src/main.c41
-rw-r--r--src/tokenizer.c148
2 files changed, 189 insertions, 0 deletions
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..6653ba2
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,41 @@
+#include "../include/tokenizer.h"
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Tokenizer smoke test: runs the tokenizer over an embedded sample script and
+ * prints every identifier and number token it produces.
+ *
+ * Returns 0 once TSL_TOKEN_END_OF_FILE is reached, or 1 as soon as a token
+ * other than an identifier or a number is returned.
+ */
+int main(void) {
+    TslTokenizer tokenizer;
+    const char *code =
+"value1 = 1\n"
+"value2 = true\n"
+"value3 = null\n"
+"value4 = \"hello world\"\n"
+"value5 = {\"hello\", \"world\", 5}\n"
+"value6 = {\"hello\": \"world\", \"value\": 23}\n"
+"value7 = fn () {}\n"
+"value8 = fn (value) {}\n"
+"value9 = {\n"
+" \"hello\": \"world\",\n"
+" \"sayHello\": fn() {\n"
+" \n"
+" }\n"
+"}\n"
+"\n"
+"str = value9[\"hello\"]\n"
+"value9[\"sayHello\"]()";
+    tsl_tokenizer_init(&tokenizer, code, strlen(code));
+
+    for(;;) {
+        TslToken token = tsl_tokenizer_next(&tokenizer);
+        if(token == TSL_TOKEN_END_OF_FILE) {
+            break;
+        } else if(token == TSL_TOKEN_IDENTIFIER) {
+            /* The identifier is a view into |code| and is not NUL-terminated, hence %.*s */
+            printf("identifier: %.*s\n", (int)tokenizer.identifier.size, tokenizer.identifier.data);
+        } else if(token == TSL_TOKEN_NUM) {
+            /* Cast so the argument always matches the conversion specifier, */
+            /* regardless of which integer type backs number_value on this platform */
+            printf("num: %lld\n", (long long)tokenizer.number_value);
+        } else {
+            return 1;
+        }
+    }
+
+    return 0;
+} \ No newline at end of file
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..1ab34db
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,148 @@
+#include "../include/tokenizer.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Prepares |self| for tokenizing |code|, which is |code_size| bytes long.
+ * Only the pointer is stored; the bytes of |code| are not copied.
+ */
+void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) {
+    /* Values produced by the most recent token start out empty */
+    self->number_value = 0;
+    self->bool_value = 0;
+    self->identifier.size = 0;
+    self->identifier.data = NULL;
+
+    /* Start reading at the first byte of the source */
+    self->code_index = 0;
+    self->code_size = code_size;
+    self->code = code;
+}
+
+/*
+ * Returns the character at the current read position without consuming it,
+ * or '\0' when the read position is at or past the end of the code.
+ */
+static char tsl_tokenizer_get_char(TslTokenizer *self) {
+    const int at_end = self->code_index >= self->code_size;
+    return at_end ? '\0' : self->code[self->code_index];
+}
+
+/*
+ * Advances the read position past any run of spaces, newlines, tabs and
+ * carriage returns, stopping at the first other character (or end of code).
+ */
+static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) {
+    char ch = tsl_tokenizer_get_char(self);
+    while(ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r') {
+        ++self->code_index;
+        ch = tsl_tokenizer_get_char(self);
+    }
+}
+
+/* Returns 1 if |c| is an ASCII letter (a-z or A-Z), otherwise 0 */
+static int is_alpha(char c) {
+    if(c >= 'a' && c <= 'z')
+        return 1;
+    return c >= 'A' && c <= 'Z';
+}
+
+/* Returns 1 if |c| is an ASCII decimal digit (0-9), otherwise 0 */
+static int is_num(char c) {
+    return !(c < '0' || c > '9');
+}
+
+/* Returns 1 if |c| may begin an identifier (a letter or '_'), otherwise 0 */
+static int is_identifier_start(char c) {
+    if(c == '_')
+        return 1;
+    return is_alpha(c);
+}
+
+/* Returns 1 if |c| may appear after the first character of an identifier (a letter, a digit or '_'), otherwise 0 */
+static int is_identifier_continue(char c) {
+    if(c == '_' || is_num(c))
+        return 1;
+    return is_alpha(c);
+}
+
+/* Powers of ten: num_multipliers[i] == 10^i for i = 0..18 (19 entries) */
+const int64_t num_multipliers[] = {
+    /* 10^0  .. 10^3  */ 1, 10, 100, 1000,
+    /* 10^4  .. 10^7  */ 10000, 100000, 1000000, 10000000,
+    /* 10^8  .. 10^11 */ 100000000, 1000000000, 10000000000, 100000000000,
+    /* 10^12 .. 10^15 */ 1000000000000, 10000000000000, 100000000000000, 1000000000000000,
+    /* 10^16 .. 10^18 */ 10000000000000000, 100000000000000000, 1000000000000000000
+};
+
+/*
+ * Converts a string of decimal digits to an integer.
+ *
+ * |str| must contain only the characters '0'..'9'; an empty string yields 0.
+ * Any number of digits is accepted. A value too large for int64_t is clamped
+ * to INT64_MAX instead of overflowing.
+ */
+static int64_t string_to_int(TslStringView *str) {
+    int64_t num = 0;
+    for(size_t i = 0; i < str->size; ++i) {
+        const int digit = str->data[i] - '0';
+        /* True exactly when num * 10 + digit would exceed INT64_MAX */
+        if(num > (INT64_MAX - digit) / 10)
+            return INT64_MAX;
+        num = num * 10 + digit;
+    }
+    return num;
+}
+
+/*
+ * Reads the next token from the code, skipping any leading whitespace.
+ *
+ * Returns:
+ *   TSL_TOKEN_NULL              for the keyword "null"
+ *   TSL_TOKEN_BOOL              for "true" / "false"; bool_value is set to 1 / 0
+ *   TSL_TOKEN_IDENTIFIER        for any other [A-Za-z_][A-Za-z0-9_]* word
+ *   TSL_TOKEN_NUM               for a run of decimal digits; number_value holds its value
+ *   TSL_TOKEN_EQUAL             for '='
+ *   TSL_TOKEN_END_OF_FILE       when the end of the code (or a NUL byte) is reached
+ *   TSL_TOKEN_UNEXPECTED_SYMBOL for anything else, after reporting it on stderr
+ *
+ * For word and number tokens self->identifier is set to view the token text
+ * inside the code buffer. The end-of-file and unexpected-symbol cases do not
+ * advance the read position, so calling again yields the same token.
+ */
+TslToken tsl_tokenizer_next(TslTokenizer *self) {
+    char c;
+    tsl_tokenizer_skip_whitespace(self);
+
+    c = tsl_tokenizer_get_char(self);
+    if(is_identifier_start(c)) {
+        size_t identifier_start = self->code_index;
+        ++self->code_index;
+        while(is_identifier_continue(tsl_tokenizer_get_char(self)))
+            ++self->code_index;
+        self->identifier.data = self->code + identifier_start;
+        self->identifier.size = self->code_index - identifier_start;
+
+        /* Keywords are told apart by length first so that at most two comparisons are needed */
+        switch(self->identifier.size) {
+            case 4: {
+                /* "null" and "true" are both four characters long */
+                if(memcmp(self->identifier.data, "null", 4) == 0) {
+                    return TSL_TOKEN_NULL;
+                }
+                if(memcmp(self->identifier.data, "true", 4) == 0) {
+                    self->bool_value = 1;
+                    return TSL_TOKEN_BOOL;
+                }
+                break;
+            }
+            case 5: {
+                if(memcmp(self->identifier.data, "false", 5) == 0) {
+                    self->bool_value = 0;
+                    return TSL_TOKEN_BOOL;
+                }
+                break;
+            }
+            default:
+                break;
+        }
+        return TSL_TOKEN_IDENTIFIER;
+    } else if(is_num(c)) {
+        size_t num_start = self->code_index;
+        ++self->code_index;
+        while(is_num(tsl_tokenizer_get_char(self)))
+            ++self->code_index;
+        /* The digits are exposed through the identifier view as well as number_value */
+        self->identifier.data = self->code + num_start;
+        self->identifier.size = self->code_index - num_start;
+        self->number_value = string_to_int(&self->identifier);
+        return TSL_TOKEN_NUM;
+    } else if(c == '=') {
+        ++self->code_index;
+        return TSL_TOKEN_EQUAL;
+    } else if(c == '\0') {
+        return TSL_TOKEN_END_OF_FILE;
+    } else {
+        fprintf(stderr, "Unexpected symbol '%c'\n", c);
+        return TSL_TOKEN_UNEXPECTED_SYMBOL;
+    }
+}