about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 2
-rw-r--r-- Makefile 10
-rw-r--r-- SYNTAX.md 19
-rw-r--r-- include/tokenizer.h 37
-rw-r--r-- src/main.c 41
-rw-r--r-- src/tokenizer.c 148
6 files changed, 257 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c6275a9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+tsl
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9176922
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,10 @@
+CFLAGS=-Wall -Wextra -g3
+
+all: main.o tokenizer.o # default goal: links both objects into the tsl executable
+ cc -o tsl main.o tokenizer.o -fPIE
+
+main.o: src/main.c include/tokenizer.h # rebuilt when main.c or the tokenizer header changes
+ cc -c src/main.c $(CFLAGS)
+
+tokenizer.o: src/tokenizer.c include/tokenizer.h # rebuilt when tokenizer.c or its header changes
+ cc -c src/tokenizer.c $(CFLAGS)
diff --git a/SYNTAX.md b/SYNTAX.md
new file mode 100644
index 0000000..149633e
--- /dev/null
+++ b/SYNTAX.md
@@ -0,0 +1,19 @@
+```
+value1 = 1
+value2 = true
+value3 = null
+value4 = "hello world"
+value5 = {"hello", "world", 5}
+value6 = {"hello": "world", "value": 23}
+value7 = fn () {}
+value8 = fn (value) {}
+value9 = {
+ "hello": "world",
+ "sayHello": fn() {
+
+ }
+}
+
+str = value9["hello"]
+value9["sayHello"]()
+```
diff --git a/include/tokenizer.h b/include/tokenizer.h
new file mode 100644
index 0000000..decdae4
--- /dev/null
+++ b/include/tokenizer.h
@@ -0,0 +1,37 @@
+#ifndef TSL_TOKENIZER_H
+#define TSL_TOKENIZER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct { /* Pointer-plus-length view of a byte range; carries no terminator of its own */
+ const char *data; /* first byte of the viewed range */
+ size_t size; /* length of the range in bytes */
+} TslStringView;
+
+typedef enum { /* Token kinds returned by tsl_tokenizer_next */
+ TSL_TOKEN_END_OF_FILE, /* the input is exhausted */
+ TSL_TOKEN_UNEXPECTED_SYMBOL, /* a character that starts no known token */
+ TSL_TOKEN_IDENTIFIER, /* text is in TslTokenizer.identifier */
+ TSL_TOKEN_NUM, /* value is in TslTokenizer.number_value */
+ TSL_TOKEN_BOOL, /* value is in TslTokenizer.bool_value */
+ TSL_TOKEN_NULL, /* the null keyword */
+ TSL_TOKEN_EQUAL /* the '=' character */
+} TslToken;
+
+typedef struct { /* Scanner position plus the payload of the most recently returned token */
+ const char *code; /* input buffer passed to tsl_tokenizer_init; the pointer is stored, not copied */
+ size_t code_size; /* number of bytes in code */
+ size_t code_index; /* offset of the next unread byte */
+
+ TslStringView identifier; /* text of the last identifier, keyword or number scanned */
+ int bool_value; /* 1 for true, 0 for false; written when TSL_TOKEN_BOOL is returned */
+ int64_t number_value; /* written when TSL_TOKEN_NUM is returned */
+} TslTokenizer;
+
+void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size); /* Binds self to code[0..code_size) and resets position and token payload; code is referenced, not copied */
+
+TslToken tsl_tokenizer_next(TslTokenizer *self); /* Skips whitespace and returns the next token kind; the matching payload field of self is updated */
+
+#endif /* TSL_TOKENIZER_H */
+
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..6653ba2
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,41 @@
+#include "../include/tokenizer.h"
+#include <string.h>
+#include <stdio.h>
+
+int main() { /* Demo driver: tokenizes an embedded sample script and prints identifier and number tokens */
+    TslTokenizer tokenizer;
+    const char *code =
+"value1 = 1\n"
+"value2 = true\n"
+"value3 = null\n"
+"value4 = \"hello world\"\n"
+"value5 = {\"hello\", \"world\", 5}\n"
+"value6 = {\"hello\": \"world\", \"value\": 23}\n"
+"value7 = fn () {}\n"
+"value8 = fn (value) {}\n"
+"value9 = {\n"
+" \"hello\": \"world\",\n"
+" \"sayHello\": fn() {\n"
+" \n"
+" }\n"
+"}\n"
+"\n"
+"str = value9[\"hello\"]\n"
+"value9[\"sayHello\"]()";
+    tsl_tokenizer_init(&tokenizer, code, strlen(code));
+
+    for(;;) { /* runs until end of input or the first token kind that is not printed below */
+        TslToken token = tsl_tokenizer_next(&tokenizer);
+        if(token == TSL_TOKEN_END_OF_FILE) {
+            break;
+        } else if(token == TSL_TOKEN_IDENTIFIER) {
+            printf("identifier: %.*s\n", (int)tokenizer.identifier.size, tokenizer.identifier.data); /* the view is not NUL-terminated, so the length is passed explicitly */
+        } else if(token == TSL_TOKEN_NUM) {
+            printf("num: %lld\n", (long long)tokenizer.number_value); /* int64_t is not long on every platform; cast so specifier and argument always agree */
+        } else {
+            return 1; /* any other token kind ends the demo with a failure status */
+        }
+    }
+
+    return 0;
+} \ No newline at end of file
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..1ab34db
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,148 @@
+#include "../include/tokenizer.h"
+#include <string.h>
+#include <stdio.h>
+
+void tsl_tokenizer_init(TslTokenizer *self, const char *code, size_t code_size) { /* Stores the code pointer and size (no copy is made) and clears all scanner state */
+ self->code = code;
+ self->code_size = code_size;
+ self->code_index = 0; /* scanning starts at the first byte */
+
+ self->identifier.data = NULL; /* no token has been produced yet */
+ self->identifier.size = 0;
+ self->bool_value = 0;
+ self->number_value = 0;
+}
+
+static char tsl_tokenizer_get_char(TslTokenizer *self) { /* Returns the byte at code_index without consuming it */
+ if(self->code_index < self->code_size)
+ return self->code[self->code_index];
+ return '\0'; /* at or past the end: '\0' serves as the end-of-input marker */
+}
+
+static void tsl_tokenizer_skip_whitespace(TslTokenizer *self) { /* Advances code_index past spaces, newlines, tabs and carriage returns */
+ for(;;) {
+ char c = tsl_tokenizer_get_char(self);
+ switch(c) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\r':
+ ++self->code_index; /* consume one whitespace byte, then look at the next */
+ break;
+ default:
+ return; /* first non-whitespace byte, or the '\0' returned at end of input */
+ }
+ }
+}
+
+static int is_alpha(char c) { /* Nonzero when c is in 'a'..'z' or 'A'..'Z' */
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+static int is_num(char c) { /* Nonzero when c is in '0'..'9' */
+ return c >= '0' && c <= '9';
+}
+
+static int is_identifier_start(char c) { /* Nonzero when c may begin an identifier: a letter or '_' */
+ return is_alpha(c) || c == '_';
+}
+
+static int is_identifier_continue(char c) { /* Nonzero when c may follow the first identifier character: a letter, a digit or '_' */
+ return is_alpha(c) || is_num(c) || c == '_';
+}
+
+const int64_t num_multipliers[] = { /* num_multipliers[i] == 10^i for i in 0..18; string_to_int no longer indexes it, kept because the symbol has external linkage */
+ 1,
+ 10,
+ 100,
+ 1000,
+ 10000,
+ 100000,
+ 1000000,
+ 10000000,
+ 100000000,
+ 1000000000,
+ 10000000000,
+ 100000000000,
+ 1000000000000,
+ 10000000000000,
+ 100000000000000,
+ 1000000000000000,
+ 10000000000000000,
+ 100000000000000000,
+ 1000000000000000000
+};
+
+/* Converts a run of ASCII decimal digits to int64_t; an empty run yields 0 and a value that does not fit is clamped to INT64_MAX */
+static int64_t string_to_int(TslStringView *str) {
+    int64_t num = 0;
+    for(size_t i = 0; i < str->size; ++i) { /* most significant digit first, so no power-of-ten table lookup can go out of bounds */
+        int digit = str->data[i] - '0';
+        num = (num > (INT64_MAX - digit) / 10) ? INT64_MAX : num * 10 + digit; /* test before multiplying: signed overflow is undefined behavior */
+    }
+    return num;
+}
+
+TslToken tsl_tokenizer_next(TslTokenizer *self) { /* Skips whitespace, scans one token starting at code_index and returns its kind */
+    char c;
+    tsl_tokenizer_skip_whitespace(self);
+
+    c = tsl_tokenizer_get_char(self);
+    if(is_identifier_start(c)) {
+        size_t identifier_start = self->code_index;
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(!is_identifier_continue(c))
+                break;
+            ++self->code_index;
+        }
+        self->identifier.data = self->code + identifier_start; /* a view into the input buffer, not a copy */
+        self->identifier.size = self->code_index - identifier_start;
+
+        switch(self->identifier.size) { /* dispatch on length so each memcmp compares exactly the whole word */
+            case 4: {
+                /* "null" and "true" are both four bytes long; comparing only three bytes would accept "nul" and miss "null" */
+                if(memcmp(self->identifier.data, "null", 4) == 0) {
+                    return TSL_TOKEN_NULL;
+                }
+                if(memcmp(self->identifier.data, "true", 4) == 0) {
+                    self->bool_value = 1;
+                    return TSL_TOKEN_BOOL;
+                }
+                break;
+            }
+            case 5: {
+                if(memcmp(self->identifier.data, "false", 5) == 0) {
+                    self->bool_value = 0;
+                    return TSL_TOKEN_BOOL;
+                }
+                break;
+            }
+            default:
+                break;
+        }
+        return TSL_TOKEN_IDENTIFIER; /* not a keyword */
+    } else if(is_num(c)) {
+        size_t num_start = self->code_index;
+        ++self->code_index;
+        for(;;) {
+            c = tsl_tokenizer_get_char(self);
+            if(!is_num(c))
+                break;
+            ++self->code_index;
+        }
+        self->identifier.data = self->code + num_start; /* the identifier view is reused to hold the digit run */
+        self->identifier.size = self->code_index - num_start;
+        self->number_value = string_to_int(&self->identifier);
+        return TSL_TOKEN_NUM;
+    } else if(c == '=') {
+        ++self->code_index;
+        return TSL_TOKEN_EQUAL;
+    } else if(c == '\0') { /* tsl_tokenizer_get_char returns '\0' once code_index reaches code_size */
+        return TSL_TOKEN_END_OF_FILE;
+    } else {
+        fprintf(stderr, "Unexpected symbol '%c'\n", c);
+        return TSL_TOKEN_UNEXPECTED_SYMBOL; /* code_index is not advanced, so a repeated call reports the same symbol */
+    }
+}