aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2019-02-24 02:10:58 +0100
committerdec05eba <dec05eba@protonmail.com>2020-07-25 14:36:40 +0200
commit11dc4b81935e3dfee997c421d8d6fa166edd7a05 (patch)
treeccb08be54209a4900c740c9ed58e8f9c2910811d /src
Initial commit, Function declaration work somewhat
Diffstat (limited to 'src')
-rw-r--r--src/alloc.c25
-rw-r--r--src/ast.c41
-rw-r--r--src/buffer.c56
-rw-r--r--src/buffer_view.c15
-rw-r--r--src/main.c37
-rw-r--r--src/mem.c10
-rw-r--r--src/parser.c187
-rw-r--r--src/tokenizer.c186
8 files changed, 557 insertions, 0 deletions
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..c9ca7c3
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,25 @@
+#include "../include/alloc.h"
+#include <stdlib.h>
+
+/*
+ * Allocates @size bytes with malloc.
+ * On success the new pointer is written to @mem and ALLOC_OK is returned.
+ * On failure @mem is left untouched and ALLOC_FAIL is returned.
+ */
+int am_malloc(usize size, void **mem) {
+    void *ptr;
+
+    ptr = malloc(size);
+    if(ptr) {
+        *mem = ptr;
+        return ALLOC_OK;
+    }
+    return ALLOC_FAIL;
+}
+
+/*
+ * Resizes the allocation @mem to @new_size bytes with realloc.
+ * On success the (possibly moved) pointer is written to @new_mem and ALLOC_OK is returned.
+ * On failure @new_mem is left untouched and ALLOC_FAIL is returned.
+ */
+int am_realloc(void *mem, usize new_size, void **new_mem) {
+    void *resized;
+
+    resized = realloc(mem, new_size);
+    if(resized) {
+        *new_mem = resized;
+        return ALLOC_OK;
+    }
+    return ALLOC_FAIL;
+}
+
+/* Releases @mem by handing it to free(). */
+void am_free(void *mem) {
+ free(mem);
+}
+
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..719d48e
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,41 @@
+#include "../include/ast.h"
+
+/* Returns an empty Ast: its type is AST_NONE and its func_decl pointer is NULL. */
+Ast ast_none() {
+    Ast empty;
+    empty.type = AST_NONE;
+    empty.value.func_decl = NULL;
+    return empty;
+}
+
+/*
+ * Intended to release whatever an Ast owns.
+ * Currently a no-op: nothing referenced by @ast is freed.
+ */
+void ast_deinit(Ast *ast) {
+ /* TODO: Cleanup the different types of ast */
+ (void)ast;
+}
+
+/* Puts a FunctionDecl in its initial state: empty body buffer and a null name view. */
+void funcdecl_init(FunctionDecl *self) {
+    buffer_init(&self->body);
+    self->name = create_buffer_view_null();
+}
+
+/*
+ * Releases the body buffer of the function declaration.
+ * The Ast values stored inside the body are not deinitialized here.
+ */
+void funcdecl_deinit(FunctionDecl *self) {
+ buffer_deinit(&self->body);
+}
+
+/*
+ * Appends a copy of @ast (sizeof(Ast) bytes) to the body buffer.
+ * Returns BUFFER_OK on success; a buffer_append failure is propagated through
+ * return_if_error (macro defined elsewhere).
+ */
+int funcdecl_add_to_body(FunctionDecl *self, Ast ast) {
+ return_if_error(buffer_append(&self->body, &ast, sizeof(ast)));
+ return BUFFER_OK;
+}
+
+/* Initializes a FunctionCall by storing the @name view; the view is copied by value. */
+void funccall_init(FunctionCall *self, BufferView name) {
+ self->name = name;
+}
+
+/*
+ * Initializes a left-hand-side expression with its const flag and variable name.
+ * The right-hand side starts out as an empty Ast (see ast_none).
+ */
+void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
+    self->rhs_expr = ast_none();
+    self->var_name = var_name;
+    self->isConst = isConst;
+}
+
+/* Deinitializes the right-hand-side Ast held by the expression. */
+void lhsexpr_deinit(LhsExpr *self) {
+ ast_deinit(&self->rhs_expr);
+} \ No newline at end of file
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..4bd3b68
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,56 @@
+#include "../include/buffer.h"
+#include "../include/alloc.h"
+#include "../include/mem.h"
+#include <assert.h>
+
+/* Puts the buffer in its empty state: no storage, zero size and zero capacity. */
+void buffer_init(Buffer *self) {
+    self->capacity = 0;
+    self->size = 0;
+    self->data = NULL;
+}
+
+/*
+ * Frees the buffer storage and returns the buffer to its empty state,
+ * so it can be reused or deinitialized again safely.
+ */
+void buffer_deinit(Buffer *self) {
+    am_free(self->data);
+    /* Resets data, size and capacity exactly like a fresh buffer */
+    buffer_init(self);
+}
+
+/*
+ * Makes sure the buffer storage can hold at least @new_capacity bytes.
+ *
+ * An empty buffer is sized to exactly @new_capacity. A non-empty buffer grows
+ * by roughly 1.5x per step until it is large enough.
+ *
+ * Returns BUFFER_OK on success (including when no growth is needed) and
+ * BUFFER_ALLOC_FAIL if the reallocation fails, in which case the buffer is
+ * left unchanged.
+ */
+static WARN_UNUSED_RESULT int buffer_ensure_capacity(Buffer *self, usize new_capacity) {
+    usize capacity;
+    usize grown;
+    void *new_mem;
+    int alloc_result;
+
+    if(self->capacity >= new_capacity)
+        return BUFFER_OK;
+
+    capacity = self->capacity;
+    if(capacity == 0) {
+        capacity = new_capacity;
+    } else {
+        while(capacity < new_capacity) {
+            /*
+             * Integer growth instead of "capacity *= 1.5": the floating point form
+             * truncates 1 * 1.5 back to 1 and would loop forever when capacity is 1.
+             * The +1 guarantees progress on every iteration.
+             */
+            grown = capacity + capacity / 2 + 1;
+            /* NOTE(review): wrap-around check assumes usize is an unsigned type */
+            if(grown <= capacity) {
+                capacity = new_capacity;
+                break;
+            }
+            capacity = grown;
+        }
+    }
+
+    /* self->data is only replaced once the reallocation has succeeded */
+    alloc_result = am_realloc(self->data, capacity, &new_mem);
+    if(alloc_result != ALLOC_OK)
+        return BUFFER_ALLOC_FAIL;
+
+    self->data = new_mem;
+    self->capacity = capacity;
+    return BUFFER_OK;
+}
+
+/*
+ * Copies @size bytes from @data to the end of the buffer, growing the storage if needed.
+ *
+ * Returns BUFFER_OK on success. If the storage can't be grown the error from
+ * buffer_ensure_capacity is propagated and the buffer is left unchanged.
+ */
+int buffer_append(Buffer *self, void *data, usize size) {
+    return_if_error(buffer_ensure_capacity(self, self->size + size));
+    am_memcpy(self->data + self->size, data, size);
+    /* Advance the write position, otherwise every append would overwrite offset 0 */
+    self->size += size;
+    return BUFFER_OK;
+}
+
+/*
+ * Returns a pointer to element @index, where every element is @type_size bytes.
+ * Asserts that the element starts inside the used part of the buffer.
+ */
+void* buffer_get(Buffer *self, usize index, usize type_size) {
+    const usize byte_offset = index * type_size;
+    assert(byte_offset < self->size);
+    return self->data + byte_offset;
+} \ No newline at end of file
diff --git a/src/buffer_view.c b/src/buffer_view.c
new file mode 100644
index 0000000..96b0dd7
--- /dev/null
+++ b/src/buffer_view.c
@@ -0,0 +1,15 @@
+#include "../include/buffer_view.h"
+
+/* Returns a view that refers to nothing: NULL data and size 0. */
+BufferView create_buffer_view_null() {
+    BufferView null_view;
+    null_view.size = 0;
+    null_view.data = NULL;
+    return null_view;
+}
+
+/*
+ * Returns a view over @size bytes starting at @data.
+ * Only the pointer is stored; the bytes are not copied.
+ */
+BufferView create_buffer_view(const char *data, usize size) {
+    BufferView view;
+    view.size = size;
+    view.data = data;
+    return view;
+} \ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..f3147bb
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <string.h>
+#include "../include/parser.h"
+
+/*
+ * Parses a hard-coded sample program.
+ * Returns 0 if parsing succeeds and 1 (after printing a message to stderr)
+ * if the parser can't be initialized or the code fails to parse.
+ */
+int main() {
+    Parser parser;
+    BufferView code_view;
+    const char *code =
+        "const main = () {\n"
+        " var hello = () {\n"
+        " \n"
+        " }\n"
+        " hello()\n"
+        "}\n"
+        "const print = () {\n"
+        " \n"
+        "}";
+
+    if(parser_init(&parser) != PARSER_OK) {
+        fprintf(stderr, "Failed to initialize parser\n");
+        return 1;
+    }
+
+    code_view = create_buffer_view(code, strlen(code));
+    if(parser_parse_buffer(&parser, code_view) != PARSER_OK) {
+        fprintf(stderr, "Failed to parse\n");
+        return 1;
+    }
+
+    /* parser_deinit(&parser) is skipped on purpose: the program is exiting */
+    return 0;
+}
diff --git a/src/mem.c b/src/mem.c
new file mode 100644
index 0000000..acd2ebd
--- /dev/null
+++ b/src/mem.c
@@ -0,0 +1,10 @@
+#include "../include/mem.h"
+#include <string.h>
+
+/* Copies @size bytes from @src to @dest using memcpy, so the regions must not overlap. */
+void am_memcpy(void *dest, const void *src, usize size) {
+ memcpy(dest, src, size);
+}
+
+/* Returns true when the first @size bytes of @lhs and @rhs are identical. */
+bool am_memeql(const void *lhs, const void *rhs, usize size) {
+    const int difference = memcmp(lhs, rhs, size);
+    return difference == 0;
+} \ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..ddf4a18
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,187 @@
+#include "../include/parser.h"
+#include "../include/ast.h"
+#include "../include/misc.h"
+#include "../include/alloc.h"
+#include <stdio.h>
+
+static WARN_UNUSED_RESULT int parser_parse_body(Parser *self, Ast *ast);
+
+/*
+ * Initializes the parser's list of ast objects as an empty buffer.
+ * Always returns PARSER_OK. The tokenizer is set up later, in parser_parse_buffer.
+ */
+int parser_init(Parser *self) {
+ buffer_init(&self->ast_objects);
+ return PARSER_OK;
+}
+
+/*
+ * Deinitializes every Ast stored in the parser and then releases the buffer that holds them.
+ */
+void parser_deinit(Parser *self) {
+    usize i;
+    usize ast_count;
+    Ast *asts;
+
+    /*
+     * The buffer stores raw bytes, so it has to be viewed as an array of Ast
+     * before indexing. Indexing the byte pointer with the element index would
+     * step one byte at a time instead of one Ast at a time.
+     */
+    asts = (Ast*)self->ast_objects.data;
+    ast_count = self->ast_objects.size / sizeof(Ast);
+    for(i = 0; i < ast_count; ++i) {
+        ast_deinit(&asts[i]);
+    }
+    buffer_deinit(&self->ast_objects);
+}
+
+/*
+ * Parses an optional left-hand side: "const" or "var" followed by an identifier.
+ *
+ * @result is set to NULL when the next token is neither "const" nor "var";
+ * that case is not an error and PARSER_OK is returned. Otherwise @result
+ * receives a newly allocated LhsExpr that the caller owns.
+ * Tokenizer and allocation errors are propagated through return_if_error.
+ */
+static WARN_UNUSED_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) {
+    bool is_const;
+    bool is_var;
+    BufferView name;
+
+    *result = NULL;
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const));
+    if(!is_const) {
+        return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &is_var));
+        if(!is_var)
+            return PARSER_OK;
+    }
+
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
+    name = self->tokenizer.value.identifier;
+    fprintf(stderr, "var name: %.*s\n", (int)name.size, name.data);
+    return_if_error(am_malloc(sizeof(LhsExpr), (void**)result));
+    lhsexpr_init(*result, is_const, name);
+    return PARSER_OK;
+}
+
+/*
+ * Parses an optional function declaration of the form "() { body... }".
+ *
+ * @func_decl is set to NULL when the next token is not "("; that case is not
+ * an error and PARSER_OK is returned. On success @func_decl receives a newly
+ * allocated FunctionDecl whose body holds the parsed Ast objects.
+ * If parsing fails after the declaration has been allocated, it is freed,
+ * @func_decl is reset to NULL and PARSER_ERR is returned.
+ */
+static WARN_UNUSED_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
+ bool result;
+ *func_decl = NULL;
+
+ /* Not a function declaration unless it starts with "(" */
+ return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &result));
+ if(!result)
+ return PARSER_OK;
+
+ /* TODO: Parse parameters */
+ return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+ /* TODO: Parse return types */
+ return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
+
+ return_if_error(am_malloc(sizeof(FunctionDecl), (void**)func_decl));
+ funcdecl_init(*func_decl);
+
+ /* Parse body objects until the closing brace is found */
+ for(;;) {
+ Ast body_obj;
+ /* NOTE(review): cleanup_if_error is defined elsewhere; presumably it jumps to the cleanup label on error */
+ cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &result));
+ if(result)
+ break;
+
+ cleanup_if_error(parser_parse_body(self, &body_obj));
+ cleanup_if_error(funcdecl_add_to_body(*func_decl, body_obj));
+ }
+ return PARSER_OK;
+
+ cleanup:
+ if(*func_decl) {
+ funcdecl_deinit(*func_decl);
+ am_free(*func_decl);
+ *func_decl = NULL;
+ }
+ return PARSER_ERR;
+}
+
+/*
+ * Parses an optional function call of the form "name()".
+ *
+ * @func_call is set to NULL when the next token is not an identifier; that
+ * case is not an error and PARSER_OK is returned. Otherwise @func_call
+ * receives a newly allocated FunctionCall that the caller owns.
+ * Tokenizer and allocation errors are propagated through return_if_error.
+ */
+static WARN_UNUSED_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) {
+    BufferView callee_name;
+    bool starts_with_identifier;
+
+    *func_call = NULL;
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &starts_with_identifier));
+    if(starts_with_identifier) {
+        callee_name = self->tokenizer.value.identifier;
+        return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
+        /* TODO: Parse arguments */
+        return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+
+        return_if_error(am_malloc(sizeof(FunctionCall), (void**)func_call));
+        funccall_init(*func_call, callee_name);
+    }
+    return PARSER_OK;
+}
+
+/*
+ * Parses a right-hand-side expression: either a function declaration or a function call.
+ *
+ * On success @rhs_expr is filled in with the matching type (AST_FUNCTION_DECL or
+ * AST_FUNCTION_CALL) and value, and PARSER_OK is returned.
+ * If neither form matches, an error is printed and PARSER_UNEXPECTED_TOKEN is returned.
+ * If one of the sub-parsers fails, PARSER_ERR is returned.
+ */
+static WARN_UNUSED_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) {
+    FunctionDecl *func_decl;
+    FunctionCall *func_call;
+    Token token;
+    func_decl = NULL;
+    func_call = NULL;
+
+    cleanup_if_error(parser_parse_function_decl(self, &func_decl));
+    if(func_decl) {
+        rhs_expr->type = AST_FUNCTION_DECL;
+        rhs_expr->value.func_decl = func_decl;
+        return PARSER_OK;
+    }
+
+    cleanup_if_error(parser_parse_function_call(self, &func_call));
+    if(func_call) {
+        rhs_expr->type = AST_FUNCTION_CALL;
+        rhs_expr->value.func_call = func_call;
+        return PARSER_OK;
+    }
+
+    /* Neither form matched; read the offending token so it can be reported */
+    return_if_error(tokenizer_next(&self->tokenizer, &token));
+    /* TODO: Convert token to string */
+    /* The token has to be passed along: a "%d" without a matching argument is undefined behavior */
+    tokenizer_print_error(&self->tokenizer, "Expected function declaration or function call, got token: %d", (int)token);
+    return PARSER_UNEXPECTED_TOKEN;
+
+    cleanup:
+    if(func_decl) {
+        funcdecl_deinit(func_decl);
+        am_free(func_decl);
+    }
+    if(func_call) {
+        /*funccall_deinit(func_call);*/
+        am_free(func_call);
+    }
+    return PARSER_ERR;
+}
+
+/*
+ * Parses one body object: an optional "const name =" / "var name =" prefix
+ * followed by a right-hand-side expression.
+ *
+ * With a prefix, @ast becomes an AST_LHS that owns the LhsExpr, which in turn
+ * holds the right-hand side. Without one, @ast is the right-hand side itself.
+ *
+ * Returns PARSER_OK on success. Errors from parsing the prefix are propagated
+ * as-is. Any failure after the LhsExpr has been allocated frees it and
+ * returns PARSER_ERR.
+ */
+int parser_parse_body(Parser *self, Ast *ast) {
+    LhsExpr *lhs_expr;
+    Ast rhs_expr;
+
+    return_if_error(parser_parse_lhs(self, &lhs_expr));
+    if(lhs_expr) {
+        /*
+         * lhs_expr is already allocated here, so a missing "=" has to go
+         * through the cleanup path instead of returning directly and leaking it.
+         */
+        cleanup_if_error(tokenizer_accept(&self->tokenizer, TOK_EQUALS));
+    }
+
+    cleanup_if_error(parser_parse_rhs(self, &rhs_expr));
+    if(lhs_expr) {
+        lhs_expr->rhs_expr = rhs_expr;
+        ast->type = AST_LHS;
+        ast->value.lhs_expr = lhs_expr;
+    } else {
+        *ast = rhs_expr;
+    }
+    return PARSER_OK;
+
+    cleanup:
+    if(lhs_expr) {
+        lhsexpr_deinit(lhs_expr);
+        am_free(lhs_expr);
+    }
+    return PARSER_ERR;
+}
+
+/*
+ * Tokenizes and parses @code_buffer, appending every top-level Ast to the
+ * parser's list of ast objects.
+ *
+ * Returns PARSER_OK when the end of the code is reached and PARSER_ERR if
+ * tokenizing, parsing or storing an object fails. A tokenizer_init failure is
+ * propagated through return_if_error.
+ */
+int parser_parse_buffer(Parser *self, BufferView code_buffer) {
+    Ast current;
+
+    current = ast_none();
+    return_if_error(tokenizer_init(&self->tokenizer, code_buffer));
+
+    for(;;) {
+        bool reached_end;
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_END_OF_FILE, &reached_end));
+        if(reached_end)
+            break;
+
+        cleanup_if_error(parser_parse_body(self, &current));
+        cleanup_if_error(buffer_append(&self->ast_objects, &current, sizeof(current)));
+        /* Once stored, the object belongs to the list; only an unstored one is handled by cleanup */
+        current = ast_none();
+    }
+
+    tokenizer_deinit(&self->tokenizer);
+    return PARSER_OK;
+
+    cleanup:
+    tokenizer_deinit(&self->tokenizer);
+    ast_deinit(&current);
+    return PARSER_ERR;
+}
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..f1763a5
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,186 @@
+#include "../include/tokenizer.h"
+#include "../include/mem.h"
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/* Returns 1 if @c is an ASCII letter (a-z or A-Z), otherwise 0. */
+static int isAlpha(int c) {
+    const int is_lower = ('a' <= c && c <= 'z');
+    const int is_upper = ('A' <= c && c <= 'Z');
+    return is_lower || is_upper;
+}
+
+/* Returns 1 if @c is an ASCII decimal digit (0-9), otherwise 0. */
+static int isDigit(int c) {
+    return '0' <= c && c <= '9';
+}
+
+/* Returns 1 if @c is an ASCII letter or decimal digit, otherwise 0. */
+static int isAlphaDigit(int c) {
+    if(isAlpha(c))
+        return 1;
+    return isDigit(c);
+}
+
+/*
+ * Prepares the tokenizer to read @code from its beginning, starting at line 1.
+ * The code size must fit in an int, since positions are tracked as int (asserted).
+ * Always returns 0.
+ */
+int tokenizer_init(Tokenizer *self, BufferView code) {
+    assert(code.size <= INT_MAX);
+    self->line = 1;
+    self->prev_index = 0;
+    self->index = 0;
+    self->code = code;
+    return 0;
+}
+
+/* Currently a no-op: nothing is released here. */
+void tokenizer_deinit(Tokenizer *self) {
+ (void)self;
+}
+
+/*
+ * Returns the character at the current read position.
+ * The position must be inside the code (asserted).
+ */
+static int tokenizer_get_char(Tokenizer *self) {
+    const char *current;
+    assert(self->index >= 0 && self->index < (int)self->code.size);
+    current = self->code.data + self->index;
+    return *current;
+}
+
+/*
+ * Advances past spaces, tabs and newlines, counting a line for every newline.
+ * Returns TOK_END_OF_FILE if the end of the code is reached, otherwise TOK_NONE
+ * with the read position on the first non-whitespace character.
+ */
+static Token tokenizer_skip_whitespace(Tokenizer *self) {
+    while(self->index < (int)self->code.size) {
+        const int c = self->code.data[self->index];
+        if(c == '\n')
+            ++self->line;
+        else if(c != ' ' && c != '\t')
+            return TOK_NONE;
+        ++self->index;
+    }
+    return TOK_END_OF_FILE;
+}
+
+/*
+ * Reads the next token from the code and stores its kind in @token.
+ *
+ * Leading whitespace is skipped. Identifiers start with a letter or '_' and
+ * continue with letters, digits or '_'; their text is stored in
+ * self->value.identifier. The identifiers "const" and "var" are reported as
+ * TOK_CONST and TOK_VAR. The single characters = ( ) { } map to their own tokens.
+ *
+ * Returns TOKENIZER_OK on success (including TOK_END_OF_FILE at the end of the
+ * code). Any other character prints an error and returns
+ * TOKENIZER_UNEXPECTED_TOKEN without storing a token.
+ */
+int tokenizer_next(Tokenizer *self, Token *token) {
+    Token last_token;
+    int c;
+
+    last_token = tokenizer_skip_whitespace(self);
+    if(last_token == TOK_END_OF_FILE) {
+        *token = TOK_END_OF_FILE;
+        return TOKENIZER_OK;
+    }
+
+    /* Remember where the token starts; tokenizer_print_error points at this position */
+    self->prev_index = self->index;
+    c = tokenizer_get_char(self);
+    if(isAlpha(c) || c == '_') {
+        int identifier_start;
+        identifier_start = self->index;
+        ++self->index;
+
+        while(self->index < (int)self->code.size) {
+            c = tokenizer_get_char(self);
+            if(isAlphaDigit(c) || c == '_')
+                ++self->index;
+            else
+                break;
+        }
+
+        self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start);
+
+        /*
+         * The length has to match as well as the bytes. Comparing bytes alone
+         * would treat identifiers such as "constant" or "variable" as keywords
+         * and could read past the end of a short identifier.
+         */
+        if(self->value.identifier.size == 5 && am_memeql(self->value.identifier.data, "const", 5))
+            *token = TOK_CONST;
+        else if(self->value.identifier.size == 3 && am_memeql(self->value.identifier.data, "var", 3))
+            *token = TOK_VAR;
+        else
+            *token = TOK_IDENTIFIER;
+    } else if(c == '=') {
+        ++self->index;
+        *token = TOK_EQUALS;
+    } else if(c == '(') {
+        ++self->index;
+        *token = TOK_OPEN_PAREN;
+    } else if(c == ')') {
+        ++self->index;
+        *token = TOK_CLOSING_PAREN;
+    } else if(c == '{') {
+        ++self->index;
+        *token = TOK_OPEN_BRACE;
+    } else if(c == '}') {
+        ++self->index;
+        *token = TOK_CLOSING_BRACE;
+    } else {
+        tokenizer_print_error(self, "Unexpected symbol '%c'", c);
+        return TOKENIZER_UNEXPECTED_TOKEN;
+    }
+    return TOKENIZER_OK;
+}
+
+/*
+ * Reads the next token and requires it to be @expected_token.
+ * Returns TOKENIZER_OK on a match. On a mismatch an error is printed and
+ * TOKENIZER_UNEXPECTED_TOKEN is returned. Errors from tokenizer_next are
+ * propagated through return_if_error.
+ */
+int tokenizer_accept(Tokenizer *self, Token expected_token) {
+    Token found_token;
+
+    return_if_error(tokenizer_next(self, &found_token));
+    if(found_token != expected_token) {
+        /* Todo: convert token to string */
+        tokenizer_print_error(self, "Expected %d, got %d", expected_token, found_token);
+        return TOKENIZER_UNEXPECTED_TOKEN;
+    }
+    return TOKENIZER_OK;
+}
+
+/*
+ * Consumes the next token only if it is @expected_token.
+ *
+ * @result is set to bool_true when the token matched and was consumed.
+ * Otherwise the read position and line number are rewound so the token can be
+ * read again, and @result is set to bool_false.
+ * Returns TOKENIZER_OK in both cases; errors from tokenizer_next are
+ * propagated through return_if_error.
+ */
+int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) {
+    Token found_token;
+    const int saved_index = self->index;
+    const int saved_line = self->line;
+
+    return_if_error(tokenizer_next(self, &found_token));
+    if(found_token != expected_token) {
+        /* prev_index is not restored; the next call to tokenizer_next updates it anyway */
+        self->index = saved_index;
+        self->line = saved_line;
+        *result = bool_false;
+        return TOKENIZER_OK;
+    }
+
+    *result = bool_true;
+    return TOKENIZER_OK;
+}
+
+/*
+ * Searches backwards from @index (inclusive) for a '\n' or '\r' and returns
+ * the position right after it, or 0 if there is none.
+ */
+static int tokenizer_get_start_of_line_from_index(Tokenizer *self, int index) {
+    int pos;
+    for(pos = index; pos >= 0; --pos) {
+        const int c = self->code.data[(usize)pos];
+        if(c == '\n' || c == '\r')
+            return pos + 1;
+    }
+    return 0;
+}
+
+/*
+ * Searches forwards from @index (inclusive) for a '\n' or '\r' and returns its
+ * position, or the size of the code if there is none.
+ */
+static int tokenizer_get_end_of_line_from_index(Tokenizer *self, int index) {
+    const int code_size = (int)self->code.size;
+    for(; index < code_size; ++index) {
+        const int c = self->code.data[(usize)index];
+        if(c == '\n' || c == '\r')
+            return index;
+    }
+    return index;
+}
+
+/*
+ * Prints a printf-style error message to stderr for the last token that was read.
+ *
+ * The output has three parts: a "file:line:column: error:" header followed by
+ * the formatted message, the source line that contains the token, and a '^'
+ * marker placed under the column where the token starts.
+ */
+void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
+    va_list args;
+    int line_start;
+    int line_end;
+    int column;
+    int remaining;
+
+    line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
+    line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
+    column = self->prev_index - line_start;
+
+    fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + column);
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
+    /* Indent the marker so it lines up with the start of the token */
+    for(remaining = column; remaining > 0; --remaining)
+        fprintf(stderr, " ");
+    fprintf(stderr, "\x1b[1;32m^\x1b[0m\n");
+} \ No newline at end of file