From 11dc4b81935e3dfee997c421d8d6fa166edd7a05 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Sun, 24 Feb 2019 02:10:58 +0100
Subject: Initial commit, Function declaration work somewhat

---
 src/alloc.c       |  35 +++++++++
 src/ast.c         |  46 +++++++++++
 src/buffer.c      |  82 ++++++++++++++++++++
 src/buffer_view.c |  17 ++++
 src/main.c        |  38 +++++++++
 src/mem.c         |  12 +++
 src/parser.c      | 218 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/tokenizer.c   | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 663 insertions(+)
 create mode 100644 src/alloc.c
 create mode 100644 src/ast.c
 create mode 100644 src/buffer.c
 create mode 100644 src/buffer_view.c
 create mode 100644 src/main.c
 create mode 100644 src/mem.c
 create mode 100644 src/parser.c
 create mode 100644 src/tokenizer.c

(limited to 'src')

diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..c9ca7c3
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,35 @@
+#include "../include/alloc.h"
+#include <stdlib.h>
+
+/*
+ * Allocates @size bytes and stores the address in @mem.
+ * Returns ALLOC_OK on success. On failure ALLOC_FAIL is returned and @mem is left untouched.
+ */
+int am_malloc(usize size, void **mem) {
+    void *allocated_data = malloc(size);
+    if(!allocated_data)
+        return ALLOC_FAIL;
+
+    *mem = allocated_data;
+    return ALLOC_OK;
+}
+
+/*
+ * Resizes the allocation @mem to @new_size bytes and stores the (possibly moved) address in @new_mem.
+ * Returns ALLOC_OK on success. On failure ALLOC_FAIL is returned and @new_mem is left untouched,
+ * the caller still has to release @mem in that case.
+ */
+int am_realloc(void *mem, usize new_size, void **new_mem) {
+    void *new_allocated_data = realloc(mem, new_size);
+    if(!new_allocated_data)
+        return ALLOC_FAIL;
+
+    *new_mem = new_allocated_data;
+    return ALLOC_OK;
+}
+
+/* Releases memory obtained from am_malloc or am_realloc. Passing NULL is a no-op */
+void am_free(void *mem) {
+    free(mem);
+}
+
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..719d48e
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,46 @@
+#include "../include/ast.h"
+
+/* Creates an empty ast node: type AST_NONE with a NULL value */
+Ast ast_none() {
+    Ast ast;
+    ast.value.func_decl = NULL;
+    ast.type = AST_NONE;
+    return ast;
+}
+
+void ast_deinit(Ast *ast) {
+    /* TODO: Cleanup the different types of ast */
+    (void)ast;
+}
+
+/* Initializes a function declaration with no name and an empty body */
+void funcdecl_init(FunctionDecl *self) {
+    self->name = create_buffer_view_null();
+    buffer_init(&self->body);
+}
+
+/* Releases the body buffer. The ast objects stored in the body are not deinitialized here */
+void funcdecl_deinit(FunctionDecl *self) {
+    buffer_deinit(&self->body);
+}
+
+/* Appends a copy of @ast to the function body. Returns BUFFER_OK on success */
+int funcdecl_add_to_body(FunctionDecl *self, Ast ast) {
+    return_if_error(buffer_append(&self->body, &ast, sizeof(ast)));
+    return BUFFER_OK;
+}
+
+void funccall_init(FunctionCall *self, BufferView name) {
+    self->name = name;
+}
+
+/* Initializes a left-hand side expression (const/var declaration) without a right-hand side */
+void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
+    self->isConst = isConst;
+    self->var_name = var_name;
+    self->rhs_expr = ast_none();
+}
+
+void lhsexpr_deinit(LhsExpr *self) {
+    ast_deinit(&self->rhs_expr);
+}
\ No newline at end of file
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..4bd3b68
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,82 @@
+#include "../include/buffer.h"
+#include "../include/alloc.h"
+#include "../include/mem.h"
+#include <assert.h>
+
+void buffer_init(Buffer *self) {
+    self->data = NULL;
+    self->size = 0;
+    self->capacity = 0;
+}
+
+void buffer_deinit(Buffer *self) {
+    am_free(self->data);
+    self->data = NULL;
+    self->size = 0;
+    self->capacity = 0;
+}
+
+/*
+ * Makes sure the buffer can hold at least @new_capacity bytes.
+ * The capacity grows by a factor of 1.5 until it is large enough.
+ * Returns BUFFER_OK on success and BUFFER_ALLOC_FAIL if the reallocation fails,
+ * in which case the buffer is left unchanged.
+ */
+static WARN_UNUSED_RESULT int buffer_ensure_capacity(Buffer *self, usize new_capacity) {
+    usize capacity;
+    usize grown_capacity;
+    void *new_mem;
+    int alloc_result;
+
+    if(self->capacity >= new_capacity)
+        return BUFFER_OK;
+
+    capacity = self->capacity;
+    if(capacity == 0) {
+        capacity = new_capacity;
+    } else {
+        while(capacity < new_capacity) {
+            grown_capacity = capacity + capacity / 2;
+            /*
+             * Growing a capacity of 1 by 1.5 rounds back down to 1, which would loop forever.
+             * Fall back to the requested capacity whenever growing makes no progress.
+             */
+            if(grown_capacity <= capacity) {
+                capacity = new_capacity;
+                break;
+            }
+            capacity = grown_capacity;
+        }
+    }
+
+    alloc_result = am_realloc(self->data, capacity, &new_mem);
+    if(alloc_result != ALLOC_OK)
+        return BUFFER_ALLOC_FAIL;
+
+    self->data = new_mem;
+    self->capacity = capacity;
+    return BUFFER_OK;
+}
+
+/*
+ * Copies @size bytes from @data to the end of the buffer, growing the buffer if needed.
+ * Returns BUFFER_OK on success.
+ */
+int buffer_append(Buffer *self, void *data, usize size) {
+    return_if_error(buffer_ensure_capacity(self, self->size + size));
+    am_memcpy(self->data + self->size, data, size);
+    /* Record the appended bytes, otherwise the next append overwrites them and buffer_get never finds them */
+    self->size += size;
+    return BUFFER_OK;
+}
+
+/*
+ * Returns a pointer to the element at @index, where every element is @type_size bytes.
+ * The element has to start inside the used part of the buffer.
+ */
+void* buffer_get(Buffer *self, usize index, usize type_size) {
+    usize real_index;
+    real_index = index * type_size;
+    assert(real_index < self->size);
+    return &self->data[real_index];
+}
\ No newline at end of file
diff --git a/src/buffer_view.c b/src/buffer_view.c
new file mode 100644
index 0000000..96b0dd7
--- /dev/null
+++ b/src/buffer_view.c
@@ -0,0 +1,17 @@
+#include "../include/buffer_view.h"
+
+/* Creates a view that references no data (NULL data, size 0) */
+BufferView create_buffer_view_null() {
+    BufferView buffer_view;
+    buffer_view.data = NULL;
+    buffer_view.size = 0;
+    return buffer_view;
+}
+
+/* Creates a view of @size bytes starting at @data. The data is referenced, not copied */
+BufferView create_buffer_view(const char *data, usize size) {
+    BufferView buffer_view;
+    buffer_view.data = data;
+    buffer_view.size = size;
+    return buffer_view;
+}
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..f3147bb
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <string.h>
+#include "../include/parser.h"
+
+/* Parses a hardcoded sample program. Exits with 0 if it parses and 1 otherwise */
+int main() {
+    const char *code;
+    Parser parser;
+    BufferView code_view;
+    int result;
+
+    code =
+        "const main = () {\n"
+        " var hello = () {\n"
+        " \n"
+        " }\n"
+        " hello()\n"
+        "}\n"
+        "const print = () {\n"
+        " \n"
+        "}";
+    result = parser_init(&parser);
+    if(result != PARSER_OK) {
+        fprintf(stderr, "Failed to initialize parser\n");
+        return 1;
+    }
+
+    code_view = create_buffer_view(code, strlen(code));
+    result = parser_parse_buffer(&parser, code_view);
+    if(result != PARSER_OK) {
+        fprintf(stderr, "Failed to parse\n");
+        return 1;
+    }
+
+    /* No need to do this here as the program is exiting */
+    /* parser_deinit(&parser); */
+    return 0;
+}
diff --git a/src/mem.c b/src/mem.c
new file mode 100644
index 0000000..acd2ebd
--- /dev/null
+++ b/src/mem.c
@@ -0,0 +1,12 @@
+#include "../include/mem.h"
+#include <string.h>
+
+/* Copies @size bytes from @src to @dest. The regions must not overlap (see memcpy) */
+void am_memcpy(void *dest, const void *src, usize size) {
+    memcpy(dest, src, size);
+}
+
+/* Returns true if the first @size bytes of @lhs and @rhs are equal */
+bool am_memeql(const void *lhs, const void *rhs, usize size) {
+    return memcmp(lhs, rhs, size) == 0;
+}
\ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..ddf4a18
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,218 @@
+#include "../include/parser.h"
+#include "../include/ast.h"
+#include "../include/misc.h"
+#include "../include/alloc.h"
+#include <stdio.h>
+
+static WARN_UNUSED_RESULT int parser_parse_body(Parser *self, Ast *ast);
+
+int parser_init(Parser *self) {
+    buffer_init(&self->ast_objects);
+    return PARSER_OK;
+}
+
+void parser_deinit(Parser *self) {
+    usize i;
+    Ast *ast_objects;
+
+    /* The buffer stores raw bytes, so it has to be indexed as an array of Ast and not byte by byte */
+    ast_objects = (Ast*)self->ast_objects.data;
+    for(i = 0; i < self->ast_objects.size / sizeof(Ast); ++i) {
+        ast_deinit(&ast_objects[i]);
+    }
+    buffer_deinit(&self->ast_objects);
+}
+
+/*
+ * LHS = ("const" | "var") IDENTIFIER
+ * @result is set to NULL if the next token is neither "const" nor "var".
+ * Otherwise @result is set to a newly allocated LhsExpr that the caller owns.
+ */
+static WARN_UNUSED_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) {
+    bool isConst;
+    BufferView var_name;
+    *result = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &isConst));
+    if(!isConst) {
+        bool isVar;
+        return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &isVar));
+        if(!isVar)
+            return PARSER_OK;
+    }
+
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
+    var_name = self->tokenizer.value.identifier;
+    fprintf(stderr, "var name: %.*s\n", (int)var_name.size, var_name.data);
+    return_if_error(am_malloc(sizeof(LhsExpr), (void**)result));
+    lhsexpr_init(*result, isConst, var_name);
+    return PARSER_OK;
+}
+
+/*
+ * FUNC_DECL = "(" ")" "{" BODY* "}"
+ * @func_decl is set to NULL if the next token is not "(".
+ * Otherwise @func_decl is set to a newly allocated FunctionDecl that the caller owns.
+ */
+static WARN_UNUSED_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
+    bool result;
+    *func_decl = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &result));
+    if(!result)
+        return PARSER_OK;
+
+    /* TODO: Parse parameters */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+    /* TODO: Parse return types */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
+
+    return_if_error(am_malloc(sizeof(FunctionDecl), (void**)func_decl));
+    funcdecl_init(*func_decl);
+
+    for(;;) {
+        Ast body_obj;
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &result));
+        if(result)
+            break;
+
+        cleanup_if_error(parser_parse_body(self, &body_obj));
+        cleanup_if_error(funcdecl_add_to_body(*func_decl, body_obj));
+    }
+    return PARSER_OK;
+
+    cleanup:
+    if(*func_decl) {
+        funcdecl_deinit(*func_decl);
+        am_free(*func_decl);
+        *func_decl = NULL;
+    }
+    return PARSER_ERR;
+}
+
+/*
+ * FUNC_CALL = IDENTIFIER "(" ")"
+ * @func_call is set to NULL if the next token is not an identifier.
+ * Otherwise @func_call is set to a newly allocated FunctionCall that the caller owns.
+ */
+static WARN_UNUSED_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) {
+    bool result;
+    BufferView func_name;
+    *func_call = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &result));
+    if(!result)
+        return PARSER_OK;
+
+    func_name = self->tokenizer.value.identifier;
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
+    /* TODO: Parse arguments */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+
+    return_if_error(am_malloc(sizeof(FunctionCall), (void**)func_call));
+    funccall_init(*func_call, func_name);
+    return PARSER_OK;
+}
+
+/* RHS = FUNC_DECL | FUNC_CALL */
+static WARN_UNUSED_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) {
+    FunctionDecl *func_decl;
+    FunctionCall *func_call;
+    Token token;
+    func_decl = NULL;
+    func_call = NULL;
+
+    cleanup_if_error(parser_parse_function_decl(self, &func_decl));
+    if(func_decl) {
+        rhs_expr->type = AST_FUNCTION_DECL;
+        rhs_expr->value.func_decl = func_decl;
+        return PARSER_OK;
+    }
+
+    cleanup_if_error(parser_parse_function_call(self, &func_call));
+    if(func_call) {
+        rhs_expr->type = AST_FUNCTION_CALL;
+        rhs_expr->value.func_call = func_call;
+        return PARSER_OK;
+    }
+
+    return_if_error(tokenizer_next(&self->tokenizer, &token));
+    /* TODO: Convert token to string */
+    /* The token has to be passed as an argument, the format string reads it with %d */
+    tokenizer_print_error(&self->tokenizer, "Expected function declaration or function call, got token: %d", (int)token);
+    return PARSER_UNEXPECTED_TOKEN;
+
+    cleanup:
+    if(func_decl) {
+        funcdecl_deinit(func_decl);
+        am_free(func_decl);
+    }
+    if(func_call) {
+        /*funccall_deinit(func_call);*/
+        am_free(func_call);
+    }
+    return PARSER_ERR;
+}
+
+/*
+ * BODY = (LHS "=" RHS) | RHS
+ * On success @ast holds the parsed expression. On failure the LhsExpr allocated here is freed.
+ */
+int parser_parse_body(Parser *self, Ast *ast) {
+    LhsExpr *lhs_expr;
+    Ast rhs_expr;
+
+    return_if_error(parser_parse_lhs(self, &lhs_expr));
+    if(lhs_expr) {
+        /* lhs_expr is already allocated here, so go through cleanup to not leak it on error */
+        cleanup_if_error(tokenizer_accept(&self->tokenizer, TOK_EQUALS));
+    }
+
+    cleanup_if_error(parser_parse_rhs(self, &rhs_expr));
+    if(lhs_expr) {
+        lhs_expr->rhs_expr = rhs_expr;
+        ast->type = AST_LHS;
+        ast->value.lhs_expr = lhs_expr;
+    } else {
+        *ast = rhs_expr;
+    }
+    return PARSER_OK;
+
+    cleanup:
+    if(lhs_expr) {
+        lhsexpr_deinit(lhs_expr);
+        am_free(lhs_expr);
+    }
+    return PARSER_ERR;
+}
+
+/*
+ * Parses @code_buffer and appends every top level expression to the parser's ast_objects.
+ * Returns PARSER_OK when the end of the buffer is reached without errors.
+ */
+int parser_parse_buffer(Parser *self, BufferView code_buffer) {
+    Ast ast;
+    ast = ast_none();
+    return_if_error(tokenizer_init(&self->tokenizer, code_buffer));
+
+    for(;;) {
+        bool isEof;
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_END_OF_FILE, &isEof));
+        if(isEof)
+            goto cleanup_noerr;
+
+        cleanup_if_error(parser_parse_body(self, &ast));
+        cleanup_if_error(buffer_append(&self->ast_objects, &ast, sizeof(ast)));
+        /* For cleanup, we only want to cleanup the last created ast after parser_parse_body */
+        ast = ast_none();
+    }
+
+    cleanup_noerr:
+    tokenizer_deinit(&self->tokenizer);
+    return PARSER_OK;
+
+    cleanup:
+    tokenizer_deinit(&self->tokenizer);
+    ast_deinit(&ast);
+    return PARSER_ERR;
+}
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..f1763a5
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,215 @@
+#include "../include/tokenizer.h"
+#include "../include/mem.h"
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+static int isAlpha(int c) {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+static int isDigit(int c) {
+    return c >= '0' && c <= '9';
+}
+
+static int isAlphaDigit(int c) {
+    return isAlpha(c) || isDigit(c);
+}
+
+/* Returns true if @identifier is exactly the @keyword_size bytes long @keyword */
+static int identifier_is_keyword(BufferView identifier, const char *keyword, usize keyword_size) {
+    return identifier.size == keyword_size && am_memeql(identifier.data, keyword, keyword_size);
+}
+
+/* Prepares the tokenizer to read tokens from the start of @code. @code is referenced, not copied */
+int tokenizer_init(Tokenizer *self, BufferView code) {
+    assert(code.size <= INT_MAX);
+    self->code = code;
+    self->index = 0;
+    self->prev_index = 0;
+    self->line = 1;
+    return 0;
+}
+
+void tokenizer_deinit(Tokenizer *self) {
+    (void)self;
+}
+
+static int tokenizer_get_char(Tokenizer *self) {
+    assert(self->index >= 0 && self->index < (int)self->code.size);
+    return self->code.data[self->index];
+}
+
+/*
+ * Moves the index past whitespace and counts the lines that are skipped.
+ * Returns TOK_END_OF_FILE if the end of the code is reached, otherwise TOK_NONE.
+ */
+static Token tokenizer_skip_whitespace(Tokenizer *self) {
+    int c;
+    for(;;) {
+        if(self->index >= (int)self->code.size)
+            return TOK_END_OF_FILE;
+
+        c = self->code.data[self->index];
+        switch(c) {
+            case '\n':
+                ++self->line;
+                /* fallthrough */
+            case ' ':
+            case '\t':
+            /* The line helpers in this file already treat '\r' as part of a line ending */
+            case '\r':
+                break;
+            default:
+                return TOK_NONE;
+        }
+        ++self->index;
+    }
+}
+
+/*
+ * Reads the next token and stores it in @token.
+ * For identifiers (and keywords) the text is stored in self->value.identifier.
+ * Returns TOKENIZER_OK on success and TOKENIZER_UNEXPECTED_TOKEN on an unknown symbol.
+ */
+int tokenizer_next(Tokenizer *self, Token *token) {
+    Token last_token;
+    int c;
+
+    last_token = tokenizer_skip_whitespace(self);
+    if(last_token == TOK_END_OF_FILE) {
+        *token = TOK_END_OF_FILE;
+        return TOKENIZER_OK;
+    }
+
+    self->prev_index = self->index;
+    c = tokenizer_get_char(self);
+    if(isAlpha(c) || c == '_') {
+        int identifier_start;
+        identifier_start = self->index;
+        ++self->index;
+
+        while(self->index < (int)self->code.size) {
+            c = tokenizer_get_char(self);
+            if(isAlphaDigit(c) || c == '_')
+                ++self->index;
+            else
+                break;
+        }
+
+        self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start);
+
+        /* The size has to match as well, otherwise "constant" would be tokenized as "const" */
+        if(identifier_is_keyword(self->value.identifier, "const", 5))
+            *token = TOK_CONST;
+        else if(identifier_is_keyword(self->value.identifier, "var", 3))
+            *token = TOK_VAR;
+        else
+            *token = TOK_IDENTIFIER;
+    } else if(c == '=') {
+        ++self->index;
+        *token = TOK_EQUALS;
+    } else if(c == '(') {
+        ++self->index;
+        *token = TOK_OPEN_PAREN;
+    } else if(c == ')') {
+        ++self->index;
+        *token = TOK_CLOSING_PAREN;
+    } else if(c == '{') {
+        ++self->index;
+        *token = TOK_OPEN_BRACE;
+    } else if(c == '}') {
+        ++self->index;
+        *token = TOK_CLOSING_BRACE;
+    } else {
+        /*self.printError("Unexpected symbol '{c}'", c);*/
+        tokenizer_print_error(self, "Unexpected symbol '%c'", c);
+        return TOKENIZER_UNEXPECTED_TOKEN;
+    }
+    return TOKENIZER_OK;
+}
+
+/* Reads the next token and fails with TOKENIZER_UNEXPECTED_TOKEN if it is not @expected_token */
+int tokenizer_accept(Tokenizer *self, Token expected_token) {
+    Token actual_token;
+    return_if_error(tokenizer_next(self, &actual_token));
+    if(actual_token == expected_token)
+        return TOKENIZER_OK;
+
+    /* Todo: convert token to string */
+    tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token);
+    return TOKENIZER_UNEXPECTED_TOKEN;
+}
+
+/*
+ * Reads the next token and sets @result to true if it is @expected_token.
+ * Otherwise the tokenizer is moved back so the token can be read again and @result is set to false.
+ */
+int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) {
+    int index;
+    int line;
+    Token actual_token;
+
+    index = self->index;
+    line = self->line;
+    return_if_error(tokenizer_next(self, &actual_token));
+    if(actual_token == expected_token) {
+        *result = bool_true;
+    } else {
+        /* No need to restore self.prev_index as it's updated on the next call to tokenizer_next */
+        self->index = index;
+        self->line = line;
+        *result = bool_false;
+    }
+    return TOKENIZER_OK;
+}
+
+/* Returns the index after the closest line ending at or before @index, or 0 if there is none */
+static int tokenizer_get_start_of_line_from_index(Tokenizer *self, int index) {
+    int c;
+    while(index >= 0) {
+        c = self->code.data[(usize)index];
+        if(c == '\n' || c == '\r') {
+            return index + 1;
+        }
+        --index;
+    }
+    return 0;
+}
+
+/* Returns the index of the first line ending at or after @index, or the code size if there is none */
+static int tokenizer_get_end_of_line_from_index(Tokenizer *self, int index) {
+    int c;
+    while(index < (int)self->code.size) {
+        c = self->code.data[(usize)index];
+        if(c == '\n' || c == '\r')
+            break;
+        ++index;
+    }
+    return index;
+}
+
+/*
+ * Prints a printf-style error message to stderr, followed by the line of code
+ * where the previous token started and a marker pointing at that token.
+ */
+void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
+    va_list args;
+    int line_start;
+    int line_end;
+    int prev_column;
+    int i;
+
+    va_start(args, fmt);
+    line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
+    line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
+    prev_column = self->prev_index - line_start;
+    fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + prev_column);
+    vfprintf(stderr, fmt, args);
+    fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
+    for(i = 0; i < prev_column; ++i)
+        fprintf(stderr, " ");
+    fprintf(stderr, "\x1b[1;32m^\x1b[0m\n");
+    va_end(args);
+}
\ No newline at end of file
-- 
cgit v1.2.3