From 9428fceb0cacf5ff9e19116de658bcffb98efc6f Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 9 Mar 2019 00:43:13 +0100 Subject: Add check for duplicate variable names --- include/ast.h | 19 ++++++++++++++----- include/tokenizer.h | 10 +++++++--- src/ast.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++-- src/parser.c | 40 +++++++++++++++++++++++++++++++-------- src/std/hash_map.c | 2 +- src/tokenizer.c | 49 +++++++++++++++++++++++++++++++++++++++++++----- tests/main.amal | 1 + 7 files changed, 151 insertions(+), 24 deletions(-) diff --git a/include/ast.h b/include/ast.h index 7911a59..1d40a89 100644 --- a/include/ast.h +++ b/include/ast.h @@ -5,8 +5,13 @@ #include "std/buffer.h" #include "std/misc.h" #include "std/scoped_allocator.h" +#include "std/hash_map.h" #include "binop_type.h" +/* General error */ +#define AST_ERR -1 +#define AST_ERR_DEF_DUP -20 + typedef struct FunctionDecl FunctionDecl; typedef struct FunctionCall FunctionCall; typedef struct LhsExpr LhsExpr; @@ -45,9 +50,14 @@ typedef struct { AstType type; } Ast; +struct Scope { + Buffer ast_objects; + HashMap/*(key=BufferView, value=Ast)*/ named_objects; +}; + struct FunctionDecl { BufferView name; - Buffer body; + Scope body; }; struct FunctionCall { @@ -90,11 +100,8 @@ struct Binop { bool grouped; }; -struct Scope { - Buffer ast_objects; -}; - Ast ast_none(); +BufferView ast_get_name(Ast *self); CHECK_RESULT int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator); CHECK_RESULT int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocator); @@ -103,7 +110,9 @@ void import_init(Import *self, BufferView path); CHECK_RESULT int string_init(String *self, BufferView str); void number_init(Number *self, i64 value, bool is_integer); void binop_init(Binop *self); + CHECK_RESULT int scope_init(Scope *self, ScopedAllocator *allocator); +CHECK_RESULT int scope_add_child(Scope *self, Ast *child); void scope_resolve(Scope *self); #endif diff --git a/include/tokenizer.h b/include/tokenizer.h index 21e93ee..b38cc58 100644 --- a/include/tokenizer.h +++ b/include/tokenizer.h @@ -3,6 +3,7 @@ #include "std/buffer_view.h" #include "std/misc.h" +#include "std/defs.h" #include "binop_type.h" #define TOKENIZER_OK 0 @@ -52,14 +53,15 @@ typedef struct { BinopType binop_type; } value; bool number_is_integer; + ScopedAllocator *allocator; /* borrowed */ } Tokenizer; typedef struct { int index; - const char* str; + char* str; } TokenizerError; -CHECK_RESULT int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name); +CHECK_RESULT int tokenizer_init(Tokenizer *self, ScopedAllocator *allocator, BufferView code, BufferView code_name); CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token); /* @result is set to 0 if the next token is equal to @expected_token, @@ -68,6 +70,8 @@ CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token); CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result); void tokenizer_print_error(Tokenizer *self, const char *fmt, ...); void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error); -TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str); +TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...); +int tokenizer_get_error_index(Tokenizer *self); +int tokenizer_get_code_reference_index(Tokenizer *self, const char *ref); #endif diff --git a/src/ast.c b/src/ast.c index c28b314..2154531 100644 --- a/src/ast.c +++ b/src/ast.c @@ -1,5 +1,6 @@ #include "../include/ast.h" #include "../include/std/log.h" +#include "../include/std/hash.h" #include static void ast_resolve(Ast *self); @@ -11,9 +12,35 @@ Ast ast_none() { return ast; } +BufferView ast_get_name(Ast *self) { + BufferView name; + switch(self->type) { + case AST_NONE: + case AST_IMPORT: + case AST_STRING: + case AST_NUMBER: + case AST_BINOP: + name = create_buffer_view_null(); + break; + case AST_LHS: + name = self->value.lhs_expr->var_name; + break; + case AST_FUNCTION_DECL: + name = self->value.func_decl->name; + break; + case AST_FUNCTION_CALL: + name = self->value.func_call->name; + break; + case AST_VARIABLE: + name = self->value.variable->name; + break; + } + return name; +} + int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator) { self->name = create_buffer_view_null(); - return buffer_init(&self->body, allocator); + return scope_init(&self->body, allocator); } int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocator) { @@ -51,7 +78,29 @@ void binop_init(Binop *self) { } int scope_init(Scope *self, ScopedAllocator *allocator) { - return buffer_init(&self->ast_objects, allocator); + return_if_error(buffer_init(&self->ast_objects, allocator)); + return_if_error(hash_map_init(&self->named_objects, allocator, sizeof(Ast), hash_compare_string, amal_hash_string)); + return 0; +} + +int scope_add_child(Scope *self, Ast *child) { + BufferView child_name; + Ast existing_child; + bool child_already_exists; + + child_name = ast_get_name(child); + if(child_name.data) { + child_already_exists = hash_map_get(&self->named_objects, child_name, &existing_child); + if(child_already_exists) + return AST_ERR_DEF_DUP; + + cleanup_if_error(hash_map_insert(&self->named_objects, child_name, child)); + } + cleanup_if_error(buffer_append(&self->ast_objects, child, sizeof(Ast))); + return 0; + + cleanup: + return AST_ERR; } void scope_resolve(Scope *self) { @@ -74,6 +123,7 @@ void ast_resolve(Ast *self) { lhs_resolve(self->value.lhs_expr); break; default: + /* TODO: Implement all, and remove default case to give error when adding new ast type */ assert(bool_false && "ast_resolve not implemented for type"); break; } diff --git a/src/parser.c b/src/parser.c index 6ee273b..fcc29be 100644 --- a/src/parser.c +++ b/src/parser.c @@ -69,7 +69,8 @@ int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocato /* BODY_LOOP = BODY* @end_token */ -static THROWABLE parser_parse_body_loop(Parser *self, Buffer *body_list, Token end_token) { +static THROWABLE parser_parse_body_loop(Parser *self, Scope *scope, Token end_token) { + int result; for(;;) { Ast body_obj; bool is_end_token; @@ -78,7 +79,21 @@ static THROWABLE parser_parse_body_loop(Parser *self, Buffer *body_list, Token e break; try(parser_parse_body(self, &body_obj)); - throw_if_error(buffer_append(body_list, &body_obj, sizeof(body_obj))); + result = scope_add_child(scope, &body_obj); + if(result == 0) { + continue; + } else if(result == AST_ERR_DEF_DUP) { + /* TODO: Convert ast type to string for error message */ + BufferView obj_name; + obj_name = ast_get_name(&body_obj); + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_code_reference_index(&self->tokenizer, obj_name.data), + "A variable with the name %.*s was declared twice in the same scope", obj_name.size, obj_name.data); + self->error_context = ERROR_CONTEXT_NONE; + throw(result); + } else { + throw(result); + } } return PARSER_OK; } @@ -130,11 +145,15 @@ static THROWABLE parser_parse_lhs(Parser *self, LhsExpr **result, bool *assignme *assignment = bool_false; throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match)); if(match && is_const) { - self->error = tokenizer_create_error(&self->tokenizer, "const variable declaration requires assignment (expected '=', got ';')"); + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "const variable declaration requires assignment (expected '=', got ';')"); throw(PARSER_UNEXPECTED_TOKEN); } if(!match) { - self->error = tokenizer_create_error(&self->tokenizer, "Expected '=' or ';'"); + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "Expected '=' or ';'"); throw(PARSER_UNEXPECTED_TOKEN); } return PARSER_OK; @@ -282,7 +301,9 @@ static THROWABLE parser_parse_rhs_single_expr(Parser *self, Ast *rhs_expr) { if(rhs_expr->type != AST_NONE) return PARSER_OK; - self->error = tokenizer_create_error(&self->tokenizer, "Expected string, variable or function call"); + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "Expected string, variable or function call"); throw(PARSER_UNEXPECTED_TOKEN); } @@ -374,6 +395,7 @@ int parser_parse_rhs_start(Parser *self, Ast *rhs_expr) { self->error_context = ERROR_CONTEXT_RHS_START; try(parser_parse_rhs(self, rhs_expr)); + self->error_context = ERROR_CONTEXT_NONE; return PARSER_OK; } @@ -388,7 +410,9 @@ static THROWABLE parser_parse_body_semicolon(Parser *self, Ast *expr) { throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match)); if(!match) { /* TODO: Specify all the binop characters instead of "binop" which doesn't make sense for the user */ - self->error = tokenizer_create_error(&self->tokenizer, "Expected ';' or binop"); + self->error = tokenizer_create_error(&self->tokenizer, + tokenizer_get_error_index(&self->tokenizer), + "Expected ';' or binop"); throw(PARSER_UNEXPECTED_TOKEN); } return PARSER_OK; @@ -437,10 +461,10 @@ ROOT = BODY_LOOP */ int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_name) { int result; - throw_if_error(tokenizer_init(&self->tokenizer, code_buffer, buffer_name)); + throw_if_error(tokenizer_init(&self->tokenizer, self->allocator, code_buffer, buffer_name)); result = setjmp(self->parse_env); if(result == 0) - try(parser_parse_body_loop(self, &self->scope.ast_objects, TOK_END_OF_FILE)); + try(parser_parse_body_loop(self, &self->scope, TOK_END_OF_FILE)); else if(self->error.str != NULL) { switch(self->error_context) { case ERROR_CONTEXT_NONE: diff --git a/src/std/hash_map.c b/src/std/hash_map.c index 9f58f6f..a649a95 100644 --- a/src/std/hash_map.c +++ b/src/std/hash_map.c @@ -7,7 +7,7 @@ Basic hash map implementation. TODO: Improve performance */ -#define HASH_MAP_INITIAL_SIZE 16 +#define HASH_MAP_INITIAL_SIZE 8 typedef struct HashMapBucketNode HashMapBucketNode; diff --git a/src/tokenizer.c b/src/tokenizer.c index 04e9d27..f2f52ad 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -2,6 +2,7 @@ #include "../include/std/mem.h" #include "../include/std/log.h" #include "../include/std/thread.h" +#include "../include/std/scoped_allocator.h" #include #include #include @@ -19,7 +20,7 @@ static int isAlphaDigit(int c) { return isAlpha(c) || isDigit(c); } -int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) { +int tokenizer_init(Tokenizer *self, ScopedAllocator *allocator, BufferView code, BufferView code_name) { assert(code.size <= INT_MAX); self->code = code; self->index = 0; @@ -29,6 +30,7 @@ int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) { self->needs_update = bool_true; self->code_name = code_name.data ? code_name : create_buffer_view("", 8); self->number_is_integer = bool_false; + self->allocator = allocator; return 0; } @@ -548,14 +550,51 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) { ignore_result_int(amal_mutex_unlock(mutex)); } +/* TODO: Optimize */ +static int tokenizer_get_line_by_index(Tokenizer *self, int index) { + int i; + int line; + if(index < 0 || index >= (int)self->code.size) + return -1; + + line = 1; + for(i = 0; i < index; ++i) { + if(self->code.data[i] == '\n') + ++line; + } + return line; +} + void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) { self->prev_index = error->index; - tokenizer_print_error(self, error->str); + self->line = tokenizer_get_line_by_index(self, self->prev_index); + tokenizer_print_error(self, "%s", error->str); } -TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str) { +TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...) { TokenizerError result; - result.index = tokenizer->prev_index; - result.str = err_str; + va_list args; + char buffer[1024]; + int bytes_copied; + + va_start(args, fmt); + bytes_copied = vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + if(bytes_copied < 0) + bytes_copied = 0; + + result.index = index; + result.str = NULL; + ignore_result_int(scoped_allocator_alloc(self->allocator, bytes_copied, (void**)&result.str)); + if(result.str) + am_memcpy(result.str, buffer, bytes_copied); return result; } + +int tokenizer_get_error_index(Tokenizer *self) { + return self->prev_index; +} + +int tokenizer_get_code_reference_index(Tokenizer *self, const char *ref) { + return ref - self->code.data; +} diff --git a/tests/main.amal b/tests/main.amal index a7b4576..786724c 100644 --- a/tests/main.amal +++ b/tests/main.amal @@ -10,6 +10,7 @@ const main = fn { const num2 = 23232; const num3 = num1 + num2 * 30; const num4 = (num1 + num2) * num3 * ((34 + 32) / 234.345); + const num4 = 23; } const print = fn { -- cgit v1.2.3