diff options
| field | value | date |
|---|---|---|
| author | dec05eba <dec05eba@protonmail.com> | 2019-03-09 14:14:09 +0100 |
| committer | dec05eba <dec05eba@protonmail.com> | 2020-07-25 14:36:46 +0200 |
| commit | a52fdf470aa2c164108aeccc2c83bad62208913c (patch) | |
| tree | 9b4dea7aa0e53976958ac908d345f6f1914602d1 | |
| parent | 255aa20f6d68a71c9eedd47998480a8b14a3be36 (diff) | |
Start on resolving ast. Add recursive declaration check
-rw-r--r-- | include/ast.h | 25 | ||||
-rw-r--r-- | include/defs.h | 1 | ||||
-rw-r--r-- | include/parser.h | 4 | ||||
-rw-r--r-- | include/tokenizer.h | 3 | ||||
-rw-r--r-- | src/ast.c | 118 | ||||
-rw-r--r-- | src/compiler.c | 15 | ||||
-rw-r--r-- | src/parser.c | 4 | ||||
-rw-r--r-- | src/tokenizer.c | 63 |
8 files changed, 171 insertions, 62 deletions
diff --git a/include/ast.h b/include/ast.h index 1d40a89..f0020d7 100644 --- a/include/ast.h +++ b/include/ast.h @@ -1,6 +1,7 @@ #ifndef AMALGAM_AST_H #define AMALGAM_AST_H +#include "defs.h" #include "std/buffer_view.h" #include "std/buffer.h" #include "std/misc.h" @@ -8,6 +9,8 @@ #include "std/hash_map.h" #include "binop_type.h" +#include <setjmp.h> + /* General error */ #define AST_ERR -1 #define AST_ERR_DEF_DUP -20 @@ -45,9 +48,16 @@ typedef enum { AST_BINOP } AstType; +typedef enum { + AST_NOT_RESOLVED, + AST_RESOLVING, + AST_RESOLVED +} AstResolveStatus; + typedef struct { AstValue value; AstType type; + AstResolveStatus resolve_status; } Ast; struct Scope { @@ -56,17 +66,16 @@ struct Scope { }; struct FunctionDecl { - BufferView name; Scope body; }; struct FunctionCall { BufferView name; - Buffer args; + Buffer/*Ast*/ args; }; struct LhsExpr { - int isConst; + int is_const; BufferView type_name; BufferView var_name; Ast rhs_expr; @@ -90,6 +99,7 @@ struct Number { struct Variable { BufferView name; + Ast resolved_variable; }; struct Binop { @@ -100,7 +110,13 @@ struct Binop { bool grouped; }; +typedef struct { + jmp_buf env; + Parser *parser; +} AstCompilerContext; + Ast ast_none(); +void ast_init(Ast *self, AstValue value, AstType type); BufferView ast_get_name(Ast *self); CHECK_RESULT int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator); @@ -109,10 +125,11 @@ void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name); void import_init(Import *self, BufferView path); CHECK_RESULT int string_init(String *self, BufferView str); void number_init(Number *self, i64 value, bool is_integer); +void variable_init(Variable *self, BufferView name); void binop_init(Binop *self); CHECK_RESULT int scope_init(Scope *self, ScopedAllocator *allocator); CHECK_RESULT int scope_add_child(Scope *self, Ast *child); -void scope_resolve(Scope *self); +void scope_resolve(Scope *self, AstCompilerContext *context); #endif diff --git a/include/defs.h 
b/include/defs.h index 2baceb4..6a9bca4 100644 --- a/include/defs.h +++ b/include/defs.h @@ -3,5 +3,6 @@ typedef struct ParserThreadData ParserThreadData; typedef struct amal_compiler amal_compiler; +typedef struct Parser Parser; #endif diff --git a/include/parser.h b/include/parser.h index 5c055d9..20f02fe 100644 --- a/include/parser.h +++ b/include/parser.h @@ -32,7 +32,7 @@ typedef enum { ERROR_CONTEXT_RHS_START } ErrorContext; -typedef struct { +struct Parser { Tokenizer tokenizer; Scope scope; ScopedAllocator *allocator; /* borrowed. Copied from @compiler for faster access to allocator */ @@ -41,7 +41,7 @@ typedef struct { TokenizerError error; ErrorContext error_context; jmp_buf parse_env; -} Parser; +}; CHECK_RESULT int parser_thread_data_init(ParserThreadData *self); CHECK_RESULT int parser_thread_data_deinit(ParserThreadData *self); diff --git a/include/tokenizer.h b/include/tokenizer.h index b38cc58..a06b689 100644 --- a/include/tokenizer.h +++ b/include/tokenizer.h @@ -36,7 +36,6 @@ typedef struct { BufferView code; int index; int prev_index; - int line; Token token; /* @needs_update is an optimization when running tokenizer_consume_if. 
If expected_token is wrong and tokenizer_consume_if is called again, @@ -68,7 +67,7 @@ CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token); otherwise @result is set to 1 */ CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result); -void tokenizer_print_error(Tokenizer *self, const char *fmt, ...); +void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...); void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error); TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...); int tokenizer_get_error_index(Tokenizer *self); @@ -1,9 +1,19 @@ #include "../include/ast.h" +#include "../include/parser.h" #include "../include/std/log.h" #include "../include/std/hash.h" #include <assert.h> -static void ast_resolve(Ast *self); +#define throw(result) do { longjmp(context->env, (result)); } while(0) +#define throw_if_error(result) \ +do { \ + int return_if_result; \ + return_if_result = (result); \ + if((return_if_result) != 0) \ + throw(return_if_result); \ +} while(0) + +static void ast_resolve(Ast *self, AstCompilerContext *context); Ast ast_none() { Ast ast; @@ -12,10 +22,17 @@ Ast ast_none() { return ast; } +void ast_init(Ast *self, AstValue value, AstType type) { + self->value = value; + self->type = type; + self->resolve_status = AST_NOT_RESOLVED; +} + BufferView ast_get_name(Ast *self) { BufferView name; switch(self->type) { case AST_NONE: + case AST_FUNCTION_DECL: case AST_IMPORT: case AST_STRING: case AST_NUMBER: @@ -25,9 +42,6 @@ BufferView ast_get_name(Ast *self) { case AST_LHS: name = self->value.lhs_expr->var_name; break; - case AST_FUNCTION_DECL: - name = self->value.func_decl->name; - break; case AST_FUNCTION_CALL: name = self->value.func_call->name; break; @@ -39,7 +53,6 @@ BufferView ast_get_name(Ast *self) { } int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator) { - self->name = create_buffer_view_null(); return 
scope_init(&self->body, allocator); } @@ -49,7 +62,7 @@ int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocato } void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) { - self->isConst = isConst; + self->is_const = isConst; self->type_name = create_buffer_view_null(); self->var_name = var_name; self->rhs_expr = ast_none(); @@ -70,6 +83,11 @@ void number_init(Number *self, i64 value, bool is_integer) { self->is_integer = is_integer; } +void variable_init(Variable *self, BufferView name) { + self->name = name; + self->resolved_variable = ast_none(); +} + void binop_init(Binop *self) { self->lhs = ast_none(); self->rhs = ast_none(); @@ -103,29 +121,97 @@ int scope_add_child(Scope *self, Ast *child) { return AST_ERR; } -void scope_resolve(Scope *self) { +void scope_resolve(Scope *self, AstCompilerContext *context) { Ast *ast; Ast *ast_end; ast = buffer_start(&self->ast_objects); ast_end = buffer_end(&self->ast_objects); for(; ast != ast_end; ++ast) { - ast_resolve(ast); + ast_resolve(ast, context); + } +} + +static void lhs_resolve(LhsExpr *self, AstCompilerContext *context) { + /* TODO: Implement */ + amal_log_debug("Lhs resolve %s name: %.*s, type: %.*s", + self->is_const ? 
"const" : "var", + self->var_name.size, self->var_name.data, + self->type_name.size, self->type_name.data); + ast_resolve(&self->rhs_expr, context); +} + +static void funcdecl_resolve(FunctionDecl *self, AstCompilerContext *context) { + /* TODO: Implement */ + amal_log_debug("funcdecl resolve"); + scope_resolve(&self->body, context); +} + +static void funccall_resolve(FunctionCall *self, AstCompilerContext *context) { + /* TODO: Implement */ + Ast *ast; + Ast *ast_end; + ast = buffer_start(&self->args); + ast_end = buffer_end(&self->args); + amal_log_debug("funccall resolve, func name: %.*s", self->name.size, self->name.data); + for(; ast != ast_end; ++ast) { + ast_resolve(ast, context); } } -static void lhs_resolve(LhsExpr *self) { - amal_log_debug("Lhs resolve var name: %.*s", self->var_name.size, self->var_name.data); - /*ast_resolve(&self->rhs_expr);*/ +static void variable_resolve(Variable *self, AstCompilerContext *context) { + /* TODO: Implement */ + amal_log_debug("variable resolve, var name: %.*s", self->name.size, self->name.data); + (void)context; } -void ast_resolve(Ast *self) { +static void binop_resolve(Binop *self, AstCompilerContext *context) { + /* TODO: Implement */ + ast_resolve(&self->lhs, context); + ast_resolve(&self->rhs, context); +} + +static BufferView ast_get_code_reference(Ast *self) { + return ast_get_name(self); +} + +void ast_resolve(Ast *self, AstCompilerContext *context) { + if(self->resolve_status == AST_RESOLVED) { + return; + } else if(self->resolve_status == AST_RESOLVING) { + tokenizer_get_code_reference_index(&context->parser->tokenizer, ast_get_code_reference(self).data); + tokenizer_print_error(&context->parser->tokenizer, + tokenizer_get_code_reference_index(&context->parser->tokenizer, ast_get_code_reference(self).data), + "Found recursive declaration"); + throw(AST_ERR); + } + + self->resolve_status = AST_RESOLVING; switch(self->type) { + case AST_NONE: + case AST_NUMBER: + /* Nothing to resolve for numbers */ + break; + 
case AST_FUNCTION_DECL: + funcdecl_resolve(self->value.func_decl, context); + break; + case AST_FUNCTION_CALL: + funccall_resolve(self->value.func_call, context); + break; case AST_LHS: - lhs_resolve(self->value.lhs_expr); + lhs_resolve(self->value.lhs_expr, context); break; - default: - /* TODO: Implement all, and remove default case to give error when adding new ast type */ - assert(bool_false && "ast_resolve not implemented for type"); + case AST_IMPORT: + /* TODO: When @import(...).data syntax is added, implement the resolve for it */ + break; + case AST_STRING: + /* TODO: Convert special combinations. For example \n to newline */ + break; + case AST_VARIABLE: + variable_resolve(self->value.variable, context); + break; + case AST_BINOP: + binop_resolve(self->value.binop, context); break; } + self->resolve_status = AST_RESOLVED; } diff --git a/src/compiler.c b/src/compiler.c index bcb36c3..d3e4e80 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -144,16 +144,25 @@ static void* thread_callback_parse_file(void *userdata) { /* TODO: Handle errors (stop resolving ast in all other threads and report errors/warnings) */ static void* thread_callback_resolve_ast(void *userdata) { - Parser *parser; CompilerAstResolverThreadUserData compiler_ast_resolver_userdata; + Parser *parser; + AstCompilerContext compiler_context; assert(!amal_thread_is_main()); am_memcpy(&compiler_ast_resolver_userdata, userdata, sizeof(compiler_ast_resolver_userdata)); am_free(userdata); parser = compiler_ast_resolver_userdata.parser; + compiler_context.parser = parser; for(;;) { + int result; amal_log_debug("Resolving AST for file: %.*s", parser->tokenizer.code_name.size, parser->tokenizer.code_name.data); - scope_resolve(&parser->scope); + result = setjmp(compiler_context.env); + if(result == 0) + scope_resolve(&parser->scope, &compiler_context); + else { + /* TODO: stop resolving ast in all other threads */ + break; + } 
cleanup_if_error(amal_mutex_lock(&compiler_ast_resolver_userdata.compiler->mutex, "thread_callback_resolve_ast")); if(compiler_ast_resolver_userdata.compiler->resolve_ast_index + 1 >= (int)buffer_get_size(&compiler_ast_resolver_userdata.compiler->parsers, Parser)) break; @@ -284,7 +293,7 @@ static CHECK_RESULT int amal_compiler_resolve_ast(amal_compiler *self) { thread_work_data.type = THREAD_WORK_RESOLVE_AST; thread_work_data.value.parser = parser; return_if_error(amal_compiler_select_thread_for_work(self, thread_work_data, &thread_selected)); - /* After all threads have been used, they will handle using the remaining parsers */ + /* After all threads have been used, they will handle using the remaining parsers or stop if there is an error */ if(!thread_selected) break; } diff --git a/src/parser.c b/src/parser.c index fcc29be..b91e15a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -231,7 +231,7 @@ static THROWABLE parser_parse_function_call_or_variable(Parser *self, Ast *expr) if(!match) { Variable *variable; throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable)); - variable->name = identifier; + variable_init(variable, identifier); expr->type = AST_VARIABLE; expr->value.variable = variable; return PARSER_OK; @@ -471,7 +471,7 @@ int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_ tokenizer_print_error_object(&self->tokenizer, &self->error); break; case ERROR_CONTEXT_RHS_START: - tokenizer_print_error(&self->tokenizer, "Expected string, variable, closure, function call or import"); + tokenizer_print_error(&self->tokenizer, self->tokenizer.prev_index, "Expected string, variable, closure, function call or import"); break; default: assert(bool_false && "Error context handling not implemented"); diff --git a/src/tokenizer.c b/src/tokenizer.c index b2bd6c5..4f1871b 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -30,7 +30,6 @@ int tokenizer_init(Tokenizer *self, ScopedAllocator *allocator, 
BufferView code, self->code = code; self->index = 0; self->prev_index = 0; - self->line = 1; self->token = TOK_NONE; self->needs_update = bool_true; self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8); @@ -53,8 +52,6 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) { c = self->code.data[self->index]; switch(c) { case '\n': - ++self->line; - /* fallthrough */ case ' ': case '\t': break; @@ -183,7 +180,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { ++self->index; string_end = find_end_of_string(self->code, self->index); if(string_end == -1) { - tokenizer_print_error(self, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?"); + tokenizer_print_error(self, self->prev_index, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?"); return TOKENIZER_ERR; } @@ -218,7 +215,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { result = string_to_integer_unchecked(number_str, &self->value.integer); if(result != 0) { /* TODO */ - tokenizer_print_error(self, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data); + tokenizer_print_error(self, self->prev_index, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data); return TOKENIZER_ERR; } self->number_is_integer = bool_true; @@ -227,7 +224,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { result = string_to_float_unchecked(number_str, &self->value.floating); if(result != 0) { /* TODO */ - tokenizer_print_error(self, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", number_str.size, number_str.data); + tokenizer_print_error(self, self->prev_index, "Float value %.*s is too large to fit in 64-bit. 
Support for large floating numbers is not supported yet.", number_str.size, number_str.data); return TOKENIZER_ERR; } self->number_is_integer = bool_false; @@ -258,7 +255,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { ++self->index; self->index = tokenizer_get_end_of_multiline_comment(self, self->index); if(self->index == -1) { - tokenizer_print_error(self, "End of multiline comment not found"); + tokenizer_print_error(self, self->prev_index, "End of multiline comment not found"); return TOKENIZER_ERR; } return __tokenizer_next(self, token); @@ -325,10 +322,10 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { return TOKENIZER_OK; import_error: - tokenizer_print_error(self, err_msg); + tokenizer_print_error(self, self->prev_index, err_msg); return TOKENIZER_ERR; } else { - tokenizer_print_error(self, "Unexpected symbol '%c'", c); + tokenizer_print_error(self, self->prev_index, "Unexpected symbol '%c'", c); return TOKENIZER_UNEXPECTED_TOKEN; } return TOKENIZER_OK; @@ -506,7 +503,7 @@ int tokenizer_accept(Tokenizer *self, Token expected_token) { expected_token_str = tokenizer_expected_token_as_string(expected_token); actual_token_str = tokenizer_actual_token_as_string(self); - tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data); + tokenizer_print_error(self, self->prev_index, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data); return TOKENIZER_UNEXPECTED_TOKEN; } @@ -571,8 +568,24 @@ int tokenizer_get_end_of_multiline_comment(Tokenizer *self, int index) { return -1; } -void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) 
{ +/* TODO: Optimize */ +static int tokenizer_get_line_by_index(Tokenizer *self, int index) { + int i; + int line; + if(index < 0 || index >= (int)self->code.size) + return -1; + + line = 1; + for(i = 0; i < index; ++i) { + if(self->code.data[i] == '\n') + ++line; + } + return line; +} + +void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...) { va_list args; + int line; int line_start; int line_end; int prev_column; @@ -582,10 +595,11 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) { mutex = amal_log_get_mutex(); ignore_result_int(amal_mutex_lock(mutex, "tokenizer_print_error")); va_start(args, fmt); - line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index); - line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index); - prev_column = self->prev_index - line_start; - fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, self->line, 1 + prev_column); + line = tokenizer_get_line_by_index(self, index); + line_start = tokenizer_get_start_of_line_from_index(self, index); + line_end = tokenizer_get_end_of_line_from_index(self, index); + prev_column = index - line_start; + fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, line, 1 + prev_column); vfprintf(stderr, fmt, args); fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start); for(i = 0; i < prev_column; ++i) @@ -595,25 +609,8 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) 
{ ignore_result_int(amal_mutex_unlock(mutex)); } -/* TODO: Optimize */ -static int tokenizer_get_line_by_index(Tokenizer *self, int index) { - int i; - int line; - if(index < 0 || index >= (int)self->code.size) - return -1; - - line = 1; - for(i = 0; i < index; ++i) { - if(self->code.data[i] == '\n') - ++line; - } - return line; -} - void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) { - self->prev_index = error->index; - self->line = tokenizer_get_line_by_index(self, self->prev_index); - tokenizer_print_error(self, "%s", error->str); + tokenizer_print_error(self, error->index, "%s", error->str); } TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...) { |