Start on resolving ast. Add recursive declaration check

author: dec05eba <dec05eba@protonmail.com> 2019-03-09 14:14:09 +0100
committer: dec05eba <dec05eba@protonmail.com> 2020-07-25 14:36:46 +0200
commit: a52fdf470aa2c164108aeccc2c83bad62208913c (patch)
tree: 9b4dea7aa0e53976958ac908d345f6f1914602d1
parent: 255aa20f6d68a71c9eedd47998480a8b14a3be36 (diff)
8 files changed, 171 insertions, 62 deletions
diff --git a/include/ast.h b/include/ast.h
index 1d40a89..f0020d7 100644
--- a/include/ast.h
+++ b/include/ast.h
@@ -1,6 +1,7 @@
 #ifndef AMALGAM_AST_H
 #define AMALGAM_AST_H
 
+#include "defs.h"
 #include "std/buffer_view.h"
 #include "std/buffer.h"
 #include "std/misc.h"
@@ -8,6 +9,8 @@
 #include "std/hash_map.h"
 #include "binop_type.h"
 
+#include <setjmp.h>
+
 /* General error */
 #define AST_ERR -1
 #define AST_ERR_DEF_DUP -20
@@ -45,9 +48,16 @@ typedef enum {
     AST_BINOP
 } AstType;
 
+typedef enum {
+    AST_NOT_RESOLVED,
+    AST_RESOLVING,
+    AST_RESOLVED
+} AstResolveStatus;
+
 typedef struct {
     AstValue value;
     AstType type;
+    AstResolveStatus resolve_status;
 } Ast;
 
 struct Scope {
@@ -56,17 +66,16 @@ struct Scope {
 };
 
 struct FunctionDecl {
-    BufferView name;
     Scope body;
 };
 
 struct FunctionCall {
     BufferView name;
-    Buffer args;
+    Buffer/*Ast*/ args;
 };
 
 struct LhsExpr {
-    int isConst;
+    int is_const;
     BufferView type_name;
     BufferView var_name;
     Ast rhs_expr;
@@ -90,6 +99,7 @@ struct Number {
 
 struct Variable {
     BufferView name;
+    Ast resolved_variable;
 };
 
 struct Binop {
@@ -100,7 +110,13 @@ struct Binop {
     bool grouped;
 };
 
+typedef struct {
+    jmp_buf env;
+    Parser *parser;
+} AstCompilerContext;
+
 Ast ast_none();
+void ast_init(Ast *self, AstValue value, AstType type);
 BufferView ast_get_name(Ast *self);
 
 CHECK_RESULT int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator);
@@ -109,10 +125,11 @@ void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name);
 void import_init(Import *self, BufferView path);
 CHECK_RESULT int string_init(String *self, BufferView str);
 void number_init(Number *self, i64 value, bool is_integer);
+void variable_init(Variable *self, BufferView name);
 void binop_init(Binop *self);
 
 CHECK_RESULT int scope_init(Scope *self, ScopedAllocator *allocator);
 CHECK_RESULT int scope_add_child(Scope *self, Ast *child);
-void scope_resolve(Scope *self);
+void scope_resolve(Scope *self, AstCompilerContext *context);
 
 #endif
diff --git a/include/defs.h b/include/defs.h
index 2baceb4..6a9bca4 100644
--- a/include/defs.h
+++ b/include/defs.h
@@ -3,5 +3,6 @@
 
 typedef struct ParserThreadData ParserThreadData;
 typedef struct amal_compiler amal_compiler;
+typedef struct Parser Parser;
 
 #endif
diff --git a/include/parser.h b/include/parser.h
index 5c055d9..20f02fe 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -32,7 +32,7 @@ typedef enum {
     ERROR_CONTEXT_RHS_START
 } ErrorContext;
 
-typedef struct {
+struct Parser {
     Tokenizer tokenizer;
     Scope scope;
     ScopedAllocator *allocator; /* borrowed. Copied from @compiler for faster access to allocator */
@@ -41,7 +41,7 @@ typedef struct {
     TokenizerError error;
     ErrorContext error_context;
     jmp_buf parse_env;
-} Parser;
+};
 
 CHECK_RESULT int parser_thread_data_init(ParserThreadData *self);
 CHECK_RESULT int parser_thread_data_deinit(ParserThreadData *self);
diff --git a/include/tokenizer.h b/include/tokenizer.h
index b38cc58..a06b689 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -36,7 +36,6 @@ typedef struct {
     BufferView code;
     int index;
     int prev_index;
-    int line;
     Token token;
     /*
     @needs_update is an optimization when running tokenizer_consume_if. If expected_token is wrong and tokenizer_consume_if is called again,
@@ -68,7 +67,7 @@ CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
    otherwise @result is set to 1
 */
 CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
-void tokenizer_print_error(Tokenizer *self, const char *fmt, ...);
+void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...);
 void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error);
 TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...);
 int tokenizer_get_error_index(Tokenizer *self);
diff --git a/src/ast.c b/src/ast.c
index 5314e90..0211d91 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -1,9 +1,19 @@
 #include "../include/ast.h"
+#include "../include/parser.h"
 #include "../include/std/log.h"
 #include "../include/std/hash.h"
 #include <assert.h>
 
-static void ast_resolve(Ast *self);
+#define throw(result) do { longjmp(context->env, (result)); } while(0)
+#define throw_if_error(result) \
+do { \
+    int return_if_result; \
+    return_if_result = (result); \
+    if((return_if_result) != 0) \
+        throw(return_if_result); \
+} while(0)
+
+static void ast_resolve(Ast *self, AstCompilerContext *context);
 
 Ast ast_none() {
     Ast ast;
@@ -12,10 +22,17 @@ Ast ast_none() {
     return ast;
 }
 
+void ast_init(Ast *self, AstValue value, AstType type) {
+    self->value = value;
+    self->type = type;
+    self->resolve_status = AST_NOT_RESOLVED;
+}
+
 BufferView ast_get_name(Ast *self) {
     BufferView name;
     switch(self->type) {
         case AST_NONE:
+        case AST_FUNCTION_DECL:
         case AST_IMPORT:
         case AST_STRING:
         case AST_NUMBER:
@@ -25,9 +42,6 @@ BufferView ast_get_name(Ast *self) {
         case AST_LHS:
             name = self->value.lhs_expr->var_name;
             break;
-        case AST_FUNCTION_DECL:
-            name = self->value.func_decl->name;
-            break;
         case AST_FUNCTION_CALL:
             name = self->value.func_call->name;
             break;
@@ -39,7 +53,6 @@ BufferView ast_get_name(Ast *self) {
 }
 
 int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator) {
-    self->name = create_buffer_view_null();
     return scope_init(&self->body, allocator);
 }
 
@@ -49,7 +62,7 @@ int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocato
 }
 
 void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
-    self->isConst = isConst;
+    self->is_const = isConst;
     self->type_name = create_buffer_view_null();
     self->var_name = var_name;
     self->rhs_expr = ast_none();
@@ -70,6 +83,11 @@ void number_init(Number *self, i64 value, bool is_integer) {
     self->is_integer = is_integer;
 }
 
+void variable_init(Variable *self, BufferView name) {
+    self->name = name;
+    self->resolved_variable = ast_none();
+}
+
 void binop_init(Binop *self) {
     self->lhs = ast_none();
     self->rhs = ast_none();
@@ -103,29 +121,97 @@ int scope_add_child(Scope *self, Ast *child) {
     return AST_ERR;
 }
 
-void scope_resolve(Scope *self) {
+void scope_resolve(Scope *self, AstCompilerContext *context) {
     Ast *ast;
     Ast *ast_end;
     ast = buffer_start(&self->ast_objects);
     ast_end = buffer_end(&self->ast_objects);
     for(; ast != ast_end; ++ast) {
-        ast_resolve(ast);
+        ast_resolve(ast, context);
+    }
+}
+
+static void lhs_resolve(LhsExpr *self, AstCompilerContext *context) {
+    /* TODO: Implement */
+    amal_log_debug("Lhs resolve %s name: %.*s, type: %.*s", 
+        self->is_const ? "const" : "var",
+        self->var_name.size, self->var_name.data,
+        self->type_name.size, self->type_name.data);
+    ast_resolve(&self->rhs_expr, context);
+}
+
+static void funcdecl_resolve(FunctionDecl *self, AstCompilerContext *context) {
+    /* TODO: Implement */
+    amal_log_debug("funcdecl resolve");
+    scope_resolve(&self->body, context);
+}
+
+static void funccall_resolve(FunctionCall *self, AstCompilerContext *context) {
+    /* TODO: Implement */
+    Ast *ast;
+    Ast *ast_end;
+    ast = buffer_start(&self->args);
+    ast_end = buffer_end(&self->args);
+    amal_log_debug("funccall resolve, func name: %.*s", self->name.size, self->name.data);
+    for(; ast != ast_end; ++ast) {
+        ast_resolve(ast, context);
     }
 }
 
-static void lhs_resolve(LhsExpr *self) {
-    amal_log_debug("Lhs resolve var name: %.*s", self->var_name.size, self->var_name.data);
-    /*ast_resolve(&self->rhs_expr);*/
+static void variable_resolve(Variable *self, AstCompilerContext *context) {
+    /* TODO: Implement */
+    amal_log_debug("variable resolve, var name: %.*s", self->name.size, self->name.data);
+    (void)context;
 }
 
-void ast_resolve(Ast *self) {
+static void binop_resolve(Binop *self, AstCompilerContext *context) {
+    /* TODO: Implement */
+    ast_resolve(&self->lhs, context);
+    ast_resolve(&self->rhs, context);
+}
+
+static BufferView ast_get_code_reference(Ast *self) {
+    return ast_get_name(self);
+}
+
+void ast_resolve(Ast *self, AstCompilerContext *context) {
+    if(self->resolve_status == AST_RESOLVED) {
+        return;
+    } else if(self->resolve_status == AST_RESOLVING) {
+        tokenizer_get_code_reference_index(&context->parser->tokenizer, ast_get_code_reference(self).data);
+        tokenizer_print_error(&context->parser->tokenizer,
+                              tokenizer_get_code_reference_index(&context->parser->tokenizer, ast_get_code_reference(self).data),
+                              "Found recursive declaration");
+        throw(AST_ERR);
+    }
+
+    self->resolve_status = AST_RESOLVING;
     switch(self->type) {
+        case AST_NONE:
+        case AST_NUMBER:
+            /* Nothing to resolve for empty nodes and number literals */
+            break;
+        case AST_FUNCTION_DECL:
+            funcdecl_resolve(self->value.func_decl, context);
+            break;
+        case AST_FUNCTION_CALL:
+            funccall_resolve(self->value.func_call, context);
+            break;
         case AST_LHS:
-            lhs_resolve(self->value.lhs_expr);
+            lhs_resolve(self->value.lhs_expr, context);
             break;
-        default:
-            /* TODO: Implement all, and remove default case to give error when adding new ast type */
-            assert(bool_false && "ast_resolve not implemented for type");
+        case AST_IMPORT:
+            /* TODO: When @import(...).data syntax is added, implement the resolve for it */
+            break;
+        case AST_STRING:
+            /* TODO: Convert escape sequences. For example \n to newline */
+            break;
+        case AST_VARIABLE:
+            variable_resolve(self->value.variable, context);
+            break;
+        case AST_BINOP:
+            binop_resolve(self->value.binop, context);
             break;
     }
+    self->resolve_status = AST_RESOLVED;
 }
diff --git a/src/compiler.c b/src/compiler.c
index bcb36c3..d3e4e80 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -144,16 +144,25 @@ static void* thread_callback_parse_file(void *userdata) {
 
 /* TODO: Handle errors (stop resolving ast in all other threads and report errors/warnings) */
 static void* thread_callback_resolve_ast(void *userdata) {
-    Parser *parser;
     CompilerAstResolverThreadUserData compiler_ast_resolver_userdata;
+    Parser *parser;
+    AstCompilerContext compiler_context;
     assert(!amal_thread_is_main());
     
     am_memcpy(&compiler_ast_resolver_userdata, userdata, sizeof(compiler_ast_resolver_userdata));
     am_free(userdata);
     parser = compiler_ast_resolver_userdata.parser;
+    compiler_context.parser = parser;
     for(;;) {
+        int result;
         amal_log_debug("Resolving AST for file: %.*s", parser->tokenizer.code_name.size, parser->tokenizer.code_name.data);
-        scope_resolve(&parser->scope);
+        result = setjmp(compiler_context.env);
+        if(result == 0)
+            scope_resolve(&parser->scope, &compiler_context);
+        else {
+            /* TODO: stop resolving ast in all other threads */
+            break;
+        }
         cleanup_if_error(amal_mutex_lock(&compiler_ast_resolver_userdata.compiler->mutex, "thread_callback_resolve_ast"));
         if(compiler_ast_resolver_userdata.compiler->resolve_ast_index + 1 >= (int)buffer_get_size(&compiler_ast_resolver_userdata.compiler->parsers, Parser))
             break;
@@ -284,7 +293,7 @@ static CHECK_RESULT int amal_compiler_resolve_ast(amal_compiler *self) {
         thread_work_data.type = THREAD_WORK_RESOLVE_AST;
         thread_work_data.value.parser = parser;
         return_if_error(amal_compiler_select_thread_for_work(self, thread_work_data, &thread_selected));
-        /* After all threads have been used, they will handle using the remaining parsers */
+        /* Once every thread has been given work, the threads themselves pick up the remaining parsers, or stop if there is an error */
         if(!thread_selected)
             break;
     }
diff --git a/src/parser.c b/src/parser.c
index fcc29be..b91e15a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -231,7 +231,7 @@ static THROWABLE parser_parse_function_call_or_variable(Parser *self, Ast *expr)
     if(!match) {
         Variable *variable;
         throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable));
-        variable->name = identifier;
+        variable_init(variable, identifier);
         expr->type = AST_VARIABLE;
         expr->value.variable = variable;
         return PARSER_OK;
@@ -471,7 +471,7 @@ int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_
                 tokenizer_print_error_object(&self->tokenizer, &self->error);
                 break;
             case ERROR_CONTEXT_RHS_START:
-                tokenizer_print_error(&self->tokenizer, "Expected string, variable, closure, function call or import");
+                tokenizer_print_error(&self->tokenizer, self->tokenizer.prev_index, "Expected string, variable, closure, function call or import");
                 break;
             default:
                 assert(bool_false && "Error context handling not implemented");
diff --git a/src/tokenizer.c b/src/tokenizer.c
index b2bd6c5..4f1871b 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -30,7 +30,6 @@ int tokenizer_init(Tokenizer *self, ScopedAllocator *allocator, BufferView code,
     self->code = code;
     self->index = 0;
     self->prev_index = 0;
-    self->line = 1;
     self->token = TOK_NONE;
     self->needs_update = bool_true;
     self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8);
@@ -53,8 +52,6 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) {
         c = self->code.data[self->index];
         switch(c) {
             case '\n':
-                ++self->line;
-                /* fallthrough */
             case ' ':
             case '\t':
                 break;
@@ -183,7 +180,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
         ++self->index;
         string_end = find_end_of_string(self->code, self->index);
         if(string_end == -1) {
-            tokenizer_print_error(self, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?");
+            tokenizer_print_error(self, self->prev_index, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?");
             return TOKENIZER_ERR;
         }
 
@@ -218,7 +215,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
             result = string_to_integer_unchecked(number_str, &self->value.integer);
             if(result != 0) {
                 /* TODO */
-                tokenizer_print_error(self, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data);
+                tokenizer_print_error(self, self->prev_index, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data);
                 return TOKENIZER_ERR;
             }
             self->number_is_integer = bool_true;
@@ -227,7 +224,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
             result = string_to_float_unchecked(number_str, &self->value.floating);
             if(result != 0) {
                 /* TODO */
-                tokenizer_print_error(self, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", number_str.size, number_str.data);
+                tokenizer_print_error(self, self->prev_index, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", number_str.size, number_str.data);
                 return TOKENIZER_ERR;
             }
             self->number_is_integer = bool_false;
@@ -258,7 +255,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
                 ++self->index;
                 self->index = tokenizer_get_end_of_multiline_comment(self, self->index);
                 if(self->index == -1) {
-                    tokenizer_print_error(self, "End of multiline comment not found");
+                    tokenizer_print_error(self, self->prev_index, "End of multiline comment not found");
                     return TOKENIZER_ERR;
                 }
                 return __tokenizer_next(self, token);
@@ -325,10 +322,10 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
         return TOKENIZER_OK;
         
         import_error:
-        tokenizer_print_error(self, err_msg);
+        tokenizer_print_error(self, self->prev_index, err_msg);
         return TOKENIZER_ERR;
     } else {
-        tokenizer_print_error(self, "Unexpected symbol '%c'", c);
+        tokenizer_print_error(self, self->prev_index, "Unexpected symbol '%c'", c);
         return TOKENIZER_UNEXPECTED_TOKEN;
     }
     return TOKENIZER_OK;
@@ -506,7 +503,7 @@ int tokenizer_accept(Tokenizer *self, Token expected_token) {
 
     expected_token_str = tokenizer_expected_token_as_string(expected_token);
     actual_token_str = tokenizer_actual_token_as_string(self);
-    tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data);
+    tokenizer_print_error(self, self->prev_index, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data);
     return TOKENIZER_UNEXPECTED_TOKEN;
 }
 
@@ -571,8 +568,24 @@ int tokenizer_get_end_of_multiline_comment(Tokenizer *self, int index) {
     return -1;
 }
 
-void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
+/* TODO: Optimize */
+static int tokenizer_get_line_by_index(Tokenizer *self, int index) {
+    int i;
+    int line;
+    if(index < 0 || index >= (int)self->code.size)
+        return -1;
+    
+    line = 1;
+    for(i = 0; i < index; ++i) {
+        if(self->code.data[i] == '\n')
+            ++line;
+    }
+    return line;
+}
+
+void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...) {
     va_list args;
+    int line;
     int line_start;
     int line_end;
     int prev_column;
@@ -582,10 +595,11 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
     mutex = amal_log_get_mutex();
     ignore_result_int(amal_mutex_lock(mutex, "tokenizer_print_error"));
     va_start(args, fmt);
-    line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
-    line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
-    prev_column = self->prev_index - line_start;
-    fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, self->line, 1 + prev_column);
+    line = tokenizer_get_line_by_index(self, index);
+    line_start = tokenizer_get_start_of_line_from_index(self, index);
+    line_end = tokenizer_get_end_of_line_from_index(self, index);
+    prev_column = index - line_start;
+    fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, line, 1 + prev_column);
     vfprintf(stderr, fmt, args);
     fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
     for(i = 0; i < prev_column; ++i)
@@ -595,25 +609,8 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
     ignore_result_int(amal_mutex_unlock(mutex));
 }
 
-/* TODO: Optimize */
-static int tokenizer_get_line_by_index(Tokenizer *self, int index) {
-    int i;
-    int line;
-    if(index < 0 || index >= (int)self->code.size)
-        return -1;
-    
-    line = 1;
-    for(i = 0; i < index; ++i) {
-        if(self->code.data[i] == '\n')
-            ++line;
-    }
-    return line;
-}
-
 void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) {
-    self->prev_index = error->index;
-    self->line = tokenizer_get_line_by_index(self, self->prev_index);
-    tokenizer_print_error(self, "%s", error->str);
+    tokenizer_print_error(self, error->index, "%s", error->str);
 }
 
 TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...) {
author	dec05eba <dec05eba@protonmail.com>	2019-03-09 14:14:09 +0100
committer	dec05eba <dec05eba@protonmail.com>	2020-07-25 14:36:46 +0200
commit	a52fdf470aa2c164108aeccc2c83bad62208913c (patch)
tree	9b4dea7aa0e53976958ac908d345f6f1914602d1
parent	255aa20f6d68a71c9eedd47998480a8b14a3be36 (diff)