about summary refs log tree commit diff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2019-03-09 14:14:09 +0100
committer dec05eba <dec05eba@protonmail.com> 2020-07-25 14:36:46 +0200
commit a52fdf470aa2c164108aeccc2c83bad62208913c (patch)
tree 9b4dea7aa0e53976958ac908d345f6f1914602d1
parent 255aa20f6d68a71c9eedd47998480a8b14a3be36 (diff)
Start on resolving ast. Add recursive declaration check
-rw-r--r-- include/ast.h 25
-rw-r--r-- include/defs.h 1
-rw-r--r-- include/parser.h 4
-rw-r--r-- include/tokenizer.h 3
-rw-r--r-- src/ast.c 118
-rw-r--r-- src/compiler.c 15
-rw-r--r-- src/parser.c 4
-rw-r--r-- src/tokenizer.c 63
8 files changed, 171 insertions, 62 deletions
diff --git a/include/ast.h b/include/ast.h
index 1d40a89..f0020d7 100644
--- a/include/ast.h
+++ b/include/ast.h
@@ -1,6 +1,7 @@
#ifndef AMALGAM_AST_H
#define AMALGAM_AST_H
+#include "defs.h"
#include "std/buffer_view.h"
#include "std/buffer.h"
#include "std/misc.h"
@@ -8,6 +9,8 @@
#include "std/hash_map.h"
#include "binop_type.h"
+#include <setjmp.h>
+
/* General error */
#define AST_ERR -1
#define AST_ERR_DEF_DUP -20
@@ -45,9 +48,16 @@ typedef enum {
AST_BINOP
} AstType;
+typedef enum {
+ AST_NOT_RESOLVED,
+ AST_RESOLVING,
+ AST_RESOLVED
+} AstResolveStatus;
+
typedef struct {
AstValue value;
AstType type;
+ AstResolveStatus resolve_status;
} Ast;
struct Scope {
@@ -56,17 +66,16 @@ struct Scope {
};
struct FunctionDecl {
- BufferView name;
Scope body;
};
struct FunctionCall {
BufferView name;
- Buffer args;
+ Buffer/*Ast*/ args;
};
struct LhsExpr {
- int isConst;
+ int is_const;
BufferView type_name;
BufferView var_name;
Ast rhs_expr;
@@ -90,6 +99,7 @@ struct Number {
struct Variable {
BufferView name;
+ Ast resolved_variable;
};
struct Binop {
@@ -100,7 +110,13 @@ struct Binop {
bool grouped;
};
+typedef struct {
+ jmp_buf env;
+ Parser *parser;
+} AstCompilerContext;
+
Ast ast_none();
+void ast_init(Ast *self, AstValue value, AstType type);
BufferView ast_get_name(Ast *self);
CHECK_RESULT int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator);
@@ -109,10 +125,11 @@ void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name);
void import_init(Import *self, BufferView path);
CHECK_RESULT int string_init(String *self, BufferView str);
void number_init(Number *self, i64 value, bool is_integer);
+void variable_init(Variable *self, BufferView name);
void binop_init(Binop *self);
CHECK_RESULT int scope_init(Scope *self, ScopedAllocator *allocator);
CHECK_RESULT int scope_add_child(Scope *self, Ast *child);
-void scope_resolve(Scope *self);
+void scope_resolve(Scope *self, AstCompilerContext *context);
#endif
diff --git a/include/defs.h b/include/defs.h
index 2baceb4..6a9bca4 100644
--- a/include/defs.h
+++ b/include/defs.h
@@ -3,5 +3,6 @@
typedef struct ParserThreadData ParserThreadData;
typedef struct amal_compiler amal_compiler;
+typedef struct Parser Parser;
#endif
diff --git a/include/parser.h b/include/parser.h
index 5c055d9..20f02fe 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -32,7 +32,7 @@ typedef enum {
ERROR_CONTEXT_RHS_START
} ErrorContext;
-typedef struct {
+struct Parser {
Tokenizer tokenizer;
Scope scope;
ScopedAllocator *allocator; /* borrowed. Copied from @compiler for faster access to allocator */
@@ -41,7 +41,7 @@ typedef struct {
TokenizerError error;
ErrorContext error_context;
jmp_buf parse_env;
-} Parser;
+};
CHECK_RESULT int parser_thread_data_init(ParserThreadData *self);
CHECK_RESULT int parser_thread_data_deinit(ParserThreadData *self);
diff --git a/include/tokenizer.h b/include/tokenizer.h
index b38cc58..a06b689 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -36,7 +36,6 @@ typedef struct {
BufferView code;
int index;
int prev_index;
- int line;
Token token;
/*
@needs_update is an optimization when running tokenizer_consume_if. If expected_token is wrong and tokenizer_consume_if is called again,
@@ -68,7 +67,7 @@ CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
otherwise @result is set to 1
*/
CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
-void tokenizer_print_error(Tokenizer *self, const char *fmt, ...);
+void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...);
void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error);
TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...);
int tokenizer_get_error_index(Tokenizer *self);
diff --git a/src/ast.c b/src/ast.c
index 5314e90..0211d91 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -1,9 +1,19 @@
#include "../include/ast.h"
+#include "../include/parser.h"
#include "../include/std/log.h"
#include "../include/std/hash.h"
#include <assert.h>
-static void ast_resolve(Ast *self);
+#define throw(result) do { longjmp(context->env, (result)); } while(0)
+#define throw_if_error(result) \
+do { \
+ int return_if_result; \
+ return_if_result = (result); \
+ if((return_if_result) != 0) \
+ throw(return_if_result); \
+} while(0)
+
+static void ast_resolve(Ast *self, AstCompilerContext *context);
Ast ast_none() {
Ast ast;
@@ -12,10 +22,17 @@ Ast ast_none() {
return ast;
}
+void ast_init(Ast *self, AstValue value, AstType type) {
+ self->value = value;
+ self->type = type;
+ self->resolve_status = AST_NOT_RESOLVED;
+}
+
BufferView ast_get_name(Ast *self) {
BufferView name;
switch(self->type) {
case AST_NONE:
+ case AST_FUNCTION_DECL:
case AST_IMPORT:
case AST_STRING:
case AST_NUMBER:
@@ -25,9 +42,6 @@ BufferView ast_get_name(Ast *self) {
case AST_LHS:
name = self->value.lhs_expr->var_name;
break;
- case AST_FUNCTION_DECL:
- name = self->value.func_decl->name;
- break;
case AST_FUNCTION_CALL:
name = self->value.func_call->name;
break;
@@ -39,7 +53,6 @@ BufferView ast_get_name(Ast *self) {
}
int funcdecl_init(FunctionDecl *self, ScopedAllocator *allocator) {
- self->name = create_buffer_view_null();
return scope_init(&self->body, allocator);
}
@@ -49,7 +62,7 @@ int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocato
}
void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
- self->isConst = isConst;
+ self->is_const = isConst;
self->type_name = create_buffer_view_null();
self->var_name = var_name;
self->rhs_expr = ast_none();
@@ -70,6 +83,11 @@ void number_init(Number *self, i64 value, bool is_integer) {
self->is_integer = is_integer;
}
+void variable_init(Variable *self, BufferView name) {
+ self->name = name;
+ self->resolved_variable = ast_none();
+}
+
void binop_init(Binop *self) {
self->lhs = ast_none();
self->rhs = ast_none();
@@ -103,29 +121,97 @@ int scope_add_child(Scope *self, Ast *child) {
return AST_ERR;
}
-void scope_resolve(Scope *self) {
+void scope_resolve(Scope *self, AstCompilerContext *context) {
Ast *ast;
Ast *ast_end;
ast = buffer_start(&self->ast_objects);
ast_end = buffer_end(&self->ast_objects);
for(; ast != ast_end; ++ast) {
- ast_resolve(ast);
+ ast_resolve(ast, context);
+ }
+}
+
+static void lhs_resolve(LhsExpr *self, AstCompilerContext *context) {
+ /* TODO: Implement */
+ amal_log_debug("Lhs resolve %s name: %.*s, type: %.*s",
+ self->is_const ? "const" : "var",
+ self->var_name.size, self->var_name.data,
+ self->type_name.size, self->type_name.data);
+ ast_resolve(&self->rhs_expr, context);
+}
+
+static void funcdecl_resolve(FunctionDecl *self, AstCompilerContext *context) {
+ /* TODO: Implement */
+ amal_log_debug("funcdecl resolve");
+ scope_resolve(&self->body, context);
+}
+
+static void funccall_resolve(FunctionCall *self, AstCompilerContext *context) {
+ /* TODO: Implement */
+ Ast *ast;
+ Ast *ast_end;
+ ast = buffer_start(&self->args);
+ ast_end = buffer_end(&self->args);
+ amal_log_debug("funccall resolve, func name: %.*s", self->name.size, self->name.data);
+ for(; ast != ast_end; ++ast) {
+ ast_resolve(ast, context);
}
}
-static void lhs_resolve(LhsExpr *self) {
- amal_log_debug("Lhs resolve var name: %.*s", self->var_name.size, self->var_name.data);
- /*ast_resolve(&self->rhs_expr);*/
+static void variable_resolve(Variable *self, AstCompilerContext *context) {
+ /* TODO: Implement */
+ amal_log_debug("variable resolve, var name: %.*s", self->name.size, self->name.data);
+ (void)context;
}
-void ast_resolve(Ast *self) {
+static void binop_resolve(Binop *self, AstCompilerContext *context) {
+ /* TODO: Implement */
+ ast_resolve(&self->lhs, context);
+ ast_resolve(&self->rhs, context);
+}
+
+static BufferView ast_get_code_reference(Ast *self) {
+ return ast_get_name(self);
+}
+
+void ast_resolve(Ast *self, AstCompilerContext *context) {
+ if(self->resolve_status == AST_RESOLVED) {
+ return;
+ } else if(self->resolve_status == AST_RESOLVING) {
+ tokenizer_get_code_reference_index(&context->parser->tokenizer, ast_get_code_reference(self).data);
+ tokenizer_print_error(&context->parser->tokenizer,
+ tokenizer_get_code_reference_index(&context->parser->tokenizer, ast_get_code_reference(self).data),
+ "Found recursive declaration");
+ throw(AST_ERR);
+ }
+
+ self->resolve_status = AST_RESOLVING;
switch(self->type) {
+ case AST_NONE:
+ case AST_NUMBER:
+ /* Nothing to resolve for numbers */
+ break;
+ case AST_FUNCTION_DECL:
+ funcdecl_resolve(self->value.func_decl, context);
+ break;
+ case AST_FUNCTION_CALL:
+ funccall_resolve(self->value.func_call, context);
+ break;
case AST_LHS:
- lhs_resolve(self->value.lhs_expr);
+ lhs_resolve(self->value.lhs_expr, context);
break;
- default:
- /* TODO: Implement all, and remove default case to give error when adding new ast type */
- assert(bool_false && "ast_resolve not implemented for type");
+ case AST_IMPORT:
+ /* TODO: When @import(...).data syntax is added, implement the resolve for it */
+ break;
+ case AST_STRING:
+ /* TODO: Convert special combinations. For example \n to newline */
+ break;
+ case AST_VARIABLE:
+ variable_resolve(self->value.variable, context);
+ break;
+ case AST_BINOP:
+ binop_resolve(self->value.binop, context);
break;
}
+ self->resolve_status = AST_RESOLVED;
}
diff --git a/src/compiler.c b/src/compiler.c
index bcb36c3..d3e4e80 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -144,16 +144,25 @@ static void* thread_callback_parse_file(void *userdata) {
/* TODO: Handle errors (stop resolving ast in all other threads and report errors/warnings) */
static void* thread_callback_resolve_ast(void *userdata) {
- Parser *parser;
CompilerAstResolverThreadUserData compiler_ast_resolver_userdata;
+ Parser *parser;
+ AstCompilerContext compiler_context;
assert(!amal_thread_is_main());
am_memcpy(&compiler_ast_resolver_userdata, userdata, sizeof(compiler_ast_resolver_userdata));
am_free(userdata);
parser = compiler_ast_resolver_userdata.parser;
+ compiler_context.parser = parser;
for(;;) {
+ int result;
amal_log_debug("Resolving AST for file: %.*s", parser->tokenizer.code_name.size, parser->tokenizer.code_name.data);
- scope_resolve(&parser->scope);
+ result = setjmp(compiler_context.env);
+ if(result == 0)
+ scope_resolve(&parser->scope, &compiler_context);
+ else {
+ /* TODO: stop resolving ast in all other threads */
+ break;
+ }
cleanup_if_error(amal_mutex_lock(&compiler_ast_resolver_userdata.compiler->mutex, "thread_callback_resolve_ast"));
if(compiler_ast_resolver_userdata.compiler->resolve_ast_index + 1 >= (int)buffer_get_size(&compiler_ast_resolver_userdata.compiler->parsers, Parser))
break;
@@ -284,7 +293,7 @@ static CHECK_RESULT int amal_compiler_resolve_ast(amal_compiler *self) {
thread_work_data.type = THREAD_WORK_RESOLVE_AST;
thread_work_data.value.parser = parser;
return_if_error(amal_compiler_select_thread_for_work(self, thread_work_data, &thread_selected));
- /* After all threads have been used, they will handle using the remaining parsers */
+ /* After all threads have been used, they will handle using the remaining parsers or stop if there is an error */
if(!thread_selected)
break;
}
diff --git a/src/parser.c b/src/parser.c
index fcc29be..b91e15a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -231,7 +231,7 @@ static THROWABLE parser_parse_function_call_or_variable(Parser *self, Ast *expr)
if(!match) {
Variable *variable;
throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable));
- variable->name = identifier;
+ variable_init(variable, identifier);
expr->type = AST_VARIABLE;
expr->value.variable = variable;
return PARSER_OK;
@@ -471,7 +471,7 @@ int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_
tokenizer_print_error_object(&self->tokenizer, &self->error);
break;
case ERROR_CONTEXT_RHS_START:
- tokenizer_print_error(&self->tokenizer, "Expected string, variable, closure, function call or import");
+ tokenizer_print_error(&self->tokenizer, self->tokenizer.prev_index, "Expected string, variable, closure, function call or import");
break;
default:
assert(bool_false && "Error context handling not implemented");
diff --git a/src/tokenizer.c b/src/tokenizer.c
index b2bd6c5..4f1871b 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -30,7 +30,6 @@ int tokenizer_init(Tokenizer *self, ScopedAllocator *allocator, BufferView code,
self->code = code;
self->index = 0;
self->prev_index = 0;
- self->line = 1;
self->token = TOK_NONE;
self->needs_update = bool_true;
self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8);
@@ -53,8 +52,6 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) {
c = self->code.data[self->index];
switch(c) {
case '\n':
- ++self->line;
- /* fallthrough */
case ' ':
case '\t':
break;
@@ -183,7 +180,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
++self->index;
string_end = find_end_of_string(self->code, self->index);
if(string_end == -1) {
- tokenizer_print_error(self, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?");
+ tokenizer_print_error(self, self->prev_index, "String end not found. Did you forget '\"' or did you have a mismatch of number of '\"'?");
return TOKENIZER_ERR;
}
@@ -218,7 +215,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
result = string_to_integer_unchecked(number_str, &self->value.integer);
if(result != 0) {
/* TODO */
- tokenizer_print_error(self, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data);
+ tokenizer_print_error(self, self->prev_index, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data);
return TOKENIZER_ERR;
}
self->number_is_integer = bool_true;
@@ -227,7 +224,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
result = string_to_float_unchecked(number_str, &self->value.floating);
if(result != 0) {
/* TODO */
- tokenizer_print_error(self, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", number_str.size, number_str.data);
+ tokenizer_print_error(self, self->prev_index, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", number_str.size, number_str.data);
return TOKENIZER_ERR;
}
self->number_is_integer = bool_false;
@@ -258,7 +255,7 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
++self->index;
self->index = tokenizer_get_end_of_multiline_comment(self, self->index);
if(self->index == -1) {
- tokenizer_print_error(self, "End of multiline comment not found");
+ tokenizer_print_error(self, self->prev_index, "End of multiline comment not found");
return TOKENIZER_ERR;
}
return __tokenizer_next(self, token);
@@ -325,10 +322,10 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
return TOKENIZER_OK;
import_error:
- tokenizer_print_error(self, err_msg);
+ tokenizer_print_error(self, self->prev_index, err_msg);
return TOKENIZER_ERR;
} else {
- tokenizer_print_error(self, "Unexpected symbol '%c'", c);
+ tokenizer_print_error(self, self->prev_index, "Unexpected symbol '%c'", c);
return TOKENIZER_UNEXPECTED_TOKEN;
}
return TOKENIZER_OK;
@@ -506,7 +503,7 @@ int tokenizer_accept(Tokenizer *self, Token expected_token) {
expected_token_str = tokenizer_expected_token_as_string(expected_token);
actual_token_str = tokenizer_actual_token_as_string(self);
- tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data);
+ tokenizer_print_error(self, self->prev_index, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data);
return TOKENIZER_UNEXPECTED_TOKEN;
}
@@ -571,8 +568,24 @@ int tokenizer_get_end_of_multiline_comment(Tokenizer *self, int index) {
return -1;
}
-void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
+/* TODO: Optimize */
+static int tokenizer_get_line_by_index(Tokenizer *self, int index) {
+ int i;
+ int line;
+ if(index < 0 || index >= (int)self->code.size)
+ return -1;
+
+ line = 1;
+ for(i = 0; i < index; ++i) {
+ if(self->code.data[i] == '\n')
+ ++line;
+ }
+ return line;
+}
+
+void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...) {
va_list args;
+ int line;
int line_start;
int line_end;
int prev_column;
@@ -582,10 +595,11 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
mutex = amal_log_get_mutex();
ignore_result_int(amal_mutex_lock(mutex, "tokenizer_print_error"));
va_start(args, fmt);
- line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
- line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
- prev_column = self->prev_index - line_start;
- fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, self->line, 1 + prev_column);
+ line = tokenizer_get_line_by_index(self, index);
+ line_start = tokenizer_get_start_of_line_from_index(self, index);
+ line_end = tokenizer_get_end_of_line_from_index(self, index);
+ prev_column = index - line_start;
+ fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, line, 1 + prev_column);
vfprintf(stderr, fmt, args);
fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
for(i = 0; i < prev_column; ++i)
@@ -595,25 +609,8 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
ignore_result_int(amal_mutex_unlock(mutex));
}
-/* TODO: Optimize */
-static int tokenizer_get_line_by_index(Tokenizer *self, int index) {
- int i;
- int line;
- if(index < 0 || index >= (int)self->code.size)
- return -1;
-
- line = 1;
- for(i = 0; i < index; ++i) {
- if(self->code.data[i] == '\n')
- ++line;
- }
- return line;
-}
-
void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) {
- self->prev_index = error->index;
- self->line = tokenizer_get_line_by_index(self, self->prev_index);
- tokenizer_print_error(self, "%s", error->str);
+ tokenizer_print_error(self, error->index, "%s", error->str);
}
TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...) {