#include "../include/parser.h"
#include "../include/ast.h"
#include "../include/compiler.h"
#include "../include/std/misc.h"
#include "../include/std/file.h"
#include "../include/std/mem.h"
#include "../include/std/log.h"
#include "../include/std/alloc.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <setjmp.h>

/*
    Error propagation in this file is done with setjmp/longjmp:
    parser_parse_buffer arms self->parse_env with setjmp and every parse
    function below reports failure by jumping back there with throw().
    As a consequence, throw() and throw_if_error() may only be used while
    parser_parse_buffer is active (after its setjmp call) and in a function
    that has a variable named `self` of type Parser* in scope.
*/

/* Return type of functions that report failure with throw() */
#define THROWABLE CHECK_RESULT int

/*
    Evaluates a THROWABLE call and discards its result.
    A failing callee never returns here (it longjmps), so there is nothing to check;
    the macro only exists to silence the unused-result warning of CHECK_RESULT.
*/
#define try(result) \
do { \
    int return_if_result; \
    return_if_result = (result); \
    (void)return_if_result; \
} while(0)

/* Jumps back to the setjmp in parser_parse_buffer, making it return @result */
#define throw(result) do { longjmp(self->parse_env, (result)); } while(0)

/* Throws @result if it is non-zero */
#define throw_if_error(result) \
do { \
    int return_if_result; \
    return_if_result = (result); \
    if((return_if_result) != 0) \
        throw(return_if_result); \
} while(0)

static THROWABLE parser_parse_rhs_start(Parser *self, Ast *rhs_expr);
static THROWABLE parser_parse_body(Parser *self, Ast *ast);
static THROWABLE parser_queue_file(Parser *self, BufferView path);

/*
    Zeroes the allocator and thread members, marks the thread data as new
    and initializes its scoped allocator.
    Returns the result of scoped_allocator_init.
*/
int parser_thread_data_init(ParserThreadData *self) {
    am_memset(&self->allocator, 0, sizeof(self->allocator));
    am_memset(&self->thread, 0, sizeof(self->thread));
    self->status = PARSER_THREAD_STATUS_NEW;
    return scoped_allocator_init(&self->allocator);
}

/*
    Releases the scoped allocator and then the thread.
    Returns the result of amal_thread_deinit.
*/
int parser_thread_data_deinit(ParserThreadData *self) {
    scoped_allocator_deinit(&self->allocator);
    return amal_thread_deinit(&self->thread);
}

/*
    Deinitializes the previous thread object and creates a new joinable thread
    named "Parser" that runs @callback_func with @userdata.
    The status is only changed to running if both steps succeed.
    Returns 0 on success, otherwise the error of the failing step.
*/
int parser_thread_data_start(ParserThreadData *self, AmalThreadCallbackFunc callback_func, void *userdata) {
    return_if_error(amal_thread_deinit(&self->thread));
    return_if_error(amal_thread_create(&self->thread, AMAL_THREAD_JOINABLE, "Parser", callback_func, userdata));
    self->status = PARSER_THREAD_STATUS_RUNNING;
    return 0;
}

/*
    Joins the thread and stores its result in @result.
    If the thread data is still in the new state there is nothing to join
    and 0 is returned without touching @result.
*/
int parser_thread_data_join(ParserThreadData *self, void **result) {
    if(self->status == PARSER_THREAD_STATUS_NEW)
        return 0;
    return amal_thread_join(&self->thread, result);
}

/*
    Prepares @self for a single parse. @compiler and @allocator are stored by pointer,
    not copied. Clears the error state and initializes the root scope.
    Returns the result of scope_init.
*/
int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocator) {
    self->allocator = allocator;
    self->compiler = compiler;
    self->started = bool_false;
    self->error.index = 0;
    self->error.str = NULL;
    self->error_context = ERROR_CONTEXT_NONE;
    return scope_init(&self->scope, self->allocator);
}

/*
    BODY_LOOP = BODY* @end_token

    Parses body items and adds each one to @scope until @end_token is consumed.
    Throws the scope_add_child error if an item can't be added. For AST_ERR_DEF_DUP
    an error message naming the duplicated object is stored in self->error first.
*/
static THROWABLE parser_parse_body_loop(Parser *self, Scope *scope, Token end_token) {
    int result;
    for(;;) {
        Ast body_obj;
        bool is_end_token;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, end_token, &is_end_token));
        if(is_end_token)
            break;

        try(parser_parse_body(self, &body_obj));
        result = scope_add_child(scope, &body_obj);
        if(result == 0) {
            continue;
        } else if(result == AST_ERR_DEF_DUP) {
            /* TODO: Convert ast type to string for error message */
            BufferView obj_name;
            obj_name = ast_get_name(&body_obj);
            /* The precision argument of %.*s has to be an int, so the size is cast (same as in parser_parse_file) */
            self->error = tokenizer_create_error(&self->tokenizer,
                tokenizer_get_code_reference_index(&self->tokenizer, obj_name.data),
                "A variable with the name %.*s was declared twice in the same scope",
                (int)obj_name.size, obj_name.data);
            self->error_context = ERROR_CONTEXT_NONE;
            throw(result);
        } else {
            throw(result);
        }
    }
    return PARSER_OK;
}

/*
    VAR_TYPE_DEF = ':' TOK_IDENTIFIER

    The type definition is optional. @type_name is only written if a ':' was consumed.
*/
static THROWABLE parser_parse_var_type_def(Parser *self, BufferView *type_name) {
    bool match;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_COLON, &match));
    if(match) {
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
        *type_name = self->tokenizer.value.identifier;
    }
    return PARSER_OK;
}

/*
    LHS = ('const' TOK_IDENTIFIER VAR_TYPE_DEF? '=') |
          ('var' TOK_IDENTIFIER VAR_TYPE_DEF? '='|';')

    @result is set to NULL if the next token is neither 'const' nor 'var',
    otherwise it is set to a newly allocated LhsExpr.
    @assignment is always written: it is true only if the declaration was followed by '='.
    Throws PARSER_UNEXPECTED_TOKEN if a const declaration is ended with ';'
    or if the declaration is followed by neither '=' nor ';'.
*/
static THROWABLE parser_parse_lhs(Parser *self, LhsExpr **result, bool *assignment) {
    bool is_const;
    bool match;
    BufferView var_name;

    *result = NULL;
    /* Give the caller a defined value even when there is no declaration at all */
    *assignment = bool_false;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const));
    if(!is_const) {
        bool is_var;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &is_var));
        if(!is_var)
            return PARSER_OK;
    }

    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
    var_name = self->tokenizer.value.identifier;
    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)result));
    lhsexpr_init(*result, is_const, var_name);

    try(parser_parse_var_type_def(self, &(*result)->type_name));

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_EQUALS, &match));
    if(match) {
        *assignment = bool_true;
        return PARSER_OK;
    }

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match));
    if(match && is_const) {
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_error_index(&self->tokenizer),
            "const variable declaration requires assignment (expected '=', got ';')");
        throw(PARSER_UNEXPECTED_TOKEN);
    }
    if(!match) {
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_error_index(&self->tokenizer),
            "Expected '=' or ';'");
        throw(PARSER_UNEXPECTED_TOKEN);
    }
    return PARSER_OK;
}

/*
    CLOSURE = 'fn' ('(' PARAM* ')')? '{' BODY_LOOP '}'

    @func_decl is set to NULL if the next token is not 'fn',
    otherwise it is set to a newly allocated function declaration with a parsed body.
*/
static THROWABLE parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
    bool match;
    *func_decl = NULL;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_FN, &match));
    if(!match)
        return PARSER_OK;

    /* The parameter list is optional: 'fn {' is accepted as well as 'fn() {' */
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_BRACE, &match));
    if(!match) {
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
        /* TODO: Parse parameters */
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
        /* TODO: Parse return types */
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
    }

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionDecl), (void**)func_decl));
    throw_if_error(funcdecl_init(*func_decl, self->allocator));
    try(parser_parse_body_loop(self, &(*func_decl)->body, TOK_CLOSING_BRACE));
    return PARSER_OK;
}

/*
    FUNC_ARGS = (RHS_START)? (',' RHS_START)* ')'

    Parses the arguments of a call (the opening parenthesis has already been consumed)
    and appends each one to @func_call->args. Consumes the closing parenthesis.
*/
static THROWABLE parser_parse_function_args(Parser *self, FunctionCall *func_call) {
    bool first_arg;
    first_arg = bool_true;

    for(;;) {
        Ast arg_expr;
        bool is_end_token;
        arg_expr = ast_none();

        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_PAREN, &is_end_token));
        if(is_end_token)
            break;

        /* Every argument except the first one has to be preceded by a comma */
        if(!first_arg)
            throw_if_error(tokenizer_accept(&self->tokenizer, TOK_COMMA));
        first_arg = bool_false;

        try(parser_parse_rhs_start(self, &arg_expr));
        throw_if_error(buffer_append(&func_call->args, &arg_expr, sizeof(arg_expr)));
    }

    return PARSER_OK;
}

/*
    VARIABLE = TOK_IDENTIFIER
    FUNC_CALL_OR_VARIABLE = VARIABLE | (VARIABLE '(' FUNC_ARGS)

    @expr is left untouched if the next token is not an identifier.
*/
static THROWABLE parser_parse_function_call_or_variable(Parser *self, Ast *expr) {
    bool match;
    BufferView identifier;
    FunctionCall *func_call;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &match));
    if(!match)
        return PARSER_OK;

    /* Saved now because consuming more tokens below may replace the tokenizer value */
    identifier = self->tokenizer.value.identifier;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &match));
    if(!match) {
        Variable *variable;
        throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable));
        variable->name = identifier;
        expr->type = AST_VARIABLE;
        expr->value.variable = variable;
        return PARSER_OK;
    }

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)&func_call));
    throw_if_error(funccall_init(func_call, identifier, self->allocator));
    expr->type = AST_FUNCTION_CALL;
    expr->value.func_call = func_call;
    try(parser_parse_function_args(self, func_call));
    return PARSER_OK;
}

/*
    IMPORT = IMPORT_SYMBOL

    @import is set to NULL if the next token is not an import, otherwise it is set
    to a newly allocated Import initialized with the string value of the tokenizer.
*/
static THROWABLE parser_parse_import(Parser *self, Import **import) {
    bool match;
    *import = NULL;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IMPORT, &match));
    if(!match)
        return PARSER_OK;

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Import), (void**)import));
    import_init(*import, self->tokenizer.value.string);
    return PARSER_OK;
}

/*
    NUMBER = TOK_NUMBER

    @rhs_expr is left untouched if the next token is not a number.
*/
static THROWABLE parser_parse_number(Parser *self, Ast *rhs_expr) {
    bool match;
    Number *number;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_NUMBER, &match));
    if(!match)
        return PARSER_OK;

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Number), (void**)&number));
    number_init(number, self->tokenizer.value.integer, self->tokenizer.number_is_integer);
    rhs_expr->type = AST_NUMBER;
    rhs_expr->value.number = number;
    return PARSER_OK;
}

/*
    RHS_S = STRING | NUMBER | FUNC_CALL_OR_VARIABLE

    Throws PARSER_UNEXPECTED_TOKEN if none of the alternatives match.
*/
static THROWABLE parser_parse_rhs_single_expr(Parser *self, Ast *rhs_expr) {
    bool match;
    /* The alternatives below signal "no match" by leaving the type as AST_NONE */
    *rhs_expr = ast_none();

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRING, &match));
    if(match) {
        String *string;
        throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(String), (void**)&string));
        throw_if_error(string_init(string, self->tokenizer.value.string));
        rhs_expr->type = AST_STRING;
        rhs_expr->value.string = string;
        return PARSER_OK;
    }

    try(parser_parse_number(self, rhs_expr));
    if(rhs_expr->type != AST_NONE)
        return PARSER_OK;

    try(parser_parse_function_call_or_variable(self, rhs_expr));
    if(rhs_expr->type != AST_NONE)
        return PARSER_OK;

    self->error = tokenizer_create_error(&self->tokenizer,
        tokenizer_get_error_index(&self->tokenizer),
        "Expected string, variable or function call");
    throw(PARSER_UNEXPECTED_TOKEN);
}

static THROWABLE parser_parse_rhs_binop(Parser *self, Ast *expr);

/*
    RHS_BINOP_OPT_PAREN = RHS_S | '(' RHS_BINOP ')'

    A binop that was wrapped in parentheses is marked as grouped.
*/
static THROWABLE parser_parse_rhs_binop_opt_paren(Parser *self, Ast *expr) {
    bool match;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &match));
    if(!match) {
        try(parser_parse_rhs_single_expr(self, expr));
        return PARSER_OK;
    }

    try(parser_parse_rhs_binop(self, expr));
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
    if(expr->type == AST_BINOP)
        expr->value.binop->grouped = bool_true;
    return PARSER_OK;
}

/*
    RHS_BINOP = RHS_BINOP_OPT_PAREN (TOK_BINOP RHS_BINOP)?
    Note: The number of opening parentheses has to match the number of closing parentheses.

    If no binop token follows the first operand then @expr is set to that operand,
    otherwise @expr becomes a binop with the first operand as lhs and the
    (recursively parsed) remainder as rhs.
*/
static THROWABLE parser_parse_rhs_binop(Parser *self, Ast *expr) {
    bool match;
    Ast lhs;
    Ast rhs;
    BinopType binop_type;

    try(parser_parse_rhs_binop_opt_paren(self, &lhs));
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_BINOP, &match));
    if(!match) {
        *expr = lhs;
        return PARSER_OK;
    }

    /* Saved before recursing, the recursion consumes more tokens */
    binop_type = self->tokenizer.value.binop_type;
    try(parser_parse_rhs_binop(self, &rhs));

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Binop), (void**)&expr->value.binop));
    binop_init(expr->value.binop);
    expr->value.binop->type = binop_type;
    expr->value.binop->lhs = lhs;
    expr->value.binop->rhs = rhs;
    expr->type = AST_BINOP;
    return PARSER_OK;
}

/*
    RHS = RHS_BINOP ';'
    Note: The number of opening parentheses has to match the number of closing parentheses.
    The ';' is not consumed here, see parser_parse_body_semicolon.
*/
static THROWABLE parser_parse_rhs(Parser *self, Ast *rhs_expr) {
    /* TODO: If binop only contains one expression, then use that directly for @rhs_expr */
    try(parser_parse_rhs_binop(self, rhs_expr));
    /* TODO: Implement this */
    /*binop_reorder_by_precedence(binop);*/
    return PARSER_OK;
}

/*
    RHS_START = CLOSURE | IMPORT | RHS

    An import also queues the imported file for loading.
    While the RHS alternative is being parsed the error context is set to
    ERROR_CONTEXT_RHS_START. It is only reset on success, so that a throw from
    inside leaves the context set for the error report in parser_parse_buffer.
*/
static THROWABLE parser_parse_rhs_start(Parser *self, Ast *rhs_expr) {
    FunctionDecl *func_decl;
    Import *import;

    try(parser_parse_function_decl(self, &func_decl));
    if(func_decl) {
        rhs_expr->type = AST_FUNCTION_DECL;
        rhs_expr->value.func_decl = func_decl;
        return PARSER_OK;
    }

    try(parser_parse_import(self, &import));
    if(import) {
        rhs_expr->type = AST_IMPORT;
        rhs_expr->value.import = import;
        try(parser_queue_file(self, import->path));
        return PARSER_OK;
    }

    self->error_context = ERROR_CONTEXT_RHS_START;
    try(parser_parse_rhs(self, rhs_expr));
    self->error_context = ERROR_CONTEXT_NONE;
    return PARSER_OK;
}

/*
    BODY_SEMICOLON = ';'
    Note: Semicolon is not required for closures, structs and tables
*/
static THROWABLE parser_parse_body_semicolon(Parser *self, Ast *expr) {
    if(expr->type == AST_BINOP) {
        bool match;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match));
        if(!match) {
            /* TODO: Specify all the binop characters instead of "binop" which doesn't make sense for the user */
            self->error = tokenizer_create_error(&self->tokenizer,
                tokenizer_get_error_index(&self->tokenizer),
                "Expected ';' or binop");
            throw(PARSER_UNEXPECTED_TOKEN);
        }
        return PARSER_OK;
    }

    /* TODO: Check for struct and tables */
    if(expr->type != AST_FUNCTION_DECL)
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_SEMICOLON));
    return PARSER_OK;
}

/*
    BODY = LHS ';' |
           (LHS '=' RHS_START BODY_SEMICOLON) |
           (RHS_START BODY_SEMICOLON)

    @ast is set to the declaration if there is one (with its rhs attached in case
    of an assignment), otherwise it is set to the right-hand side expression.
*/
static THROWABLE parser_parse_body(Parser *self, Ast *ast) {
    bool assignment;
    LhsExpr *lhs_expr;
    Ast rhs_expr;

    assignment = bool_false;
    rhs_expr = ast_none();

    try(parser_parse_lhs(self, &lhs_expr, &assignment));
    /*
        Declaration without assignment ("var a;").
        The check for @lhs_expr is required: when there is no declaration at all
        the statement is a plain right-hand side expression and has to be parsed below.
    */
    if(lhs_expr && !assignment) {
        ast->type = AST_LHS;
        ast->value.lhs_expr = lhs_expr;
        return PARSER_OK;
    }

    try(parser_parse_rhs_start(self, &rhs_expr));
    if(lhs_expr) {
        lhs_expr->rhs_expr = rhs_expr;
        ast->type = AST_LHS;
        ast->value.lhs_expr = lhs_expr;
    } else {
        *ast = rhs_expr;
    }

    try(parser_parse_body_semicolon(self, &rhs_expr));
    return PARSER_OK;
}

/*
    ROOT = BODY_LOOP

    Parses @code_buffer into the root scope of @self. @buffer_name is passed on to the tokenizer.
    Returns 0 on success, the tokenizer_init error if the tokenizer can't be set up,
    or the code that was given to throw() by the failing parse function.
    If an error message was stored in self->error it is printed before returning.
*/
int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_name) {
    int result;

    /*
        This has to be a plain return and not a throw: self->parse_env is not armed
        until the setjmp call below, and longjmp to an unset jmp_buf is undefined behavior.
    */
    return_if_error(tokenizer_init(&self->tokenizer, self->allocator, code_buffer, buffer_name));

    /*
        NOTE(review): ISO C only guarantees setjmp in a few expression contexts and
        assignment is not one of them. This is kept as is since the thrown code is
        needed as the return value; it works with the common implementations.
    */
    result = setjmp(self->parse_env);
    if(result == 0) {
        try(parser_parse_body_loop(self, &self->scope, TOK_END_OF_FILE));
    } else if(self->error.str != NULL) {
        switch(self->error_context) {
            case ERROR_CONTEXT_NONE:
                tokenizer_print_error_object(&self->tokenizer, &self->error);
                break;
            case ERROR_CONTEXT_RHS_START:
                tokenizer_print_error(&self->tokenizer, "Expected string, variable, closure, function call or import");
                break;
            default:
                assert(bool_false && "Error context handling not implemented");
                break;
        }
    }
    return result;
}

/*
    Reads the file at @filepath and parses its content with parser_parse_buffer.
    A parser can only be used for one file (asserted).
    Returns 0 on success, -1 if the path copy can't be allocated, the read_whole_file
    error if the file can't be read, otherwise the result of parser_parse_buffer.
*/
int parser_parse_file(Parser *self, BufferView filepath) {
    int result;
    char *file_data;
    usize file_size;
    char *filepath_tmp;

    amal_log_debug("Parsing %.*s", (int)filepath.size, filepath.data);
    assert(!self->started && "Parser can't be reused. Create a new parser.");
    self->started = bool_true;

    /*
        A null-terminated copy of the path is needed for read_whole_file.
        TODO: Somehow free this... It is passed to the tokenizer as the buffer name
        after a successful read, so it can't be freed here in that case.
    */
    /*return_if_error(scoped_allocator_alloc(self->allocator, filepath.size + 1, (void**)&filepath_tmp));*/
    filepath_tmp = malloc(filepath.size + 1);
    if(!filepath_tmp)
        return -1; /* No allocation error code is visible from this file; any non-zero value is an error */
    am_memcpy(filepath_tmp, filepath.data, filepath.size);
    filepath_tmp[filepath.size] = '\0';

    result = read_whole_file(filepath_tmp, &file_data, &file_size);
    if(result != 0) {
        /* Nothing else has a reference to the path copy yet, so it can be released */
        free(filepath_tmp);
        return result;
    }

    result = parser_parse_buffer(self, create_buffer_view(file_data, file_size), create_buffer_view(filepath_tmp, filepath.size));
    /* TODO: Somehow free this.. causes issue where filepath becomes corrupt */
    /*am_free(file_data);*/
    return result;
}

/*
    Path can be path to included library path (or system library path) in which case
    the path separator is a dot, otherwise the path separator is forward slash '/'

    Asks the compiler to load the file at @path. Throws the amal_compiler_load_file
    error on failure, so this may only be called while parser_parse_buffer is active.
*/
static THROWABLE parser_queue_file(Parser *self, BufferView path) {
    /* TODO: Do not load same path twice or the compile will fail (and it can have recursive import) also for performance reasons */
    /* TODO: Parse special path (to include library path with dots) */
    /* TODO: Path should be relative to the file that uses @import */
    throw_if_error(amal_compiler_load_file(self->compiler, path));
    return PARSER_OK;
}