#include "../include/parser.h"
#include "../include/ast.h"
#include "../include/compiler.h"
#include "../include/std/misc.h"
#include "../include/std/file.h"
#include "../include/std/mem.h"
#include "../include/std/log.h"
#include "../include/std/alloc.h"
/*
    NOTE(review): the names of the three system headers were missing from the reviewed text.
    The ones below are what this file visibly uses (assert, setjmp/longjmp, NULL) - confirm
    against the original include list.
*/
#include <assert.h>
#include <setjmp.h>
#include <stddef.h>

/*
    Error propagation in this file:
    Functions marked THROWABLE report failure by longjmp-ing to self->parse_env
    (armed in parser_parse_buffer) rather than by their return value.
    - throw(code):          jump to parse_env with @code.
    - throw_if_error(expr): evaluate @expr once and throw its value if it is non-zero.
    - try(expr):            evaluate @expr and discard its value. It is used for calls to
                            THROWABLE functions, so a callee that signals failure with a plain
                            return (instead of throw) has its error silently dropped.
    All three macros require a variable named `self` (Parser*) in scope.
*/
#define THROWABLE CHECK_RESULT int

#define try(result) \
do { \
    int return_if_result; \
    return_if_result = (result); \
    (void)return_if_result; \
} while(0)

#define throw(result) do { longjmp(self->parse_env, (result)); } while(0)

#define throw_if_error(result) \
do { \
    int return_if_result; \
    return_if_result = (result); \
    if((return_if_result) != 0) \
        throw(return_if_result); \
} while(0)

static THROWABLE parser_parse_rhs_start(Parser *self, Ast *rhs_expr);
static THROWABLE parser_parse_body(Parser *self, Ast *ast);
static THROWABLE parser_queue_file(Parser *self, BufferView path);

/*
    Zeroes the allocator and thread members, marks the thread data as new and
    initializes its scoped allocator.
    Returns the result of scoped_allocator_init.
*/
int parser_thread_data_init(ParserThreadData *self) {
    am_memset(&self->allocator, 0, sizeof(self->allocator));
    am_memset(&self->thread, 0, sizeof(self->thread));
    self->status = PARSER_THREAD_STATUS_NEW;
    return scoped_allocator_init(&self->allocator);
}

/*
    Releases the scoped allocator and then the thread.
    Returns the result of amal_thread_deinit.
*/
int parser_thread_data_deinit(ParserThreadData *self) {
    scoped_allocator_deinit(&self->allocator);
    return amal_thread_deinit(&self->thread);
}

/*
    Deinitializes any previous thread object, then creates a joinable thread named "Parser"
    that runs @callback_func with @userdata. The status becomes RUNNING only if both steps succeed.
    Returns 0 on success, otherwise the first non-zero error code.
*/
int parser_thread_data_start(ParserThreadData *self, AmalThreadCallbackFunc callback_func, void *userdata) {
    return_if_error(amal_thread_deinit(&self->thread));
    return_if_error(amal_thread_create(&self->thread, AMAL_THREAD_JOINABLE, "Parser", callback_func, userdata));
    self->status = PARSER_THREAD_STATUS_RUNNING;
    return 0;
}

/*
    Joins the thread unless it was never started (status is still NEW), in which case
    0 is returned and @result is left untouched.
*/
int parser_thread_data_join(ParserThreadData *self, void **result) {
    if(self->status == PARSER_THREAD_STATUS_NEW)
        return 0;
    return amal_thread_join(&self->thread, result);
}

/*
    Prepares @self for a single parse: stores @compiler and @allocator, clears the error state
    and initializes the root scope, which also becomes the current scope.
    Returns the result of scope_init.
*/
int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocator) {
    self->allocator = allocator;
    self->compiler = compiler;
    self->started = bool_false;
    self->error.index = 0;
    self->error.str = NULL;
    self->error_context = ERROR_CONTEXT_NONE;
    /* TODO: When resolving ast uses mutex, add compiler->scope as the parent of the parser scope */
    self->current_scope = &self->scope;
    return scope_init(&self->scope, NULL, self->allocator);
}

/*
    BODY_LOOP = BODY* @end_token

    Parses body expressions and adds each one to @scope until @end_token is consumed.
    Throws the error of scope_add_child if adding fails; for AST_ERR_DEF_DUP an error message
    pointing at the duplicated name is stored in self->error first.
*/
static THROWABLE parser_parse_body_loop(Parser *self, Scope *scope, Token end_token) {
    int result;

    for(;;) {
        Ast body_obj;
        bool is_end_token;

        throw_if_error(tokenizer_consume_if(&self->tokenizer, end_token, &is_end_token));
        if(is_end_token)
            break;

        try(parser_parse_body(self, &body_obj));
        result = scope_add_child(scope, &body_obj);
        if(result == 0)
            continue;

        if(result == AST_ERR_DEF_DUP) {
            /* TODO: Convert ast type to string for error message */
            BufferView obj_name;
            obj_name = ast_get_name(&body_obj);
            /* The precision argument of %.*s has to be an int, the same cast is used in parser_parse_file */
            self->error = tokenizer_create_error(&self->tokenizer,
                                                 tokenizer_get_code_reference_index(&self->tokenizer, obj_name.data),
                                                 "Variable with the name %.*s was declared twice in the same scope",
                                                 (int)obj_name.size, obj_name.data);
            self->error_context = ERROR_CONTEXT_NONE;
        }
        throw(result);
    }
    return PARSER_OK;
}

/*
    VAR_TYPE_DEF = ':' TOK_IDENTIFIER

    @type_name is only written when a ':' was found.
*/
static THROWABLE parser_parse_var_type_def(Parser *self, BufferView *type_name) {
    bool match;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_COLON, &match));
    if(match) {
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
        *type_name = self->tokenizer.value.identifier;
    }
    return PARSER_OK;
}

/*
    LHS = ('const' TOK_IDENTIFIER VAR_TYPE_DEF? '=') |
          ('var' TOK_IDENTIFIER VAR_TYPE_DEF? '='|';')

    Outputs:
    - @result is NULL when the input does not start with 'const' or 'var', otherwise a newly
      allocated LhsExpr.
    - @assignment_or_rhs is false only for a declaration terminated by ';' (no right-hand side follows).
    Throws PARSER_UNEXPECTED_TOKEN when neither '=' nor ';' follows, or when a const declaration
    ends with ';'.
*/
static THROWABLE parser_parse_lhs(Parser *self, LhsExpr **result, bool *assignment_or_rhs) {
    bool is_const;
    bool match;
    BufferView var_name;

    *result = NULL;
    *assignment_or_rhs = bool_true;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const));
    if(!is_const) {
        bool is_var;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &is_var));
        if(!is_var)
            return PARSER_OK;
    }

    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
    var_name = self->tokenizer.value.identifier;
    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)result));
    lhsexpr_init(*result, is_const, var_name);

    try(parser_parse_var_type_def(self, &(*result)->type_name));

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_EQUALS, &match));
    if(match) {
        *assignment_or_rhs = bool_true;
        return PARSER_OK;
    }

    /* No '=', so the only valid continuation is ';' on a non-const declaration */
    *assignment_or_rhs = bool_false;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match));
    if(match && is_const) {
        self->error = tokenizer_create_error(&self->tokenizer,
                                             tokenizer_get_error_index(&self->tokenizer),
                                             "const variable declaration requires assignment (expected '=', got ';')");
        throw(PARSER_UNEXPECTED_TOKEN);
    }
    if(!match) {
        self->error = tokenizer_create_error(&self->tokenizer,
                                             tokenizer_get_error_index(&self->tokenizer),
                                             "Expected '=' or ';'");
        throw(PARSER_UNEXPECTED_TOKEN);
    }
    return PARSER_OK;
}

/*
    CLOSURE = 'fn' ('(' PARAM* ')')? '{' BODY_LOOP '}'

    @func_decl is NULL when the input does not start with 'fn'.
    While the body is parsed, self->current_scope points at the body of the closure; it is set
    back to the parent scope afterwards. It is NOT set back if the body throws.
*/
static THROWABLE parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
    bool match;
    Scope *prev_scope;

    *func_decl = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_FN, &match));
    if(!match)
        return PARSER_OK;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_BRACE, &match));
    if(!match) {
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
        /* TODO: Parse parameters */
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
        /* TODO: Parse return types */
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
    }

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionDecl), (void**)func_decl));
    throw_if_error(funcdecl_init(*func_decl, self->current_scope, self->allocator));

    prev_scope = self->current_scope;
    self->current_scope = &(*func_decl)->body;
    try(parser_parse_body_loop(self, self->current_scope, TOK_CLOSING_BRACE));
    self->current_scope = (*func_decl)->body.parent;
    assert(self->current_scope == prev_scope);
    return PARSER_OK;
}

/*
    FUNC_ARGS = (RHS_START)? (',' RHS_START)* ')'

    Appends every parsed argument to @func_call->args and consumes the closing ')'.
*/
static THROWABLE parser_parse_function_args(Parser *self, FunctionCall *func_call) {
    bool first_arg;

    first_arg = bool_true;
    for(;;) {
        Ast arg_expr;
        bool is_end_token;

        arg_expr = ast_none();
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_PAREN, &is_end_token));
        if(is_end_token)
            break;

        /* Every argument except the first one has to be preceded by ',' */
        if(!first_arg)
            throw_if_error(tokenizer_accept(&self->tokenizer, TOK_COMMA));
        first_arg = bool_false;

        try(parser_parse_rhs_start(self, &arg_expr));
        throw_if_error(buffer_append(&func_call->args, &arg_expr, sizeof(arg_expr)));
    }
    return PARSER_OK;
}

/*
    VARIABLE = TOK_IDENTIFIER
    FUNC_CALL_OR_VARIABLE = VARIABLE '(' FUNC_ARGS ')'

    @expr is left untouched when the next token is not an identifier.
*/
static THROWABLE parser_parse_function_call_or_variable(Parser *self, Ast *expr) {
    bool match;
    BufferView identifier;
    FunctionCall *func_call;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &match));
    if(!match)
        return PARSER_OK;

    /* Save the name now, further tokens are consumed before it is used */
    identifier = self->tokenizer.value.identifier;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &match));
    if(!match) {
        Variable *variable;
        throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable));
        variable_init(variable, identifier);
        ast_init(expr, variable, AST_VARIABLE);
        return PARSER_OK;
    }

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)&func_call));
    /* Use the saved name instead of re-reading the tokenizer value after '(' was consumed */
    throw_if_error(funccall_init(func_call, identifier, self->allocator));
    ast_init(expr, func_call, AST_FUNCTION_CALL);
    try(parser_parse_function_args(self, func_call));
    return PARSER_OK;
}

/*
    IMPORT = IMPORT_SYMBOL

    @import is NULL when the next token is not an import, otherwise a newly allocated Import
    initialized with the string value of the tokenizer.
*/
static THROWABLE parser_parse_import(Parser *self, Import **import) {
    bool match;

    *import = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IMPORT, &match));
    if(!match)
        return PARSER_OK;

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Import), (void**)import));
    import_init(*import, self->tokenizer.value.string);
    return PARSER_OK;
}

/*
    NUMBER = TOK_NUMBER

    @rhs_expr is left untouched when the next token is not a number.
*/
static THROWABLE parser_parse_number(Parser *self, Ast *rhs_expr) {
    bool match;
    Number *number;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_NUMBER, &match));
    if(!match)
        return PARSER_OK;

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Number), (void**)&number));
    number_init(number, self->tokenizer.value.integer, self->tokenizer.number_is_integer);
    ast_init(rhs_expr, number, AST_NUMBER);
    return PARSER_OK;
}

/*
    RHS_S = STRING | NUMBER | FUNC_CALL_OR_VARIABLE

    Throws PARSER_UNEXPECTED_TOKEN (after storing an error message) when none of the
    alternatives match. Control never reaches the end of the function in that case since
    throw does not return.
*/
static THROWABLE parser_parse_rhs_single_expr(Parser *self, Ast *rhs_expr) {
    bool match;

    *rhs_expr = ast_none();

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRING, &match));
    if(match) {
        String *string;
        throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(String), (void**)&string));
        throw_if_error(string_init(string, self->tokenizer.value.string));
        ast_init(rhs_expr, string, AST_STRING);
        return PARSER_OK;
    }

    try(parser_parse_number(self, rhs_expr));
    if(rhs_expr->type != AST_NONE)
        return PARSER_OK;

    try(parser_parse_function_call_or_variable(self, rhs_expr));
    if(rhs_expr->type != AST_NONE)
        return PARSER_OK;

    self->error = tokenizer_create_error(&self->tokenizer,
                                         tokenizer_get_error_index(&self->tokenizer),
                                         "Expected string, variable or function call");
    throw(PARSER_UNEXPECTED_TOKEN);
}

static THROWABLE parser_parse_rhs_binop(Parser *self, Ast *expr);

/*
    RHS_BINOP_OPT_PAREN = RHS_S | '(' RHS_BINOP ')'

    A binop that was wrapped in parentheses is marked as grouped.
*/
static THROWABLE parser_parse_rhs_binop_opt_paren(Parser *self, Ast *expr) {
    bool match;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &match));
    if(!match) {
        try(parser_parse_rhs_single_expr(self, expr));
        return PARSER_OK;
    }

    try(parser_parse_rhs_binop(self, expr));
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
    if(expr->type == AST_BINOP)
        expr->value.binop->grouped = bool_true;
    return PARSER_OK;
}

/*
    RHS_BINOP = RHS_BINOP_OPT_PAREN (TOK_BINOP RHS_BINOP_OPT_PAREN)?
    Note: The number of opening parentheses has to match the number of closing parentheses.

    The right operand is parsed by recursing into this function, so a chain of operators
    nests to the right. When no operator follows the first operand, @expr is that operand itself.
*/
int parser_parse_rhs_binop(Parser *self, Ast *expr) {
    bool match;
    Ast lhs;
    Ast rhs;
    BinopType binop_type;
    Binop *binop;

    try(parser_parse_rhs_binop_opt_paren(self, &lhs));
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_BINOP, &match));
    if(!match) {
        *expr = lhs;
        return PARSER_OK;
    }

    /* Save the operator before parsing the right side, which consumes more tokens */
    binop_type = self->tokenizer.value.binop_type;
    try(parser_parse_rhs_binop(self, &rhs));

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Binop), (void**)&binop));
    binop_init(binop);
    binop->type = binop_type;
    binop->lhs = lhs;
    binop->rhs = rhs;
    ast_init(expr, binop, AST_BINOP);
    return PARSER_OK;
}

/*
    RHS = RHS_BINOP ';'
    Note: The number of opening parentheses has to match the number of closing parentheses.

    The ';' itself is not consumed here, see parser_parse_body_semicolon.
*/
static THROWABLE parser_parse_rhs(Parser *self, Ast *rhs_expr) {
    /* TODO: If binop only contains one expression, then use that directly for @rhs_expr */
    try(parser_parse_rhs_binop(self, rhs_expr));
    /* TODO: Implement this */
    /*binop_reorder_by_precedence(binop);*/
    return PARSER_OK;
}

/*
    RHS_START = CLOSURE | IMPORT | RHS

    An import also queues the imported file for loading. While the RHS alternative is being
    parsed, the error context is ERROR_CONTEXT_RHS_START.
*/
int parser_parse_rhs_start(Parser *self, Ast *rhs_expr) {
    FunctionDecl *func_decl;
    Import *import;

    try(parser_parse_function_decl(self, &func_decl));
    if(func_decl) {
        ast_init(rhs_expr, func_decl, AST_FUNCTION_DECL);
        return PARSER_OK;
    }

    try(parser_parse_import(self, &import));
    if(import) {
        ast_init(rhs_expr, import, AST_IMPORT);
        try(parser_queue_file(self, import->path));
        return PARSER_OK;
    }

    self->error_context = ERROR_CONTEXT_RHS_START;
    try(parser_parse_rhs(self, rhs_expr));
    self->error_context = ERROR_CONTEXT_NONE;
    return PARSER_OK;
}

/*
    BODY_SEMICOLON = ';'
    Note: Semicolon is not required for closures, structs and tables

    A binop gets its own error message since the input could also have continued with
    another operator.
*/
static THROWABLE parser_parse_body_semicolon(Parser *self, Ast *expr) {
    if(expr->type == AST_BINOP) {
        bool match;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match));
        if(!match) {
            /* TODO: Specify all the binop characters instead of "binop" which doesn't make sense for the user */
            self->error = tokenizer_create_error(&self->tokenizer,
                                                 tokenizer_get_error_index(&self->tokenizer),
                                                 "Expected ';' or binop");
            throw(PARSER_UNEXPECTED_TOKEN);
        }
        return PARSER_OK;
    }

    /* TODO: Check for struct and tables */
    if(expr->type != AST_FUNCTION_DECL)
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_SEMICOLON));
    return PARSER_OK;
}

/*
    BODY = LHS ';' |
           (LHS '=' RHS_START BODY_SEMICOLON) |
           (RHS_START BODY_SEMICOLON)

    @ast becomes an AST_LHS node when a declaration was parsed, otherwise the
    right-hand side expression itself.
*/
int parser_parse_body(Parser *self, Ast *ast) {
    bool assignment_or_rhs;
    LhsExpr *lhs_expr;
    Ast rhs_expr;

    rhs_expr = ast_none();

    try(parser_parse_lhs(self, &lhs_expr, &assignment_or_rhs));
    if(!assignment_or_rhs) {
        /* Declaration without assignment, the ';' was already consumed by parser_parse_lhs */
        ast_init(ast, lhs_expr, AST_LHS);
        return PARSER_OK;
    }

    try(parser_parse_rhs_start(self, &rhs_expr));
    if(lhs_expr) {
        lhs_expr->rhs_expr = rhs_expr;
        ast_init(ast, lhs_expr, AST_LHS);
    } else {
        *ast = rhs_expr;
    }

    try(parser_parse_body_semicolon(self, &rhs_expr));
    return PARSER_OK;
}

/*
    ROOT = BODY_LOOP

    Parses @code_buffer into the root scope of the parser. This is where self->parse_env is
    armed; every throw in this file lands here.
    Returns 0 on success, the tokenizer_init error if the tokenizer can't be created, or the
    thrown error code. If an error message was stored in self->error it is printed according
    to the error context.
*/
int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_name) {
    int result;

    /*
        parse_env is not armed until setjmp below, so a failure here has to be reported with
        a plain return. Throwing would longjmp through an uninitialized jmp_buf.
    */
    return_if_error(tokenizer_init(&self->tokenizer, self->allocator, code_buffer, buffer_name));

    result = setjmp(self->parse_env);
    if(result == 0) {
        try(parser_parse_body_loop(self, &self->scope, TOK_END_OF_FILE));
    } else {
        /*
            A throw from inside a closure body leaves current_scope pointing at that body.
            Restore it so that a regular parse error doesn't trip the assert below.
        */
        self->current_scope = &self->scope;

        if(self->error.str != NULL) {
            switch(self->error_context) {
                case ERROR_CONTEXT_NONE:
                    tokenizer_print_error_object(&self->tokenizer, &self->error);
                    break;
                case ERROR_CONTEXT_RHS_START:
                    tokenizer_print_error(&self->tokenizer, self->tokenizer.prev_index,
                                          "Expected string, variable, closure, function call or import");
                    break;
                default:
                    assert(bool_false && "Error context handling not implemented");
                    break;
            }
        }
    }

    assert(self->current_scope == &self->scope);
    return result;
}

/*
    Reads the whole file at @filepath and parses it. A parser can only be used for one file.
    @filepath.data has to be null terminated right after @filepath.size bytes.
    Returns 0 on success, otherwise the error of read_whole_file or parser_parse_buffer.
    NOTE(review): file_data is not released here; presumably the parsed AST keeps views into
    it - confirm who owns the buffer.
*/
int parser_parse_file(Parser *self, BufferView filepath) {
    int result;
    char *file_data;
    usize file_size;

    amal_log_debug("Parsing %.*s", (int)filepath.size, filepath.data);
    assert(!self->started && "Parser can't be reused. Create a new parser.");
    self->started = bool_true;
    assert(filepath.size > 0 && filepath.data[filepath.size] == '\0');

    result = read_whole_file(filepath.data, &file_data, &file_size);
    if(result != 0)
        return result;

    result = parser_parse_buffer(self, create_buffer_view(file_data, file_size), filepath);
    return result;
}

/*
    Builds "<directory>/<file>" followed by a null terminator in @result, which is initialized
    with @allocator. The size of @result includes the null terminator.
    Returns 0 on success, otherwise the error of buffer_init or buffer_append.
*/
static CHECK_RESULT int file_path_join(BufferView directory, BufferView file, ScopedAllocator *allocator, Buffer *result) {
    return_if_error(buffer_init(result, allocator));
    /* Called with NULL data: presumably this only reserves the space, which is filled in below */
    return_if_error(buffer_append(result, NULL, directory.size + 1 + file.size + 1));
    am_memcpy(result->data, directory.data, directory.size);
    result->data[directory.size] = '/';
    am_memcpy(result->data + directory.size + 1, file.data, file.size);
    result->data[directory.size + 1 + file.size] = '\0';
    return 0;
}

/*
    Path can be path to included library path (or system library path) in which case
    the path separator is a dot, otherwise the path separator is forward slash '/'

    Resolves the name of @path against the directory of the file currently being parsed and
    asks the compiler to load it.
*/
int parser_queue_file(Parser *self, BufferView path) {
    /* TODO: Do not load same path twice or the compile will fail (and it can have recursive import) also for performance reasons */
    /* TODO: Parse special path (to include library path with dots) */
    BufferView file_directory;
    BufferView filename;
    Buffer file_to_parse;

    file_directory = file_get_parent_directory(self->tokenizer.code_name);
    filename = file_get_name(path);
    /* Has to throw: the caller uses try() which discards a returned error code */
    throw_if_error(file_path_join(file_directory, filename, self->allocator, &file_to_parse));
    /* We want buffer to be null terminated but null terminated character should not be included for the length. */
    throw_if_error(amal_compiler_load_file(self->compiler, create_buffer_view(file_to_parse.data, file_to_parse.size - 1)));
    return PARSER_OK;
}