#include "../include/parser.h"
#include "../include/ast.h"
#include "../include/compiler.h"
#include "../include/std/misc.h"
#include "../include/std/file.h"
#include "../include/std/mem.h"
#include "../include/std/log.h"
#include "../include/std/alloc.h"
/*
    NOTE(review): the names of the three system headers below were missing
    (bare "#include" directives). They have been restored from what this file
    visibly uses: assert(), setjmp()/longjmp() and NULL. Confirm against the
    project's history.
*/
#include <assert.h>
#include <setjmp.h>
#include <stddef.h>

/*
    Error propagation inside the parser is done with setjmp/longjmp:
    parser_parse_buffer() arms self->parse_env and every parse function jumps
    back to it with a non-zero error code through throw().
    Both macros require a variable named @self of type Parser* in scope.
    NOTE(review): throw_debug_msg is not defined in this file; presumably a
    project header provides it - confirm.
*/
#define throw(result) do { throw_debug_msg; longjmp(self->parse_env, (result)); } while(0)
#define throw_if_error(result) \
    do { \
        int return_if_result; \
        return_if_result = (result); \
        if((return_if_result) != 0) \
            throw(return_if_result); \
    } while(0)

static CHECK_RESULT Ast* parser_parse_rhs_start(Parser *self, bool is_standalone);
static CHECK_RESULT Ast* parser_parse_body(Parser *self);
static CHECK_RESULT Ast* parser_parse_struct_body(Parser *self);
static void parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_scope);

/*
    Zeroes the allocator and thread members, marks the thread data as NEW and
    initializes the scoped allocator.
    Returns the result of scoped_allocator_init.
*/
int parser_thread_data_init(ParserThreadData *self) {
    am_memset(&self->allocator, 0, sizeof(self->allocator));
    am_memset(&self->thread, 0, sizeof(self->thread));
    self->status = PARSER_THREAD_STATUS_NEW;
    return scoped_allocator_init(&self->allocator);
}

/*
    Deinitializes the thread (its result is deliberately ignored) and the
    scoped allocator. Always returns 0.
*/
int parser_thread_data_deinit(ParserThreadData *self) {
    ignore_result_int(amal_thread_deinit(&self->thread));
    scoped_allocator_deinit(&self->allocator);
    return 0;
}

/*
    Deinitializes any previous thread and creates a new joinable thread named
    "Parser" that runs @callback_func with @userdata.
    The status is set to RUNNING only if both steps succeed.
    Returns 0 on success, otherwise the error of the failing step.
*/
int parser_thread_data_start(ParserThreadData *self, AmalThreadCallbackFunc callback_func, void *userdata) {
    return_if_error(amal_thread_deinit(&self->thread));
    return_if_error(amal_thread_create(&self->thread, AMAL_THREAD_JOINABLE, "Parser", callback_func, userdata));
    self->status = PARSER_THREAD_STATUS_RUNNING;
    return 0;
}

/*
    Joins the thread and stores its result in @result.
    Thread data that is still in the NEW state is not joined; 0 is returned
    and @result is left untouched in that case.
*/
int parser_thread_data_join(ParserThreadData *self, void **result) {
    if(self->status == PARSER_THREAD_STATUS_NEW)
        return 0;
    return amal_thread_join(&self->thread, result);
}

/*
    Prepares @self for parsing a single file/buffer.
    The file is represented by @self->file_decl, a pub const declaration whose
    right-hand side is @self->struct_decl; the body of that struct becomes the
    initial scope.
    Returns PARSER_OK on success, otherwise the error of the failing step.
*/
int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocator) {
    self->allocator = allocator;
    self->compiler = compiler;
    self->ssa_context = NULL;
    self->started = bool_false;
    self->error.index = 0;
    self->error.str = NULL;
    self->error_context = ERROR_CONTEXT_NONE;
    return_if_error(structdecl_init(&self->struct_decl, &compiler->root_scope, allocator));
    self->struct_decl.body.parser = self;
    return_if_error(lhsexpr_init(&self->file_decl, bool_true, bool_true, create_buffer_view_null(), self->allocator));
    return_if_error(ast_create(self->allocator, &self->struct_decl, AST_STRUCT_DECL, &self->file_decl.rhs_expr));
    self->current_scope = &self->struct_decl.body;
    self->has_func_parent = bool_false;
    return PARSER_OK;
}

/*
    BODY_LOOP = BODY* @end_token

    Parses body expressions and adds them to @scope until @end_token has been
    consumed. Throws if a body fails to parse or can't be added to the scope.
*/
static void parser_parse_body_loop(Parser *self, Scope *scope, Token end_token) {
    for(;;) {
        Ast *body_obj;
        bool is_end_token;
        int result;

        throw_if_error(tokenizer_consume_if(&self->tokenizer, end_token, &is_end_token));
        if(is_end_token)
            break;

        body_obj = parser_parse_body(self);
        result = scope_add_child(scope, body_obj);
        if(result == 0)
            continue;

        if(result == AST_ERR_DEF_DUP) {
            /* TODO: Convert ast type to string for error message */
            BufferView obj_name;
            obj_name = ast_get_name(obj_name.data ? body_obj : body_obj);
            /* The precision argument of %.*s has to be an int */
            self->error = tokenizer_create_error(&self->tokenizer,
                tokenizer_get_code_reference_index(&self->tokenizer, obj_name.data),
                "Variable with the name %.*s was declared twice in the same scope", (int)obj_name.size, obj_name.data);
            self->error_context = ERROR_CONTEXT_NONE;
        }
        throw(result);
    }
}

/*
    STRUCT_BODY_LOOP = '{' STRUCT_BODY* '}'

    Parses struct fields and adds them to @scope until the closing brace has
    been consumed. Throws if a field fails to parse or can't be added.
*/
static void parser_parse_struct_body_loop(Parser *self, Scope *scope) {
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
    for(;;) {
        Ast *body_obj;
        bool is_end_token;
        int result;

        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &is_end_token));
        if(is_end_token)
            break;

        body_obj = parser_parse_struct_body(self);
        result = scope_add_child(scope, body_obj);
        if(result == 0)
            continue;

        if(result == AST_ERR_DEF_DUP) {
            /* TODO: Convert ast type to string for error message */
            BufferView obj_name;
            obj_name = ast_get_name(body_obj);
            /* The precision argument of %.*s has to be an int */
            self->error = tokenizer_create_error(&self->tokenizer,
                tokenizer_get_code_reference_index(&self->tokenizer, obj_name.data),
                "Variable with the name %.*s was declared twice in the struct", (int)obj_name.size, obj_name.data);
            self->error_context = ERROR_CONTEXT_NONE;
        }
        throw(result);
    }
}

/*
    VAR_TYPE_DEF = ':' TOK_IDENTIFIER

    The type definition is optional. @type_name is only written to when a
    ':' was found.
*/
static void parser_parse_var_type_def(Parser *self, BufferView *type_name) {
    bool match;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_COLON, &match));
    if(match) {
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
        *type_name = self->tokenizer.value.identifier;
    }
}

/*
    LHS = ('pub'? 'const' TOK_IDENTIFIER VAR_TYPE_DEF? '=') |
          ('pub'? 'var' TOK_IDENTIFIER VAR_TYPE_DEF? '='|';')

    Returns NULL if the next tokens are not the start of a declaration.
    @assignment_or_rhs is set to false only for a declaration without
    assignment (terminated by ';'), in which case there is no right-hand side
    to parse. In every other case it's set to true.
*/
static CHECK_RESULT LhsExpr* parser_parse_lhs(Parser *self, bool *assignment_or_rhs) {
    LhsExpr *result;
    bool is_pub;
    bool is_const;
    bool match;
    BufferView var_name;

    result = NULL;
    *assignment_or_rhs = bool_true;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_PUB, &is_pub));
    if(is_pub && self->has_func_parent) {
        /*
            The identifier value of the tokenizer is not set by the 'pub' token,
            so it can't be used to locate the error.
        */
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_error_index(&self->tokenizer),
            "Only declarations in structs can be public");
        throw(PARSER_UNEXPECTED_TOKEN);
    }

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const));
    if(!is_const) {
        bool is_var;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &is_var));
        if(!is_var) {
            /* 'pub' has already been consumed, so it can't be silently dropped */
            if(is_pub) {
                self->error = tokenizer_create_error(&self->tokenizer,
                    tokenizer_get_error_index(&self->tokenizer),
                    "Expected 'const' or 'var' after 'pub'");
                throw(PARSER_UNEXPECTED_TOKEN);
            }
            return result;
        }
    }

    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
    var_name = self->tokenizer.value.identifier;
    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(LhsExpr), (void**)&result));
    throw_if_error(lhsexpr_init(result, is_pub, is_const, var_name, self->allocator));

    parser_parse_var_type_def(self, &result->type.name);

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_EQUALS, &match));
    if(match) {
        *assignment_or_rhs = bool_true;
        return result;
    }

    /* No assignment, so the type is the only thing that can describe the variable */
    if(!result->type.name.data) {
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_error_index(&self->tokenizer),
            "Variable declaration requires type or right-hand side expression (Expected ':' or '=')");
        throw(PARSER_UNEXPECTED_TOKEN);
    }

    *assignment_or_rhs = bool_false;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match));
    if(match && is_const) {
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_error_index(&self->tokenizer),
            "Const variable declaration requires assignment (expected '=', got ';')");
        throw(PARSER_UNEXPECTED_TOKEN);
    }

    if(!match) {
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_error_index(&self->tokenizer),
            "Expected ';'");
        throw(PARSER_UNEXPECTED_TOKEN);
    }
    return result;
}

/*
    TODO: Implement params and return types
    CLOSURE = 'fn' ('(' PARAM? (',' PARAM)* ')')? '{' BODY_LOOP '}'

    Returns NULL if the next token is not 'fn'.
    While the body is parsed, the body of the function is the current scope
    and @self->has_func_parent is true. Both are restored afterwards.
*/
static CHECK_RESULT FunctionDecl* parser_parse_closure(Parser *self) {
    FunctionDecl *result;
    bool match;
    bool prev_has_func_parent;

    result = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_FN, &match));
    if(!match)
        return result;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_BRACE, &match));
    if(!match) {
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
        /* TODO: Parse parameters */
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
        /* TODO: Parse return types */
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
    }

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionDecl), (void**)&result));
    throw_if_error(funcdecl_init(result, self->current_scope, self->allocator));

    self->current_scope = &result->body;
    prev_has_func_parent = self->has_func_parent;
    self->has_func_parent = bool_true;
    parser_parse_body_loop(self, self->current_scope, TOK_CLOSING_BRACE);
    self->current_scope = result->body.parent;
    self->has_func_parent = prev_has_func_parent;
    return result;
}

/*
    STRUCT = 'struct' STRUCT_BODY_LOOP

    Returns NULL if the next token is not 'struct'.
    The body of the struct is the current scope while its fields are parsed.
*/
static CHECK_RESULT StructDecl* parser_parse_struct_decl(Parser *self) {
    StructDecl *result;
    bool match;

    result = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRUCT, &match));
    if(!match)
        return result;

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(StructDecl), (void**)&result));
    throw_if_error(structdecl_init(result, self->current_scope, self->allocator));

    self->current_scope = &result->body;
    parser_parse_struct_body_loop(self, self->current_scope);
    self->current_scope = result->body.parent;
    return result;
}

/*
    FUNC_ARGS = (RHS_START)? (',' RHS_START)* ')'

    The opening parenthesis has already been consumed by the caller.
    Every parsed argument is appended to @func_call->args.
*/
static void parser_parse_function_args(Parser *self, FunctionCall *func_call) {
    bool first_arg;
    first_arg = bool_true;

    for(;;) {
        Ast *arg_expr;
        bool is_end_token;
        arg_expr = NULL;

        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_PAREN, &is_end_token));
        if(is_end_token)
            break;

        /* Every argument except the first one is preceded by a comma */
        if(!first_arg)
            throw_if_error(tokenizer_accept(&self->tokenizer, TOK_COMMA));
        first_arg = bool_false;

        arg_expr = parser_parse_rhs_start(self, bool_false);
        throw_if_error(buffer_append(&func_call->args, &arg_expr, sizeof(arg_expr)));
    }
}

/*
    VARIABLE = TOK_IDENTIFIER
    FUNC_CALL_OR_VARIABLE = VARIABLE '(' FUNC_ARGS ')'

    Returns NULL if the next token is not an identifier.
*/
static CHECK_RESULT Ast* parser_parse_function_call_or_variable(Parser *self) {
    Ast *result;
    bool match;
    BufferView identifier;
    FunctionCall *func_call;

    result = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &match));
    if(!match)
        return result;

    /* Saved before the next token is consumed, since that may change the tokenizer value */
    identifier = self->tokenizer.value.identifier;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &match));
    if(!match) {
        Variable *variable;
        throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable));
        variable_init(variable, identifier);
        throw_if_error(ast_create(self->allocator, variable, AST_VARIABLE, &result));
        return result;
    }

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)&func_call));
    throw_if_error(funccall_init(func_call, identifier, self->allocator));
    throw_if_error(ast_create(self->allocator, func_call, AST_FUNCTION_CALL, &result));
    parser_parse_function_args(self, func_call);
    return result;
}

/*
    IMPORT = IMPORT_SYMBOL

    Returns NULL if the next token is not an import.
    The import path is taken from the string value of the tokenizer.
*/
static CHECK_RESULT Import* parser_parse_import(Parser *self) {
    Import *result;
    bool match;

    result = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IMPORT, &match));
    if(!match)
        return result;

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Import), (void**)&result));
    import_init(result, self->tokenizer.value.string);
    return result;
}

/*
    NUMBER = TOK_NUMBER

    Returns NULL if the next token is not a number.
    The source text of the number (from the start to the end of the token) is
    stored in the number alongside its value.
*/
static CHECK_RESULT Ast* parser_parse_number(Parser *self) {
    Ast *result;
    bool match;
    Number *number;
    BufferView code_ref;

    result = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_NUMBER, &match));
    if(!match)
        return result;

    code_ref = create_buffer_view(self->tokenizer.code.data + self->tokenizer.prev_index,
        self->tokenizer.index - self->tokenizer.prev_index);
    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Number), (void**)&number));
    number_init(number, self->tokenizer.value.integer, self->tokenizer.number_is_integer, code_ref);
    throw_if_error(ast_create(self->allocator, number, AST_NUMBER, &result));
    return result;
}

/*
    RHS_S = STRING | NUMBER | FUNC_CALL_OR_VARIABLE

    Never returns NULL; throws PARSER_UNEXPECTED_TOKEN if none of the
    alternatives match.
*/
static Ast* parser_parse_rhs_single_expr(Parser *self) {
    Ast *result;
    bool match;

    result = NULL;
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRING, &match));
    if(match) {
        String *string;
        throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(String), (void**)&string));
        throw_if_error(string_init(string, self->tokenizer.value.string));
        throw_if_error(ast_create(self->allocator, string, AST_STRING, &result));
        return result;
    }

    result = parser_parse_number(self);
    if(result)
        return result;

    result = parser_parse_function_call_or_variable(self);
    if(result)
        return result;

    self->error = tokenizer_create_error(&self->tokenizer,
        tokenizer_get_error_index(&self->tokenizer),
        "Expected string, variable or function call");
    throw(PARSER_UNEXPECTED_TOKEN);
}

static CHECK_RESULT Ast* parser_parse_rhs_binop(Parser *self);

/*
    RHS_BINOP_OPT_PAREN = RHS_S | '(' RHS_BINOP ')'

    A binop that was wrapped in parentheses is marked as grouped.
*/
static CHECK_RESULT Ast* parser_parse_rhs_binop_opt_paren(Parser *self) {
    Ast *result;
    bool match;

    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &match));
    if(!match)
        return parser_parse_rhs_single_expr(self);

    result = parser_parse_rhs_binop(self);
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
    if(result->type == AST_BINOP)
        result->value.binop->grouped = bool_true;
    return result;
}

/*
    RHS_BINOP = RHS_BINOP_OPT_PAREN (TOK_BINOP RHS_BINOP_OPT_PAREN)?
    Note: Parentheses count has to match for the beginning parenthesis and the ending parenthesis.

    Returns the left-hand side expression directly if it's not followed by a
    binop. Otherwise everything after the operator is parsed recursively and
    becomes the right-hand side of the binop.
*/
static Ast* parser_parse_rhs_binop(Parser *self) {
    bool match;
    Ast *result;
    Ast *lhs;
    Ast *rhs;
    BinopType binop_type;
    Binop *binop;

    lhs = parser_parse_rhs_binop_opt_paren(self);
    throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_BINOP, &match));
    if(!match)
        return lhs;

    /* Saved before the recursion, which consumes more tokens */
    binop_type = self->tokenizer.value.binop_type;
    rhs = parser_parse_rhs_binop(self);

    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(Binop), (void**)&binop));
    binop_init(binop);
    binop->type = binop_type;
    binop->lhs = lhs;
    binop->rhs = rhs;
    throw_if_error(ast_create(self->allocator, binop, AST_BINOP, &result));
    return result;
}

/*
    RHS = RHS_BINOP ';'
    Note: Parentheses count has to match for the beginning parenthesis and the ending parenthesis.

    The semicolon is not consumed here.
*/
static CHECK_RESULT Ast* parser_parse_rhs(Parser *self) {
    /* TODO: If binop only contains one expression, then use that directly for @rhs_expr */
    Ast *result;
    result = parser_parse_rhs_binop(self);
    /* TODO: Implement this */
    /*binop_reorder_by_precedence(binop);*/
    return result;
}

/*
    RHS_START = CLOSURE | STRUCT | IMPORT | RHS

    Closures, structs and imports are only tried when @is_standalone is false.
    An imported file is queued for loading before the import is returned.
    The error context is reset to NONE once the expression has been parsed.
*/
static Ast* parser_parse_rhs_start(Parser *self, bool is_standalone) {
    Ast *result;

    if(!is_standalone) {
        FunctionDecl *func_decl;
        StructDecl *struct_decl;
        Import *import;

        func_decl = parser_parse_closure(self);
        if(func_decl) {
            throw_if_error(ast_create(self->allocator, func_decl, AST_FUNCTION_DECL, &result));
            return result;
        }

        struct_decl = parser_parse_struct_decl(self);
        if(struct_decl) {
            throw_if_error(ast_create(self->allocator, struct_decl, AST_STRUCT_DECL, &result));
            return result;
        }

        import = parser_parse_import(self);
        if(import) {
            parser_queue_file(self, import->path, &import->file_scope);
            throw_if_error(ast_create(self->allocator, import, AST_IMPORT, &result));
            return result;
        }

        self->error_context = ERROR_CONTEXT_RHS_STANDALONE;
    }

    result = parser_parse_rhs(self);
    self->error_context = ERROR_CONTEXT_NONE;
    return result;
}

/*
    BODY_SEMICOLON = ';'
    Note: Semicolon is not required for closures, structs and tables
*/
static void parser_parse_body_semicolon(Parser *self, Ast *expr) {
    if(expr->type == AST_BINOP) {
        bool match;
        throw_if_error(tokenizer_consume_if(&self->tokenizer, TOK_SEMICOLON, &match));
        if(!match) {
            /* TODO: Specify all the binop characters instead of "binop" which doesn't make sense for the user */
            self->error = tokenizer_create_error(&self->tokenizer,
                tokenizer_get_error_index(&self->tokenizer),
                "Expected ';' or binop");
            throw(PARSER_UNEXPECTED_TOKEN);
        }
        return;
    }

    /* TODO: Check for tables */
    if(expr->type != AST_FUNCTION_DECL && expr->type != AST_STRUCT_DECL)
        throw_if_error(tokenizer_accept(&self->tokenizer, TOK_SEMICOLON));
}

/*
    BODY = LHS ';' |
           (LHS '=' RHS_START BODY_SEMICOLON) |
           (RHS_START BODY_SEMICOLON)

    An expression without a left-hand side is parsed as a standalone
    expression, with the error context set to NO_LHS while it's parsed.
*/
static Ast* parser_parse_body(Parser *self) {
    bool assignment_or_rhs;
    Ast *result;
    LhsExpr *lhs_expr;
    Ast *rhs_expr;

    lhs_expr = parser_parse_lhs(self, &assignment_or_rhs);
    if(!assignment_or_rhs) {
        /* Declaration without assignment, the semicolon has already been consumed */
        throw_if_error(ast_create(self->allocator, lhs_expr, AST_LHS, &result));
        return result;
    }

    if(!lhs_expr)
        self->error_context = ERROR_CONTEXT_NO_LHS;

    rhs_expr = parser_parse_rhs_start(self, !lhs_expr);
    if(lhs_expr) {
        lhs_expr->rhs_expr = rhs_expr;
        throw_if_error(ast_create(self->allocator, lhs_expr, AST_LHS, &result));
    } else {
        result = rhs_expr;
    }
    parser_parse_body_semicolon(self, rhs_expr);
    return result;
}

/*
    STRUCT_BODY = TOK_IDENTIFIER ':' TOK_IDENTIFIER ';'
*/
static Ast* parser_parse_struct_body(Parser *self) {
    Ast *result;
    BufferView var_name;
    BufferView type_name;
    StructField *struct_field;

    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
    var_name = self->tokenizer.value.identifier;
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_COLON));
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
    type_name = self->tokenizer.value.identifier;
    throw_if_error(tokenizer_accept(&self->tokenizer, TOK_SEMICOLON));

    /* The allocation has to have the size of the type that is initialized */
    throw_if_error(scoped_allocator_alloc(self->allocator, sizeof(StructField), (void**)&struct_field));
    structfield_init(struct_field, var_name, type_name);
    throw_if_error(ast_create(self->allocator, struct_field, AST_STRUCT_FIELD, &result));
    return result;
}

/*
    ROOT = BODY_LOOP

    Parses @code_buffer, with @buffer_name used as the name of the file
    declaration and of the tokenizer.
    Returns 0 on success. On failure the error is printed (if the parser
    created one), logged and its code is returned.
*/
int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_name) {
    int result;
    self->file_decl.var_name = buffer_name;
    /*
        throw can't be used before setjmp has initialized @self->parse_env,
        so an error is returned directly here.
    */
    return_if_error(tokenizer_init(&self->tokenizer, self->allocator, code_buffer, buffer_name, &self->compiler->options));

    result = setjmp(self->parse_env);
    if(result == 0) {
        parser_parse_body_loop(self, &self->struct_decl.body, TOK_END_OF_FILE);
    } else if(self->error.str != NULL) {
        switch(self->error_context) {
            case ERROR_CONTEXT_NONE:
                tokenizer_print_error_object(&self->tokenizer, &self->error);
                break;
            case ERROR_CONTEXT_RHS_STANDALONE:
                tokenizer_print_error(&self->tokenizer, self->tokenizer.prev_index, "Expected string, variable, closure, struct, function call or import");
                break;
            case ERROR_CONTEXT_NO_LHS:
                tokenizer_print_error(&self->tokenizer, self->tokenizer.prev_index, "Expected variable declaration, string, variable or function call");
                break;
            default:
                assert(bool_false && "Error context handling not implemented");
                break;
        }
    }

    if(result != 0)
        amal_log_error("Failed, reason: %d", result);
    return result;
}

/*
    Reads the whole file at @filepath and parses it.
    @filepath has to be non-empty and null-terminated (the null character is
    not included in its size). A parser can only parse one file.
    Returns 0 on success, otherwise the error of reading or parsing the file.
    NOTE(review): the file data is never freed here; presumably it has to
    outlive the parse because the tokenizer and ast refer to it - confirm.
*/
int parser_parse_file(Parser *self, BufferView filepath) {
    int result;
    char *file_data;
    usize file_size;

    assert(!self->started && "Parser can't be reused. Create a new parser.");
    self->started = bool_true;
    assert(filepath.size > 0 && filepath.data[filepath.size] == '\0');

    result = read_whole_file(filepath.data, &file_data, &file_size);
    if(result != 0)
        return result;
    return parser_parse_buffer(self, create_buffer_view(file_data, file_size), filepath);
}

/*
    Allocates @result_path and writes "@directory/@file" followed by a null
    character to it. The caller has to free @result_path with am_free.
    Returns 0 on success, otherwise the error of the allocation.
*/
static CHECK_RESULT int file_path_join(BufferView directory, BufferView file, char **result_path) {
    char *joined;
    /*                                              '/'             '\0' */
    return_if_error(am_malloc(directory.size + 1 + file.size + 1, (void**)result_path));
    joined = *result_path;
    am_memcpy(joined, directory.data, directory.size);
    joined[directory.size] = '/';
    am_memcpy(joined + directory.size + 1, file.data, file.size);
    joined[directory.size + 1 + file.size] = '\0';
    return 0;
}

/*
    Path can be path to included library path (or system library path) in which case
    the path separator is a dot, otherwise the path separator is forward slash '/'

    @path is resolved relative to the directory of the file that is being
    parsed and the compiler is asked to load it. Throws if the path can't be
    created or if the compiler fails to load the file.
*/
static void parser_queue_file(Parser *self, BufferView path, FileScopeReference **file_scope) {
    /* TODO: Parse special path (to include library path with dots) */
    BufferView file_directory;
    char *path_relative;
    int result;

    file_directory = file_get_parent_directory(self->tokenizer.code_name);
    throw_if_error(file_path_join(file_directory, path, &path_relative));
    /* We want buffer to be null-terminated but null character should not be included for the size */
    result = amal_compiler_internal_load_file(self->compiler, path_relative, file_scope);
    if(result != 0) {
        self->error = tokenizer_create_error(&self->tokenizer,
            tokenizer_get_code_reference_index(&self->tokenizer, path.data),
            "Failed while parsing path %s (invalid path?)", path_relative);
        /* The path has to be freed before throw, which doesn't return */
        am_free(path_relative);
        throw(result);
    }
    am_free(path_relative);
}