From 00ab5c3488c02beab5c3f4e371f5196404334e3c Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 2 Mar 2019 00:40:08 +0100 Subject: Fix crash in parser import, optimize tokenizer_consume_if to not reparse if already parsed --- doc/IMPLEMENTATION.md | 2 ++ include/tokenizer.h | 8 ++++++-- src/parser.c | 29 +++++++++++++++++++++-------- src/tokenizer.c | 37 +++++++++++++++++++++++++++---------- tests/main.amal | 2 +- 5 files changed, 57 insertions(+), 21 deletions(-) diff --git a/doc/IMPLEMENTATION.md b/doc/IMPLEMENTATION.md index 1d86297..0da1c1a 100644 --- a/doc/IMPLEMENTATION.md +++ b/doc/IMPLEMENTATION.md @@ -1,5 +1,7 @@ # Goal 1. In the first stage the parser parses multiple files at the same time using multiple threads. +The tokenization should be done without storing the tokens in a list (streaming) but AST needs to be stored in a list +because the compiler needs to support out of order declarations. 2. In the second stage the ast is handled using multiple threads. In this stage, variables, parameters and types are defined and resolved and if a type is defined after there is a reference to it, then the compiler first resolves that type. There are flags set to make sure there aren't recursive dependencies. diff --git a/include/tokenizer.h b/include/tokenizer.h index e79f070..fac61e7 100644 --- a/include/tokenizer.h +++ b/include/tokenizer.h @@ -29,6 +29,12 @@ typedef struct { int index; int prev_index; int line; + Token token; + /* + @needs_update is an optimization when running tokenizer_consume_if. If expected_token is wrong and tokenizer_consume_if is called again, + then do not rollback to previous token and instead reuse the already parsed token + */ + bool needs_update; BufferView code_name; union { @@ -38,8 +44,6 @@ typedef struct { } Tokenizer; CHECK_RESULT int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name); - -CHECK_RESULT int tokenizer_next(Tokenizer *self, Token *token); CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token); /* @result is set to 0 if the next token is equal to @expected_token, diff --git a/src/parser.c b/src/parser.c index 81f0a92..e63814f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -70,7 +70,7 @@ static CHECK_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) { } /* -FUNC_DECL = '(' PARAM* ')' '{' BODY* '}' +CLOSURE = '(' PARAM* ')' '{' BODY* '}' */ static CHECK_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) { bool result; @@ -101,7 +101,7 @@ static CHECK_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl ** } /* -FUNC_CALL = IDENTIFIER '(' ARGS* ')' +FUNC_CALL = IDENTIFIER '(' RHS* ')' */ static CHECK_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) { bool result; @@ -127,6 +127,7 @@ IMPORT = IMPORT_SYMBOL */ static CHECK_RESULT int parser_parse_import(Parser *self, Import **import) { bool result; + *import = NULL; return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IMPORT, &result)); if(!result) @@ -138,20 +139,25 @@ static CHECK_RESULT int parser_parse_import(Parser *self, Import **import) { } /* -RHS = FUNC_DECL | FUNC_CALL | IMPORT +RHS = CLOSURE | FUNC_CALL | IMPORT */ static CHECK_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) { FunctionDecl *func_decl; FunctionCall *func_call; Import *import; + /* bool result;*/ - return_if_error(parser_parse_function_decl(self, &func_decl)); - if(func_decl) { - rhs_expr->type = AST_FUNCTION_DECL; - rhs_expr->value.func_decl = func_decl; +/* + return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRING, &result)); + if(result) { + String *string; + return_if_error(scoped_allocator_alloc(self->allocator, sizeof(String), (void**)&string)); + string_init(string, self->tokenizer.value.string); + rhs_expr->type = AST_STRING; + rhs_expr->value.string = func_call; return PARSER_OK; } - +*/ return_if_error(parser_parse_function_call(self, &func_call)); if(func_call) { rhs_expr->type = AST_FUNCTION_CALL; @@ -159,6 +165,13 @@ static CHECK_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) { return PARSER_OK; } + return_if_error(parser_parse_function_decl(self, &func_decl)); + if(func_decl) { + rhs_expr->type = AST_FUNCTION_DECL; + rhs_expr->value.func_decl = func_decl; + return PARSER_OK; + } + return_if_error(parser_parse_import(self, &import)); if(import) { rhs_expr->type = AST_IMPORT; diff --git a/src/tokenizer.c b/src/tokenizer.c index 742f9ca..b9f0ad3 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -25,6 +25,8 @@ int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) { self->index = 0; self->prev_index = 0; self->line = 1; + self->token = TOK_NONE; + self->needs_update = bool_true; self->code_name = code_name.data ? code_name : create_buffer_view("", 8); return 0; } @@ -49,10 +51,12 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) { case '\t': break; default: + self->prev_index = self->index; return TOK_NONE; } ++self->index; } + self->prev_index = self->index; } /* Returns -1 if end of string can't be found */ @@ -73,7 +77,9 @@ static int find_end_of_string(BufferView buf, int index) { return -1; } -int tokenizer_next(Tokenizer *self, Token *token) { +static CHECK_RESULT int tokenizer_next(Tokenizer *self, Token *token); + +static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { Token last_token; int c; int result; @@ -84,7 +90,6 @@ int tokenizer_next(Tokenizer *self, Token *token) { return TOKENIZER_OK; } - self->prev_index = self->index; c = tokenizer_get_char(self); if(isAlpha(c) || c == '_') { int identifier_start; @@ -181,11 +186,21 @@ int tokenizer_next(Tokenizer *self, Token *token) { return TOKENIZER_OK; } +/* Wrapper around __tokenizer_next to store last parsed token */ +int tokenizer_next(Tokenizer *self, Token *token) { + int result; + result = __tokenizer_next(self, token); + self->token = *token; + return result; +} + int tokenizer_accept(Tokenizer *self, Token expected_token) { Token actual_token; return_if_error(tokenizer_next(self, &actual_token)); - if(actual_token == expected_token) + if(actual_token == expected_token) { + self->needs_update = bool_true; return TOKENIZER_OK; + } /* Todo: convert token to string */ tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token); @@ -193,19 +208,21 @@ int tokenizer_accept(Tokenizer *self, Token expected_token) { } int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) { - int index; - int line; Token actual_token; - index = self->index; - line = self->line; + if(!self->needs_update) { + *result = (self->token == expected_token); + if(*result) + self->needs_update = bool_true; + return TOKENIZER_OK; + } + return_if_error(tokenizer_next(self, &actual_token)); if(actual_token == expected_token) { + self->needs_update = bool_true; *result = bool_true; } else { - self->index = index; - self->prev_index = index; - self->line = line; + self->needs_update = bool_false; *result = bool_false; } return TOKENIZER_OK; diff --git a/tests/main.amal b/tests/main.amal index dde97f7..0cd7154 100644 --- a/tests/main.amal +++ b/tests/main.amal @@ -4,7 +4,7 @@ const main = () { var hello = () { } - hello() + const value = "hello, world!"; } const print = () { -- cgit v1.2.3