From 5e240bdab90c45f935e7d2b33181de13295e7e6b Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 2 Mar 2019 21:20:33 +0100 Subject: Add string, variable and number. Fix identifier match against const and var --- src/ast.c | 14 ++- src/parser.c | 175 +++++++++++++++++++++++++----------- src/tokenizer.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 391 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/ast.c b/src/ast.c index 56eb34f..75c8afc 100644 --- a/src/ast.c +++ b/src/ast.c @@ -17,8 +17,9 @@ int funcdecl_add_to_body(FunctionDecl *self, Ast ast) { return BUFFER_OK; } -void funccall_init(FunctionCall *self, BufferView name) { +int funccall_init(FunctionCall *self, BufferView name, ScopedAllocator *allocator) { self->name = name; + return buffer_init(&self->args, allocator); } void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) { @@ -30,3 +31,14 @@ void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) { void import_init(Import *self, BufferView path) { self->path = path; } + +int string_init(String *self, BufferView str) { + /* TODO: Convert special characters. For example \n should be converted to binary newline etc */ + self->str = str; + return 0; +} + +void number_init(Number *self, i64 value, bool is_integer) { + self->value.integer = value; + self->is_integer = is_integer; +} \ No newline at end of file diff --git a/src/parser.c b/src/parser.c index e63814f..539ebd3 100644 --- a/src/parser.c +++ b/src/parser.c @@ -10,8 +10,9 @@ #include #include -static CHECK_RESULT int parser_queue_file(Parser *self, BufferView path); +static CHECK_RESULT int parser_parse_rhs_start(Parser *self, Ast *rhs_expr); static CHECK_RESULT int parser_parse_body(Parser *self, Ast *ast); +static CHECK_RESULT int parser_queue_file(Parser *self, BufferView path); int parser_thread_data_init(ParserThreadData *self) { am_memset(&self->allocator, 0, sizeof(self->allocator)); @@ -42,9 +43,32 @@ int parser_init(Parser *self, amal_compiler *compiler, ScopedAllocator *allocato self->allocator = allocator; self->compiler = compiler; self->started = bool_false; + self->error.index = 0; + self->error.str = NULL; return buffer_init(&self->ast_objects, self->allocator); } +static bool parser_got_overwritable_error(Parser *self) { + return self->error.str != NULL; +} + +/* +BODY_LOOP = BODY* @end_token +*/ +static CHECK_RESULT int parser_parse_body_loop(Parser *self, Buffer *body_list, Token end_token) { + for(;;) { + Ast body_obj; + bool is_end_token; + return_if_error(tokenizer_consume_if(&self->tokenizer, end_token, &is_end_token)); + if(is_end_token) + break; + + return_if_error(parser_parse_body(self, &body_obj)); + return_if_error(buffer_append(body_list, &body_obj, sizeof(body_obj))); + } + return PARSER_OK; +} + /* LHS = 'const'|'var' IDENTIFIER */ @@ -70,7 +94,7 @@ static CHECK_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) { } /* -CLOSURE = '(' PARAM* ')' '{' BODY* '}' +CLOSURE = '(' PARAM* ')' '{' BODY_LOOP '}' */ static CHECK_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) { bool result; @@ -88,38 +112,65 @@ static CHECK_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl ** return_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionDecl), (void**)func_decl)); return_if_error(funcdecl_init(*func_decl, self->allocator)); + return parser_parse_body_loop(self, &(*func_decl)->body, TOK_CLOSING_BRACE); +} + +/* +FUNC_ARGS = (RHS_START)? 
(',' RHS_START)* ')' +*/ +static CHECK_RESULT int parser_parse_function_args(Parser *self, FunctionCall *func_call) { + bool first_arg; + first_arg = bool_true; + for(;;) { - Ast body_obj; - return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &result)); - if(result) + Ast arg_expr; + bool is_end_token; + arg_expr = ast_none(); + + return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_PAREN, &is_end_token)); + if(is_end_token) break; - return_if_error(parser_parse_body(self, &body_obj)); - return_if_error(funcdecl_add_to_body(*func_decl, body_obj)); + if(!first_arg) + return_if_error(tokenizer_accept(&self->tokenizer, TOK_COMMA)); + first_arg = bool_false; + + return_if_error(parser_parse_rhs_start(self, &arg_expr)); + return_if_error(buffer_append(&func_call->args, &arg_expr, sizeof(arg_expr))); } + return PARSER_OK; } /* -FUNC_CALL = IDENTIFIER '(' RHS* ')' +VARIABLE = IDENTIFIER +FUNC_CALL_OR_VARIABLE = VARIABLE '(' FUNC_ARGS ')' */ -static CHECK_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) { +static CHECK_RESULT int parser_parse_function_call_or_variable(Parser *self, Ast *expr) { bool result; - BufferView func_name; - *func_call = NULL; + BufferView identifier; + FunctionCall *func_call; return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &result)); if(!result) return PARSER_OK; - func_name = self->tokenizer.value.identifier; - return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN)); - /* TODO: Parse arguments */ - return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN)); - - return_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)func_call)); - funccall_init(*func_call, func_name); - return PARSER_OK; + identifier = self->tokenizer.value.identifier; + return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &result)); + if(!result) { + Variable *variable; + return_if_error(scoped_allocator_alloc(self->allocator, sizeof(Variable), (void**)&variable)); + variable->name = identifier; + expr->type = AST_VARIABLE; + expr->value.variable = variable; + return PARSER_OK; + } + + return_if_error(scoped_allocator_alloc(self->allocator, sizeof(FunctionCall), (void**)&func_call)); + return_if_error(funccall_init(func_call, self->tokenizer.value.identifier, self->allocator)); + expr->type = AST_FUNCTION_CALL; + expr->value.func_call = func_call; + return parser_parse_function_args(self, func_call); } /* @@ -138,32 +189,56 @@ static CHECK_RESULT int parser_parse_import(Parser *self, Import **import) { return PARSER_OK; } +static CHECK_RESULT int parser_parse_number(Parser *self, Ast *rhs_expr) { + bool result; + Number *number; + + return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_NUMBER, &result)); + if(!result) + return PARSER_OK; + + return_if_error(scoped_allocator_alloc(self->allocator, sizeof(Number), (void**)&number)); + number_init(number, self->tokenizer.value.integer, self->tokenizer.number_is_integer); + rhs_expr->type = AST_NUMBER; + rhs_expr->value.number = number; + return PARSER_OK; +} + /* -RHS = CLOSURE | FUNC_CALL | IMPORT +RHS = STRING | NUMBER | FUNC_CALL_OR_VARIABLE */ static CHECK_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) { - FunctionDecl *func_decl; - FunctionCall *func_call; - Import *import; - /* bool result;*/ + bool result; -/* return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_STRING, &result)); if(result) { String *string; return_if_error(scoped_allocator_alloc(self->allocator, 
sizeof(String), (void**)&string)); - string_init(string, self->tokenizer.value.string); + return_if_error(string_init(string, self->tokenizer.value.string)); rhs_expr->type = AST_STRING; - rhs_expr->value.string = func_call; + rhs_expr->value.string = string; return PARSER_OK; } -*/ - return_if_error(parser_parse_function_call(self, &func_call)); - if(func_call) { - rhs_expr->type = AST_FUNCTION_CALL; - rhs_expr->value.func_call = func_call; + + return_if_error(parser_parse_number(self, rhs_expr)); + if(rhs_expr->type != AST_NONE) return PARSER_OK; - } + + return_if_error(parser_parse_function_call_or_variable(self, rhs_expr)); + if(rhs_expr->type != AST_NONE) + return PARSER_OK; + + self->error = tokenizer_create_error(&self->tokenizer, "Expected string, variable or function call"); + return PARSER_UNEXPECTED_TOKEN; +} + +/* +RHS_START = CLOSURE | IMPORT | RHS +*/ +int parser_parse_rhs_start(Parser *self, Ast *rhs_expr) { + FunctionDecl *func_decl; + Import *import; + int result; return_if_error(parser_parse_function_decl(self, &func_decl)); if(func_decl) { @@ -180,24 +255,26 @@ static CHECK_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) { return PARSER_OK; } - /* TODO: Convert token to string */ - tokenizer_print_error(&self->tokenizer, "Expected function declaration or function call"); - return PARSER_UNEXPECTED_TOKEN; + result = parser_parse_rhs(self, rhs_expr); + if(result == PARSER_UNEXPECTED_TOKEN && parser_got_overwritable_error(self)) + self->error = tokenizer_create_error(&self->tokenizer, "Expected string, variable, closure, function call or import"); + return result; } /* -BODY = (LHS '=' RHS) | - RHS +BODY = (LHS '=' RHS_START) | + RHS_START */ int parser_parse_body(Parser *self, Ast *ast) { LhsExpr *lhs_expr; Ast rhs_expr; + rhs_expr = ast_none(); return_if_error(parser_parse_lhs(self, &lhs_expr)); if(lhs_expr) return_if_error(tokenizer_accept(&self->tokenizer, TOK_EQUALS)); - return_if_error(parser_parse_rhs(self, &rhs_expr)); + return_if_error(parser_parse_rhs_start(self, &rhs_expr)); if(lhs_expr) { lhs_expr->rhs_expr = rhs_expr; ast->type = AST_LHS; @@ -209,23 +286,15 @@ int parser_parse_body(Parser *self, Ast *ast) { } /* -ROOT = BODY* +ROOT = BODY_LOOP */ int parser_parse_buffer(Parser *self, BufferView code_buffer, BufferView buffer_name) { - Ast ast; + int result; return_if_error(tokenizer_init(&self->tokenizer, code_buffer, buffer_name)); - - for(;;) { - bool isEof; - return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_END_OF_FILE, &isEof)); - if(isEof) - break; - - return_if_error(parser_parse_body(self, &ast)); - return_if_error(buffer_append(&self->ast_objects, &ast, sizeof(ast))); - } - - return PARSER_OK; + result = parser_parse_body_loop(self, &self->ast_objects, TOK_END_OF_FILE); + if(self->error.str != NULL) + tokenizer_print_error_object(&self->tokenizer, &self->error); + return result; } int parser_parse_file(Parser *self, BufferView filepath) { diff --git a/src/tokenizer.c b/src/tokenizer.c index fdb06cd..41d46fb 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -28,6 +28,7 @@ int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) { self->token = TOK_NONE; self->needs_update = bool_true; self->code_name = code_name.data ? 
code_name : create_buffer_view("", 8); + self->number_is_integer = bool_false; return 0; } @@ -60,7 +61,7 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) { } /* Returns -1 if end of string can't be found */ -static int find_end_of_string(BufferView buf, int index) { +static CHECK_RESULT int find_end_of_string(BufferView buf, int index) { int c; bool escape_quote; escape_quote = bool_false; @@ -77,6 +78,44 @@ static int find_end_of_string(BufferView buf, int index) { return -1; } +#define I64_OVERFLOW_ERROR -1 +static CHECK_RESULT int string_to_integer_unchecked(BufferView str, i64 *result) { + int i; + i64 value; + + value = 0; + for(i = 0; i < (int)str.size; ++i) { + i64 value_before; + value_before = value; + value *= 10 + str.data[i]; + /* overflow */ + if(value < value_before) + return I64_OVERFLOW_ERROR; + } + + *result = value; + return 0; +} + +#define F64_OVERFLOW_ERROR -1 +static CHECK_RESULT int string_to_float_unchecked(BufferView str, f64 *result) { + int i; + f64 value; + + value = 0.0; + for(i = 0; i < (int)str.size; ++i) { + f64 value_before; + value_before = value; + value *= 10.0 + str.data[i]; + /* overflow */ + if(value < value_before) + return F64_OVERFLOW_ERROR; + } + + *result = value; + return 0; +} + static CHECK_RESULT int tokenizer_next(Tokenizer *self, Token *token); static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { @@ -106,12 +145,23 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start); - if(am_memeql(self->value.identifier.data, "const", 5)) - *token = TOK_CONST; - else if(am_memeql(self->value.identifier.data, "var", 3)) - *token = TOK_VAR; - else - *token = TOK_IDENTIFIER; + switch(self->value.identifier.size) { + case 3: { + if(am_memeql(self->value.identifier.data, "var", 3)) { + *token = TOK_VAR; + return TOKENIZER_OK; + } + break; + } + case 5: { + if(am_memeql(self->value.identifier.data, "const", 5)) { + *token = TOK_CONST; + return TOKENIZER_OK; + } + break; + } + } + *token = TOK_IDENTIFIER; } else if(c == '"') { int string_end; ++self->index; @@ -126,6 +176,52 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { self->index = string_end + 1; *token = TOK_STRING; return TOKENIZER_OK; + } else if(isDigit(c)) { + int number_start; + int dot_index; + BufferView number_str; + number_start = self->index; + dot_index = -1; + ++self->index; + + while(self->index < (int)self->code.size) { + c = tokenizer_get_char(self); + if(isDigit(c)) + ++self->index; + else if(c == '.' && dot_index == -1) { + dot_index = self->index; + ++self->index; + } else { + break; + } + } + + number_str = create_buffer_view(self->code.data + number_start, self->index - number_start); + + if(dot_index == -1) { + int result; + result = string_to_integer_unchecked(number_str, &self->value.integer); + if(result != 0) { + /* TODO */ + tokenizer_print_error(self, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data); + return TOKENIZER_ERR; + } + self->number_is_integer = bool_true; + } else { + int result; + result = string_to_float_unchecked(number_str, &self->value.floating); + if(result != 0) { + /* TODO */ + tokenizer_print_error(self, "Float value %.*s is too large to fit in 64-bit. 
Support for large floating numbers is not supported yet.", number_str.size, number_str.data); + return TOKENIZER_ERR; + } + self->number_is_integer = bool_false; + } + *token = TOK_NUMBER; + return TOKENIZER_OK; + } else if(c == '.') { + ++self->index; + *token = TOK_DOT; } else if(c == '=') { ++self->index; *token = TOK_EQUALS; @@ -135,6 +231,9 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) { } else if(c == ')') { ++self->index; *token = TOK_CLOSING_PAREN; + } else if(c == ',') { + ++self->index; + *token = TOK_COMMA; } else if(c == '{') { ++self->index; *token = TOK_OPEN_BRACE; @@ -194,16 +293,148 @@ int tokenizer_next(Tokenizer *self, Token *token) { return result; } +static usize strlen(const char *str) { + usize len; + len = 0; + while(*str != '\0') { + ++len; + ++str; + } + return len; +} + +static BufferView tokenizer_expected_token_as_string(Token token) { + const char *str; + switch(token) { + case TOK_NONE: + str = "none"; + break; + case TOK_END_OF_FILE: + str = ""; + break; + case TOK_IDENTIFIER: + str = "identifier"; + break; + case TOK_CONST: + str = "const"; + break; + case TOK_VAR: + str = "var"; + break; + case TOK_STRING: + str = "string"; + break; + case TOK_EQUALS: + str = "="; + break; + case TOK_OPEN_PAREN: + str = "("; + break; + case TOK_CLOSING_PAREN: + str = ")"; + break; + case TOK_COMMA: + str = ","; + break; + case TOK_OPEN_BRACE: + str = "{"; + break; + case TOK_CLOSING_BRACE: + str = "}"; + break; + case TOK_IMPORT: + str = "import"; + break; + case TOK_NUMBER: + str = "number"; + break; + case TOK_DOT: + str = "."; + break; + default: + str = "Unknown token"; + break; + } + return create_buffer_view(str, strlen(str)); +} + +static BufferView tokenizer_actual_token_as_string(Tokenizer *self) { + #if 0 + const char *str; + switch(self->token) { + case TOK_NONE: + str = "none"; + break; + case TOK_END_OF_FILE: + str = ""; + break; + case TOK_IDENTIFIER: + /*return self->value.identifier; + */ + str = "identifier"; + break; + case TOK_CONST: + str = "const"; + break; + case TOK_VAR: + str = "var"; + break; + case TOK_STRING: + /*return self->value.string;*/ + str = "string"; + break; + case TOK_EQUALS: + str = "="; + break; + case TOK_OPEN_PAREN: + str = "("; + break; + case TOK_CLOSING_PAREN: + str = ")"; + break; + case TOK_OPEN_BRACE: + str = "{"; + break; + case TOK_CLOSING_BRACE: + str = "}"; + break; + case TOK_IMPORT: + str = "import"; + break; + case TOK_COMMA: + str = ","; + break; + default: + str = "Unknown token"; + break; + } + return create_buffer_view(str, strlen(str)); + #endif + return tokenizer_expected_token_as_string(self->token); +} + int tokenizer_accept(Tokenizer *self, Token expected_token) { Token actual_token; - return_if_error(tokenizer_next(self, &actual_token)); - if(actual_token == expected_token) { - self->needs_update = bool_true; - return TOKENIZER_OK; + BufferView actual_token_str; + BufferView expected_token_str; + + if(!self->needs_update) { + bool match; + match = (self->token == expected_token); + self->needs_update = match; + if(match) + return TOKENIZER_OK; + } else { + return_if_error(tokenizer_next(self, &actual_token)); + if(actual_token == expected_token) { + self->needs_update = bool_true; + return TOKENIZER_OK; + } } - /* Todo: convert token to string */ - tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token); + expected_token_str = tokenizer_expected_token_as_string(expected_token); + actual_token_str = tokenizer_actual_token_as_string(self); + 
tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data); return TOKENIZER_UNEXPECTED_TOKEN; } @@ -268,3 +499,15 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) { va_end(args); ignore_result_int(amal_mutex_unlock(mutex)); } + +void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) { + self->prev_index = error->index; + tokenizer_print_error(self, error->str); +} + +TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str) { + TokenizerError result; + result.index = tokenizer->prev_index; + result.str = err_str; + return result; +} \ No newline at end of file -- cgit v1.2.3