From 5e240bdab90c45f935e7d2b33181de13295e7e6b Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Sat, 2 Mar 2019 21:20:33 +0100
Subject: Add string, variable and number. Fix identifier match against const and var

---
 src/tokenizer.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 256 insertions(+), 13 deletions(-)

(limited to 'src/tokenizer.c')

diff --git a/src/tokenizer.c b/src/tokenizer.c
index fdb06cd..41d46fb 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -28,6 +28,7 @@ int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) {
     self->token = TOK_NONE;
     self->needs_update = bool_true;
     self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8);
+    self->number_is_integer = bool_false;
     return 0;
 }
 
@@ -60,7 +61,7 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) {
 }
 
 /* Returns -1 if end of string can't be found */
-static int find_end_of_string(BufferView buf, int index) {
+static CHECK_RESULT int find_end_of_string(BufferView buf, int index) {
     int c;
     bool escape_quote;
     escape_quote = bool_false;
@@ -77,6 +78,75 @@ static int find_end_of_string(BufferView buf, int index) {
     return -1;
 }
 
+/* Largest value an i64 can hold, built from shifts that cannot overflow */
+#define I64_MAX_VALUE ((i64)(((i64)1 << 62) - 1 + ((i64)1 << 62)))
+#define I64_OVERFLOW_ERROR (-1)
+/*
+    Converts a string of decimal digits to an integer.
+    "Unchecked" means the caller guarantees that @str only contains '0'-'9'.
+    Returns 0 on success, or I64_OVERFLOW_ERROR if the value doesn't fit in i64
+    (in which case @result is left untouched).
+*/
+static CHECK_RESULT int string_to_integer_unchecked(BufferView str, i64 *result) {
+    int i;
+    i64 value;
+
+    value = 0;
+    for(i = 0; i < (int)str.size; ++i) {
+        i64 digit;
+        digit = str.data[i] - '0';
+        /* Check before multiplying, signed overflow is undefined behavior */
+        if(value > (I64_MAX_VALUE - digit) / 10)
+            return I64_OVERFLOW_ERROR;
+        value = value * 10 + digit;
+    }
+
+    *result = value;
+    return 0;
+}
+
+/* Largest finite f64. NOTE: assumes f64 is an IEEE 754 double, confirm against the typedef */
+#define F64_MAX_VALUE 1.7976931348623157e308
+#define F64_OVERFLOW_ERROR (-1)
+/*
+    Converts a string of decimal digits with at most one '.' to a float.
+    "Unchecked" means the caller guarantees that @str has that format.
+    Returns 0 on success, or F64_OVERFLOW_ERROR if the value doesn't fit in f64
+    (in which case @result is left untouched).
+*/
+static CHECK_RESULT int string_to_float_unchecked(BufferView str, f64 *result) {
+    int i;
+    f64 value;
+    f64 fraction_scale;
+    bool after_dot;
+
+    value = 0.0;
+    fraction_scale = 0.1;
+    after_dot = bool_false;
+    for(i = 0; i < (int)str.size; ++i) {
+        f64 digit;
+        if(str.data[i] == '.') {
+            after_dot = bool_true;
+            continue;
+        }
+
+        digit = (f64)(str.data[i] - '0');
+        if(after_dot) {
+            /* Digits after the dot add less than 1 in total, they can't overflow */
+            value += digit * fraction_scale;
+            fraction_scale *= 0.1;
+        } else {
+            /* Check before multiplying so the value never becomes infinity */
+            if(value > (F64_MAX_VALUE - digit) / 10.0)
+                return F64_OVERFLOW_ERROR;
+            value = value * 10.0 + digit;
+        }
+    }
+
+    *result = value;
+    return 0;
+}
+
 static CHECK_RESULT int tokenizer_next(Tokenizer *self, Token *token);
 
 static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
@@ -106,12 +176,23 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
 
         self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start);
 
-        if(am_memeql(self->value.identifier.data, "const", 5))
-            *token = TOK_CONST;
-        else if(am_memeql(self->value.identifier.data, "var", 3))
-            *token = TOK_VAR;
-        else
-            *token = TOK_IDENTIFIER;
+        switch(self->value.identifier.size) {
+            case 3: {
+                if(am_memeql(self->value.identifier.data, "var", 3)) {
+                    *token = TOK_VAR;
+                    return TOKENIZER_OK;
+                }
+                break;
+            }
+            case 5: {
+                if(am_memeql(self->value.identifier.data, "const", 5)) {
+                    *token = TOK_CONST;
+                    return TOKENIZER_OK;
+                }
+                break;
+            }
+        }
+        *token = TOK_IDENTIFIER;
     } else if(c == '"') {
         int string_end;
         ++self->index;
@@ -126,6 +207,52 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
         self->index = string_end + 1;
         *token = TOK_STRING;
         return TOKENIZER_OK;
+    } else if(isDigit(c)) {
+        int number_start;
+        int dot_index;
+        BufferView number_str;
+        number_start = self->index;
+        dot_index = -1;
+        ++self->index;
+
+        while(self->index < (int)self->code.size) {
+            c = tokenizer_get_char(self);
+            if(isDigit(c))
+                ++self->index;
+            else if(c == '.' && dot_index == -1) {
+                dot_index = self->index;
+                ++self->index;
+            } else {
+                break;
+            }
+        }
+
+        number_str = create_buffer_view(self->code.data + number_start, self->index - number_start);
+
+        if(dot_index == -1) {
+            int result;
+            result = string_to_integer_unchecked(number_str, &self->value.integer);
+            if(result != 0) {
+                /* TODO */
+                tokenizer_print_error(self, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", (int)number_str.size, number_str.data);
+                return TOKENIZER_ERR;
+            }
+            self->number_is_integer = bool_true;
+        } else {
+            int result;
+            result = string_to_float_unchecked(number_str, &self->value.floating);
+            if(result != 0) {
+                /* TODO */
+                tokenizer_print_error(self, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", (int)number_str.size, number_str.data);
+                return TOKENIZER_ERR;
+            }
+            self->number_is_integer = bool_false;
+        }
+        *token = TOK_NUMBER;
+        return TOKENIZER_OK;
+    } else if(c == '.') {
+        ++self->index;
+        *token = TOK_DOT;
     } else if(c == '=') {
         ++self->index;
         *token = TOK_EQUALS;
@@ -135,6 +262,9 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
     } else if(c == ')') {
         ++self->index;
         *token = TOK_CLOSING_PAREN;
+    } else if(c == ',') {
+        ++self->index;
+        *token = TOK_COMMA;
     } else if(c == '{') {
         ++self->index;
         *token = TOK_OPEN_BRACE;
@@ -194,16 +324,108 @@ int tokenizer_next(Tokenizer *self, Token *token) {
     return result;
 }
 
+/*
+    Local replacement for the libc function with the same name.
+    NOTE: This definition conflicts with <string.h> if that header is ever included in this file.
+*/
+static usize strlen(const char *str) {
+    usize len;
+    len = 0;
+    while(*str != '\0') {
+        ++len;
+        ++str;
+    }
+    return len;
+}
+
+/* Returns a human readable name for @token, used when reporting an unexpected token */
+static BufferView tokenizer_expected_token_as_string(Token token) {
+    const char *str;
+    switch(token) {
+        case TOK_NONE:
+            str = "none";
+            break;
+        case TOK_END_OF_FILE:
+            str = "<eof>";
+            break;
+        case TOK_IDENTIFIER:
+            str = "identifier";
+            break;
+        case TOK_CONST:
+            str = "const";
+            break;
+        case TOK_VAR:
+            str = "var";
+            break;
+        case TOK_STRING:
+            str = "string";
+            break;
+        case TOK_EQUALS:
+            str = "=";
+            break;
+        case TOK_OPEN_PAREN:
+            str = "(";
+            break;
+        case TOK_CLOSING_PAREN:
+            str = ")";
+            break;
+        case TOK_COMMA:
+            str = ",";
+            break;
+        case TOK_OPEN_BRACE:
+            str = "{";
+            break;
+        case TOK_CLOSING_BRACE:
+            str = "}";
+            break;
+        case TOK_IMPORT:
+            str = "import";
+            break;
+        case TOK_NUMBER:
+            str = "number";
+            break;
+        case TOK_DOT:
+            str = ".";
+            break;
+        default:
+            str = "Unknown token";
+            break;
+    }
+    return create_buffer_view(str, strlen(str));
+}
+
+/*
+    Returns the display name of the token stored in self->token.
+    TODO: Return the actual text for identifiers and strings
+    (self->value.identifier / self->value.string) instead of the generic name.
+*/
+static BufferView tokenizer_actual_token_as_string(Tokenizer *self) {
+    return tokenizer_expected_token_as_string(self->token);
+}
+
 int tokenizer_accept(Tokenizer *self, Token expected_token) {
     Token actual_token;
-    return_if_error(tokenizer_next(self, &actual_token));
-    if(actual_token == expected_token) {
-        self->needs_update = bool_true;
-        return TOKENIZER_OK;
+    BufferView actual_token_str;
+    BufferView expected_token_str;
+
+    if(!self->needs_update) {
+        bool match;
+        /* No update needed: compare against the stored self->token instead of reading a new token */
+        match = (self->token == expected_token);
+        self->needs_update = match;
+        if(match)
+            return TOKENIZER_OK;
+    } else {
+        return_if_error(tokenizer_next(self, &actual_token));
+        if(actual_token == expected_token) {
+            self->needs_update = bool_true;
+            return TOKENIZER_OK;
+        }
     }
 
-    /* Todo: convert token to string */
-    tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token);
+    expected_token_str = tokenizer_expected_token_as_string(expected_token);
+    actual_token_str = tokenizer_actual_token_as_string(self);
+    tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", (int)expected_token_str.size, expected_token_str.data, (int)actual_token_str.size, actual_token_str.data);
     return TOKENIZER_UNEXPECTED_TOKEN;
 }
 
@@ -268,3 +490,24 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
     va_end(args);
     ignore_result_int(amal_mutex_unlock(mutex));
 }
+
+/*
+    Prints the message stored in @error at the index stored in @error.
+    NOTE: This overwrites self->prev_index with error->index.
+*/
+void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) {
+    self->prev_index = error->index;
+    /* The message is plain text, pass it as an argument so it's never parsed as a format string */
+    tokenizer_print_error(self, "%s", error->str);
+}
+
+/*
+    Bundles @err_str with the current prev_index of @tokenizer into a TokenizerError.
+    @err_str is stored by pointer and not copied, so it has to outlive the returned error.
+*/
+TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str) {
+    TokenizerError result;
+    result.index = tokenizer->prev_index;
+    result.str = err_str;
+    return result;
+}
-- 
cgit v1.2.3