author    dec05eba <dec05eba@protonmail.com>    2019-03-02 21:20:33 +0100
committer dec05eba <dec05eba@protonmail.com>    2020-07-25 14:36:46 +0200
commit    5e240bdab90c45f935e7d2b33181de13295e7e6b (patch)
tree      0718d7ca2386292c5b3646d0cb1ae499bb7ba818 /src/tokenizer.c
parent    2a17f5225a09c01eb04225d0241c686ea553f912 (diff)
Add string, variable and number. Fix identifier match against const and var
Diffstat (limited to 'src/tokenizer.c')
-rw-r--r--  src/tokenizer.c  |  269
1 file changed, 256 insertions(+), 13 deletions(-)
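The commit message sums it up: the tokenizer gains string literals, numbers (integer and floating-point), comma and dot tokens, and the keyword matcher no longer mistakes identifiers that merely start with "const" or "var" for the keywords themselves. A hypothetical input exercising the new tokens, written as a C string literal for illustration (the sample program itself is not part of the commit):

    const char *sample_input =
        "const greeting = \"hello\"\n"
        "var pi = 3.14\n"
        "var answer = 42\n";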
diff --git a/src/tokenizer.c b/src/tokenizer.c
index fdb06cd..41d46fb 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -28,6 +28,7 @@ int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) {
self->token = TOK_NONE;
self->needs_update = bool_true;
self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8);
+ self->number_is_integer = bool_false;
return 0;
}
@@ -60,7 +61,7 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) {
}
/* Returns -1 if end of string can't be found */
-static int find_end_of_string(BufferView buf, int index) {
+static CHECK_RESULT int find_end_of_string(BufferView buf, int index) {
int c;
bool escape_quote;
escape_quote = bool_false;
@@ -77,6 +78,44 @@ static int find_end_of_string(BufferView buf, int index) {
return -1;
}
+#define I64_OVERFLOW_ERROR -1
+static CHECK_RESULT int string_to_integer_unchecked(BufferView str, i64 *result) {
+ int i;
+ i64 value;
+
+ value = 0;
+ for(i = 0; i < (int)str.size; ++i) {
+ i64 value_before;
+ value_before = value;
+ value = value * 10 + (str.data[i] - '0'); /* shift one decimal place, add digit */
+ /* wrap-around: the result no longer fits in a signed 64-bit integer */
+ if(value < value_before)
+ return I64_OVERFLOW_ERROR;
+ }
+
+ *result = value;
+ return 0;
+}
+
+#define F64_OVERFLOW_ERROR -1
+static CHECK_RESULT int string_to_float_unchecked(BufferView str, f64 *result) {
+ int i;
+ f64 value;
+ f64 fraction_scale;
+
+ value = 0.0;
+ fraction_scale = 0.0;
+ for(i = 0; i < (int)str.size; ++i) {
+ if(str.data[i] == '.') {
+ /* digits after the dot belong to the fraction */
+ fraction_scale = 1.0;
+ continue;
+ }
+ value = value * 10.0 + (str.data[i] - '0');
+ fraction_scale *= 10.0;
+ /* conservative overflow guard, just below the f64 maximum (~1.8e308) */
+ if(value > 1.0e308)
+ return F64_OVERFLOW_ERROR;
+ }
+
+ if(fraction_scale > 0.0)
+ value /= fraction_scale;
+ *result = value;
+ return 0;
+}
+
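For clarity, here is a minimal standalone sketch (not part of the commit; plain C99 with libc, unlike the freestanding code above) of the digit-accumulation scheme these helpers use, including the wrap-around overflow probe:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Accumulate decimal digits left to right: value = value*10 + digit.
       Returns -1 when the value wraps around, i.e. no longer fits in 64 bits.
       (A strictly portable check would test against INT64_MAX before the
       multiply-add; the wrap-around test mirrors the commit's style.) */
    static int parse_i64(const char *s, size_t len, int64_t *out) {
        int64_t value = 0;
        size_t i;
        for(i = 0; i < len; ++i) {
            int64_t before = value;
            value = value * 10 + (s[i] - '0');
            if(value < before)
                return -1;
        }
        *out = value;
        return 0;
    }

    int main(void) {
        int64_t v;
        if(parse_i64("12345", 5, &v) == 0)
            printf("%lld\n", (long long)v); /* prints 12345 */
        return 0;
    }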
static CHECK_RESULT int tokenizer_next(Tokenizer *self, Token *token);
static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
@@ -106,12 +145,23 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start);
- if(am_memeql(self->value.identifier.data, "const", 5))
- *token = TOK_CONST;
- else if(am_memeql(self->value.identifier.data, "var", 3))
- *token = TOK_VAR;
- else
- *token = TOK_IDENTIFIER;
+ switch(self->value.identifier.size) {
+ case 3: {
+ if(am_memeql(self->value.identifier.data, "var", 3)) {
+ *token = TOK_VAR;
+ return TOKENIZER_OK;
+ }
+ break;
+ }
+ case 5: {
+ if(am_memeql(self->value.identifier.data, "const", 5)) {
+ *token = TOK_CONST;
+ return TOKENIZER_OK;
+ }
+ break;
+ }
+ }
+ *token = TOK_IDENTIFIER;
} else if(c == '"') {
int string_end;
++self->index;
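The switch on identifier length above is what fixes the keyword match: the old code compared only the first 3 or 5 bytes, so any identifier beginning with "var" or "const" (e.g. "variable") lexed as the keyword. A standalone illustration of the failure mode (hypothetical check, not from the commit):

    #include <string.h>
    #include <assert.h>

    int main(void) {
        const char *ident = "variable";
        size_t len = strlen(ident); /* 8 */
        /* old behaviour: prefix-only comparison wrongly matches "var" */
        assert(memcmp(ident, "var", 3) == 0);
        /* new behaviour: the length must match before bytes are compared */
        assert(!(len == 3 && memcmp(ident, "var", 3) == 0));
        return 0;
    }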
@@ -126,6 +176,52 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
self->index = string_end + 1;
*token = TOK_STRING;
return TOKENIZER_OK;
+ } else if(isDigit(c)) {
+ int number_start;
+ int dot_index;
+ BufferView number_str;
+ number_start = self->index;
+ dot_index = -1;
+ ++self->index;
+
+ while(self->index < (int)self->code.size) {
+ c = tokenizer_get_char(self);
+ if(isDigit(c))
+ ++self->index;
+ else if(c == '.' && dot_index == -1) {
+ dot_index = self->index;
+ ++self->index;
+ } else {
+ break;
+ }
+ }
+
+ number_str = create_buffer_view(self->code.data + number_start, self->index - number_start);
+
+ if(dot_index == -1) {
+ int result;
+ result = string_to_integer_unchecked(number_str, &self->value.integer);
+ if(result != 0) {
+ /* TODO */
+ tokenizer_print_error(self, "Integer value %.*s is too large to fit in signed 64-bit. Support for large integers is not supported yet.", number_str.size, number_str.data);
+ return TOKENIZER_ERR;
+ }
+ self->number_is_integer = bool_true;
+ } else {
+ int result;
+ result = string_to_float_unchecked(number_str, &self->value.floating);
+ if(result != 0) {
+ /* TODO */
+ tokenizer_print_error(self, "Float value %.*s is too large to fit in 64-bit. Support for large floating numbers is not supported yet.", number_str.size, number_str.data);
+ return TOKENIZER_ERR;
+ }
+ self->number_is_integer = bool_false;
+ }
+ *token = TOK_NUMBER;
+ return TOKENIZER_OK;
+ } else if(c == '.') {
+ ++self->index;
+ *token = TOK_DOT;
} else if(c == '=') {
++self->index;
*token = TOK_EQUALS;
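With the digit branch and TOK_DOT in place, "pi = 3.14" tokenizes as TOK_IDENTIFIER, TOK_EQUALS, TOK_NUMBER (with number_is_integer == bool_false), while "x.y" yields TOK_IDENTIFIER, TOK_DOT, TOK_IDENTIFIER, since a dot only folds into a number when the token starts with a digit. Consumers can tell which union member of value is live from the new flag; a hypothetical fragment (use_integer/use_float are placeholders, not part of the codebase):

    /* after tokenizer_next() has produced TOK_NUMBER: */
    if(tokenizer->number_is_integer)
        use_integer(tokenizer->value.integer);
    else
        use_float(tokenizer->value.floating);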
@@ -135,6 +231,9 @@ static CHECK_RESULT int __tokenizer_next(Tokenizer *self, Token *token) {
} else if(c == ')') {
++self->index;
*token = TOK_CLOSING_PAREN;
+ } else if(c == ',') {
+ ++self->index;
+ *token = TOK_COMMA;
} else if(c == '{') {
++self->index;
*token = TOK_OPEN_BRACE;
@@ -194,16 +293,148 @@ int tokenizer_next(Tokenizer *self, Token *token) {
return result;
}
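+/* local strlen, presumably to avoid a libc dependency; note the name shadows an identifier reserved for the standard library */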
+static usize strlen(const char *str) {
+ usize len;
+ len = 0;
+ while(*str != '\0') {
+ ++len;
+ ++str;
+ }
+ return len;
+}
+
+static BufferView tokenizer_expected_token_as_string(Token token) {
+ const char *str;
+ switch(token) {
+ case TOK_NONE:
+ str = "none";
+ break;
+ case TOK_END_OF_FILE:
+ str = "<eof>";
+ break;
+ case TOK_IDENTIFIER:
+ str = "identifier";
+ break;
+ case TOK_CONST:
+ str = "const";
+ break;
+ case TOK_VAR:
+ str = "var";
+ break;
+ case TOK_STRING:
+ str = "string";
+ break;
+ case TOK_EQUALS:
+ str = "=";
+ break;
+ case TOK_OPEN_PAREN:
+ str = "(";
+ break;
+ case TOK_CLOSING_PAREN:
+ str = ")";
+ break;
+ case TOK_COMMA:
+ str = ",";
+ break;
+ case TOK_OPEN_BRACE:
+ str = "{";
+ break;
+ case TOK_CLOSING_BRACE:
+ str = "}";
+ break;
+ case TOK_IMPORT:
+ str = "import";
+ break;
+ case TOK_NUMBER:
+ str = "number";
+ break;
+ case TOK_DOT:
+ str = ".";
+ break;
+ default:
+ str = "Unknown token";
+ break;
+ }
+ return create_buffer_view(str, strlen(str));
+}
+
+static BufferView tokenizer_actual_token_as_string(Tokenizer *self) {
+ /* TODO: for TOK_IDENTIFIER and TOK_STRING, return the actual
+ identifier/string text (self->value.identifier / self->value.string)
+ instead of the generic token name */
+ return tokenizer_expected_token_as_string(self->token);
+}
+
int tokenizer_accept(Tokenizer *self, Token expected_token) {
Token actual_token;
- return_if_error(tokenizer_next(self, &actual_token));
- if(actual_token == expected_token) {
- self->needs_update = bool_true;
- return TOKENIZER_OK;
+ BufferView actual_token_str;
+ BufferView expected_token_str;
+
+ if(!self->needs_update) {
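+ /* a token was already peeked but not consumed; accept it in place */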
+ bool match;
+ match = (self->token == expected_token);
+ self->needs_update = match;
+ if(match)
+ return TOKENIZER_OK;
+ } else {
+ return_if_error(tokenizer_next(self, &actual_token));
+ if(actual_token == expected_token) {
+ self->needs_update = bool_true;
+ return TOKENIZER_OK;
+ }
}
- /* Todo: convert token to string */
- tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token);
+ expected_token_str = tokenizer_expected_token_as_string(expected_token);
+ actual_token_str = tokenizer_actual_token_as_string(self);
+ tokenizer_print_error(self, "Expected \"%.*s\", got \"%.*s\"", expected_token_str.size, expected_token_str.data, actual_token_str.size, actual_token_str.data);
return TOKENIZER_UNEXPECTED_TOKEN;
}
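The reworked tokenizer_accept doubles as a consume-if-peeked primitive: when a token has been fetched but not consumed (needs_update is false), a matching accept consumes it without advancing twice. A hypothetical parser fragment built on the API shown in this diff (return_if_error is the codebase's early-return macro):

    /* parse `const <identifier> = ...` (sketch; right-hand side elided) */
    static CHECK_RESULT int parser_parse_const_decl(Tokenizer *tokenizer) {
        return_if_error(tokenizer_accept(tokenizer, TOK_CONST));
        return_if_error(tokenizer_accept(tokenizer, TOK_IDENTIFIER));
        return_if_error(tokenizer_accept(tokenizer, TOK_EQUALS));
        /* ... parse the initializer ... */
        return 0;
    }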
@@ -268,3 +499,15 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
va_end(args);
ignore_result_int(amal_mutex_unlock(mutex));
}
+
+void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error) {
+ self->prev_index = error->index;
+ /* print the stored message verbatim; don't treat it as a format string */
+ tokenizer_print_error(self, "%s", error->str);
+}
+
+TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str) {
+ TokenizerError result;
+ result.index = tokenizer->prev_index;
+ result.str = err_str;
+ return result;
+}
\ No newline at end of file
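tokenizer_create_error and tokenizer_print_error_object let a caller defer error reporting: capture the message together with the current source index, try other parses, and print later with the right location. A hypothetical use, assuming the fields shown above:

    TokenizerError err;
    err = tokenizer_create_error(&tokenizer, "expected declaration");
    /* ... attempt alternative parses; if they all fail: */
    tokenizer_print_error_object(&tokenizer, &err);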