diff options
Diffstat (limited to 'src/tokenizer.c')
-rw-r--r-- | src/tokenizer.c | 87 |
1 file changed, 82 insertions, 5 deletions
diff --git a/src/tokenizer.c b/src/tokenizer.c index fda3e40..e85c952 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -1,5 +1,7 @@ #include "../include/tokenizer.h" -#include "../include/mem.h" +#include "../include/std/mem.h" +#include "../include/std/log.h" +#include "../include/std/thread.h" #include <assert.h> #include <limits.h> #include <stdio.h> @@ -17,12 +19,13 @@ static int isAlphaDigit(int c) { return isAlpha(c) || isDigit(c); } -int tokenizer_init(Tokenizer *self, BufferView code) { +int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name) { assert(code.size <= INT_MAX); self->code = code; self->index = 0; self->prev_index = 0; self->line = 1; + self->code_name = code_name.data ? code_name : create_buffer_view("<buffer>", 8); return 0; } @@ -52,9 +55,28 @@ static Token tokenizer_skip_whitespace(Tokenizer *self) { } } +/* Returns -1 if end of string can't be found */ +static int find_end_of_string(BufferView buf, int index) { + int c; + bool escape_quote; + escape_quote = bool_false; + + for(; index < (int)buf.size; ++index) { + c = buf.data[index]; + if(c == '\\') + escape_quote = !escape_quote; + else if(!escape_quote && c == '"') + return index; + else + escape_quote = bool_false; + } + return -1; +} + int tokenizer_next(Tokenizer *self, Token *token) { Token last_token; int c; + int result; last_token = tokenizer_skip_whitespace(self); if(last_token == TOK_END_OF_FILE) { @@ -85,6 +107,20 @@ int tokenizer_next(Tokenizer *self, Token *token) { *token = TOK_VAR; else *token = TOK_IDENTIFIER; + } else if(c == '"') { + int string_end; + ++self->index; + string_end = find_end_of_string(self->code, self->index); + if(string_end == -1) { + tokenizer_print_error(self, "String end not found. 
Did you forget '\"' or did you have a mismatch of number of '\"'?"); + return TOKENIZER_ERR; + } + + self->value.string.data = &self->code.data[self->index]; + self->value.string.size = string_end - self->index; + self->index = string_end + 1; + *token = TOK_STRING; + return TOKENIZER_OK; } else if(c == '=') { ++self->index; *token = TOK_EQUALS; @@ -100,8 +136,45 @@ int tokenizer_next(Tokenizer *self, Token *token) { } else if(c == '}') { ++self->index; *token = TOK_CLOSING_BRACE; + } else if(c == '@') { + const char *err_msg; + ++self->index; + if(self->index + 6 >= (int)self->code.size || !am_memeql(self->code.data + self->index, "import", 6)) { + err_msg = "Expected '@import(path)'"; + goto import_error; + } + self->index += 6; + + result = tokenizer_next(self, &last_token); + if(result != 0 || last_token != TOK_OPEN_PAREN) { + err_msg = "Expected '(' after @import"; + goto import_error; + } + + result = tokenizer_next(self, &last_token); + if(result != 0 || last_token != TOK_STRING) { + err_msg = "Expected string after @import("; + goto import_error; + } + + if(self->value.string.size == 0) { + err_msg = "Path in @import can't be empty"; + goto import_error; + } + + result = tokenizer_next(self, &last_token); + if(result != 0 || last_token != TOK_CLOSING_PAREN) { + err_msg = "Expected ')' after @import(path"; + goto import_error; + } + + *token = TOK_IMPORT; + return TOKENIZER_OK; + + import_error: + tokenizer_print_error(self, err_msg); + return TOKENIZER_ERR; } else { - /*self.printError("Unexpected symbol '{c}'", c);*/ tokenizer_print_error(self, "Unexpected symbol '%c'", c); return TOKENIZER_UNEXPECTED_TOKEN; } @@ -130,8 +203,8 @@ int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) { if(actual_token == expected_token) { *result = bool_true; } else { - /* No need to restore self.prev_index as it's updated on the next call to tokenizer_next */ self->index = index; + self->prev_index = index; self->line = line; *result = bool_false; } 
@@ -167,16 +240,20 @@ void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) { int line_end; int prev_column; int i; + amal_mutex *mutex; + mutex = amal_log_get_mutex(); + ignore_result_int(amal_mutex_lock(mutex, "tokenizer_print_error")); va_start(args, fmt); line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index); line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index); prev_column = self->prev_index - line_start; - fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + prev_column); + fprintf(stderr, "\x1b[1;37m%.*s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", (int)self->code_name.size, self->code_name.data, self->line, 1 + prev_column); vfprintf(stderr, fmt, args); fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start); for(i = 0; i < prev_column; ++i) fprintf(stderr, " "); fprintf(stderr, "\x1b[1;32m^\x1b[0m\n"); va_end(args); + ignore_result_int(amal_mutex_unlock(mutex)); }
\ No newline at end of file |