#include "../include/tokenizer.h"
#include "../include/mem.h"
#include <assert.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>

/* Returns non-zero if |c| is an ASCII letter. */
static int isAlpha(int c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Returns non-zero if |c| is an ASCII decimal digit. */
static int isDigit(int c) {
    return c >= '0' && c <= '9';
}

/* Returns non-zero if |c| is an ASCII letter or decimal digit. */
static int isAlphaDigit(int c) {
    return isAlpha(c) || isDigit(c);
}

/*
 * Prepares |self| to tokenize |code| from its first byte, at line 1.
 * The code size must fit in an int since positions are tracked as int.
 * Always returns 0.
 */
int tokenizer_init(Tokenizer *self, BufferView code) {
    assert(code.size <= INT_MAX);
    self->code = code;
    self->index = 0;
    self->prev_index = 0;
    self->line = 1;
    return 0;
}

/* Returns the byte at the current position. The position must be inside the code. */
static int tokenizer_get_char(Tokenizer *self) {
    assert(self->index >= 0 && self->index < (int)self->code.size);
    return self->code.data[self->index];
}

/*
 * Advances past spaces, tabs and newlines, counting each newline in self->line.
 * Returns TOK_END_OF_FILE if the end of the code is reached,
 * otherwise TOK_NONE with the position at the first non-whitespace byte.
 */
static Token tokenizer_skip_whitespace(Tokenizer *self) {
    int c;
    for(;;) {
        if(self->index >= (int)self->code.size)
            return TOK_END_OF_FILE;

        c = self->code.data[self->index];
        switch(c) {
            case '\n':
                ++self->line;
                /* fallthrough */
            case ' ':
            case '\t':
                break;
            default:
                return TOK_NONE;
        }
        ++self->index;
    }
}

/*
 * Reads the next token into |token|.
 * For identifiers and keywords, self->value.identifier is set to the token text.
 * self->prev_index is set to the start of the token (it is left untouched at end of file).
 * Returns TOKENIZER_OK on success (including end of file), or
 * TOKENIZER_UNEXPECTED_TOKEN after printing an error for an unknown symbol.
 */
int tokenizer_next(Tokenizer *self, Token *token) {
    Token last_token;
    int c;

    last_token = tokenizer_skip_whitespace(self);
    if(last_token == TOK_END_OF_FILE) {
        *token = TOK_END_OF_FILE;
        return TOKENIZER_OK;
    }

    self->prev_index = self->index;
    c = tokenizer_get_char(self);
    if(isAlpha(c) || c == '_') {
        int identifier_start;
        int identifier_length;

        identifier_start = self->index;
        ++self->index;
        while(self->index < (int)self->code.size) {
            c = tokenizer_get_char(self);
            if(isAlphaDigit(c) || c == '_')
                ++self->index;
            else
                break;
        }

        identifier_length = self->index - identifier_start;
        self->value.identifier = create_buffer_view(self->code.data + identifier_start, identifier_length);

        /*
         * The length has to match before comparing bytes. Otherwise identifiers that only
         * start with a keyword ("constant", "variable") would be treated as that keyword,
         * and a short identifier at the end of the code would be compared past the buffer.
         */
        if(identifier_length == 5 && am_memeql(self->value.identifier.data, "const", 5))
            *token = TOK_CONST;
        else if(identifier_length == 3 && am_memeql(self->value.identifier.data, "var", 3))
            *token = TOK_VAR;
        else
            *token = TOK_IDENTIFIER;
    } else if(c == '=') {
        ++self->index;
        *token = TOK_EQUALS;
    } else if(c == '(') {
        ++self->index;
        *token = TOK_OPEN_PAREN;
    } else if(c == ')') {
        ++self->index;
        *token = TOK_CLOSING_PAREN;
    } else if(c == '{') {
        ++self->index;
        *token = TOK_OPEN_BRACE;
    } else if(c == '}') {
        ++self->index;
        *token = TOK_CLOSING_BRACE;
    } else {
        tokenizer_print_error(self, "Unexpected symbol '%c'", c);
        return TOKENIZER_UNEXPECTED_TOKEN;
    }
    return TOKENIZER_OK;
}

/*
 * Reads the next token and requires it to be |expected_token|.
 * Returns TOKENIZER_OK on a match. On a mismatch an error is printed and
 * TOKENIZER_UNEXPECTED_TOKEN is returned. A failure of tokenizer_next is
 * handed to return_if_error (defined outside this file).
 */
int tokenizer_accept(Tokenizer *self, Token expected_token) {
    Token actual_token;
    return_if_error(tokenizer_next(self, &actual_token));
    if(actual_token == expected_token)
        return TOKENIZER_OK;

    /* Todo: convert token to string */
    tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token);
    return TOKENIZER_UNEXPECTED_TOKEN;
}

/*
 * Reads the next token and keeps it consumed only if it is |expected_token|.
 * |result| is set to bool_true on a match. Otherwise the position and line are
 * rewound to where they were before the call and |result| is set to bool_false.
 * Returns TOKENIZER_OK in both cases. A failure of tokenizer_next is handed to
 * return_if_error (defined outside this file) before |result| is written.
 */
int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) {
    int index;
    int line;
    Token actual_token;

    index = self->index;
    line = self->line;
    return_if_error(tokenizer_next(self, &actual_token));
    if(actual_token == expected_token) {
        *result = bool_true;
    } else {
        /* No need to restore self.prev_index as it's updated on the next call to tokenizer_next */
        self->index = index;
        self->line = line;
        *result = bool_false;
    }
    return TOKENIZER_OK;
}

/*
 * Returns the index of the first byte of the line that contains |index|.
 * A '\n' or '\r' located at |index| itself is treated as the end of that line,
 * so the result is never greater than |index| (for |index| >= 0).
 */
static int tokenizer_get_start_of_line_from_index(Tokenizer *self, int index) {
    int c;
    /*
     * Begin the search one byte before |index|. Starting at |index| would report a line start
     * past |index| when that byte is a line terminator, and would read data[0] of an empty buffer.
     */
    --index;
    while(index >= 0) {
        c = self->code.data[(usize)index];
        if(c == '\n' || c == '\r')
            return index + 1;
        --index;
    }
    return 0;
}

/*
 * Returns the index of the first '\n' or '\r' at or after |index|,
 * or the code size if the line runs to the end of the code.
 */
static int tokenizer_get_end_of_line_from_index(Tokenizer *self, int index) {
    int c;
    while(index < (int)self->code.size) {
        c = self->code.data[(usize)index];
        if(c == '\n' || c == '\r')
            break;
        ++index;
    }
    return index;
}

/*
 * Prints a printf-style formatted error to stderr, followed by the source line
 * that contains self->prev_index and a caret under that column.
 * The reported line number is self->line and the column is 1-based.
 */
void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
    va_list args;
    int line_start;
    int line_end;
    int prev_column;
    int i;

    va_start(args, fmt);
    line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
    line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
    prev_column = self->prev_index - line_start;

    fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + prev_column);
    vfprintf(stderr, fmt, args);
    /* The code is not printed as a C string: the precision limits output to the current line */
    fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
    for(i = 0; i < prev_column; ++i)
        fprintf(stderr, " ");
    fprintf(stderr, "\x1b[1;32m^\x1b[0m\n");
    va_end(args);
}