#ifndef AMALGAM_TOKENIZER_H #define AMALGAM_TOKENIZER_H #include "std/buffer_view.h" #include "std/misc.h" #include "std/defs.h" #include "defs.h" #include "number.h" #include "binop_type.h" #include "compiler_options.h" #include #define TOKENIZER_OK 0 /* General error */ #define TOKENIZER_ERR -1 #define TOKENIZER_UNEXPECTED_TOKEN -2 typedef enum { TOK_NONE, TOK_END_OF_FILE, TOK_IDENTIFIER, TOK_CONST, TOK_VAR, TOK_STRING, TOK_NUMBER, TOK_BOOL, TOK_FN, TOK_STRUCT, TOK_EQUALS, TOK_NOT, TOK_OPEN_PAREN, TOK_CLOSING_PAREN, TOK_COMMA, TOK_OPEN_BRACE, TOK_CLOSING_BRACE, TOK_IMPORT, TOK_SEMICOLON, TOK_COLON, TOK_BINOP, TOK_PUB, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_EXTERN, TOK_EXPORT, TOK_RETURN, TOK_QUESTION_MARK, TOK_AMPERSAND, TOK_C_VARARGS } Token; struct Tokenizer { BufferView code; int index; int prev_index; Token token; /* @needs_update is an optimization when running tokenizer_consume_if. If expected_token is wrong and tokenizer_consume_if is called again, then do not rollback to previous token and instead reuse the already parsed token */ bool needs_update; BufferView code_name; union { BufferView identifier; BufferView string; BinopType binop_type; } value; AmalNumber number; bool bool_value; ArenaAllocator *allocator; /* borrowed */ const amal_compiler_options *compiler_options; /* borrowed */ }; typedef struct { int index; char* str; } TokenizerError; CHECK_RESULT int tokenizer_init(Tokenizer *self, ArenaAllocator *allocator, BufferView code, BufferView code_name, const amal_compiler_options *compiler_options); CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token); /* @result is set to 0 if the next token is equal to @expected_token, otherwise @result is set to 1 */ CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result); void tokenizer_print_error_args(Tokenizer *self, int index, const char *fmt, va_list args); void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...); void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error); TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...); int tokenizer_get_error_index(Tokenizer *self); int tokenizer_get_code_reference_index(Tokenizer *self, const char *ref); bool tokenizer_contains_code_reference(Tokenizer *self, const char *code_ref); #endif