1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
#ifndef AMALGAM_TOKENIZER_H
#define AMALGAM_TOKENIZER_H
#include "std/buffer_view.h"
#include "std/misc.h"
#include "std/defs.h"
#include "binop_type.h"
/*
Integer status codes for the tokenizer: zero for success, negative values for failures.
The negative replacement lists are parenthesized so that each macro always expands to a
single operand, whatever expression it is pasted into.
NOTE(review): presumably these are the values returned by the CHECK_RESULT tokenizer_*
functions declared in this header - confirm against the implementation.
*/
#define TOKENIZER_OK 0
/* General error */
#define TOKENIZER_ERR (-1)
/* NOTE(review): presumably reported when the token found is not the expected one - confirm */
#define TOKENIZER_UNEXPECTED_TOKEN (-2)
/*
Token kinds. The Tokenizer struct holds a value of this type in its token field.
Only the first enumerator is given a value; the others follow consecutively in
declaration order, so TOK_NONE is 0 and TOK_BINOP (the last one) is 17.
Appending or reordering enumerators changes those numeric values.
*/
typedef enum {
    TOK_NONE = 0,
    TOK_END_OF_FILE,
    TOK_IDENTIFIER,
    TOK_CONST,
    TOK_VAR,
    TOK_STRING,
    TOK_FN,
    TOK_EQUALS,
    TOK_OPEN_PAREN,
    TOK_CLOSING_PAREN,
    TOK_COMMA,
    TOK_OPEN_BRACE,
    TOK_CLOSING_BRACE,
    TOK_IMPORT,
    TOK_NUMBER,
    TOK_SEMICOLON,
    TOK_COLON,
    TOK_BINOP
} Token;
/*
State carried by a tokenizer instance; every tokenizer_* function in this header
takes a pointer to one of these as @self.
NOTE(review): remarks below that start with "presumably" are read off the field
names only - confirm them against the implementation.
*/
typedef struct {
    BufferView code;
    int index;
    int prev_index;
    Token token;
    /*
    @needs_update is an optimization for tokenizer_consume_if. When expected_token
    did not match and tokenizer_consume_if is called again, the already parsed token
    is reused instead of rolling back to the previous token.
    */
    bool needs_update;
    BufferView code_name;
    /* The members share storage, so only one of them holds a meaningful value at a time */
    union {
        BufferView identifier;
        BufferView string;
        i64 integer;
        f64 floating;
        BinopType binop_type;
    } value;
    /* presumably tells whether value.integer or value.floating is the valid member for a number - TODO confirm */
    bool number_is_integer;
    ScopedAllocator *allocator; /* borrowed */
} Tokenizer;
/*
Value type returned by tokenizer_create_error and accepted (by pointer) by
tokenizer_print_error_object.
NOTE(review): presumably @index is a position in the tokenized code and @str is the
formatted message; who allocates and frees @str is not visible from this header - confirm.
*/
typedef struct {
    int index;
    char *str;
} TokenizerError;
/*
NOTE(review): presumably prepares @self for tokenizing @code, with @code_name naming that
code in error output - confirm against the implementation.
The Tokenizer struct marks its allocator field as borrowed, so @allocator is presumably
not owned by the tokenizer.
Returns an int status. CHECK_RESULT is defined outside this header; presumably it asks the
compiler to warn when the result is ignored - confirm.
*/
CHECK_RESULT int tokenizer_init(Tokenizer *self, ScopedAllocator *allocator, BufferView code, BufferView code_name);
/*
NOTE(review): presumably requires the next token to be @expected_token and returns
TOKENIZER_UNEXPECTED_TOKEN otherwise - confirm against the implementation.
*/
CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
/*
@result is set to 0 if the next token is equal to @expected_token,
otherwise @result is set to 1
NOTE(review): @result is a bool, so "0 on match" reads as inverted compared to the usual
true-on-match convention - verify against the implementation and correct this comment if needed.
*/
CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
/*
Error reporting helpers. @fmt is followed by variadic arguments.
NOTE(review): presumably @fmt is a printf-style format string and @index is a position in
the tokenized code - confirm against the implementation.
*/
void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...);
void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error);
/*
Returns a TokenizerError by value.
NOTE(review): ownership of the str member of the returned error is not visible from this header - confirm.
*/
TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...);
int tokenizer_get_error_index(Tokenizer *self);
/* NOTE(review): presumably @ref must point inside the tokenized code and the result is its offset - confirm */
int tokenizer_get_code_reference_index(Tokenizer *self, const char *ref);
#endif
|