include/tokenizer.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

#ifndef AMALGAM_TOKENIZER_H
#define AMALGAM_TOKENIZER_H

#include "std/buffer_view.h"
#include "std/misc.h"
#include "binop_type.h"

/*
    Status codes for the tokenizer API.
    NOTE(review): presumably these are the values returned by the CHECK_RESULT int
    functions declared in this header - confirm against the implementation.

    The negative codes are parenthesized so that each macro expands to one
    primary expression; without the parentheses a postfix operator applied to
    the macro would bind to the literal before the unary minus.
*/
#define TOKENIZER_OK 0
/* General error */
#define TOKENIZER_ERR (-1)
/* NOTE(review): judging by the name, reported when a token other than the expected one is found - confirm */
#define TOKENIZER_UNEXPECTED_TOKEN (-2)

/*
    Kind of token, stored in Tokenizer.token and passed as @expected_token to
    tokenizer_accept and tokenizer_consume_if.
    No enumerator has an explicit value, so TOK_NONE is 0 and the rest count up
    in declaration order.
    NOTE(review): the notes next to the enumerators are inferred from their names and
    from the members of Tokenizer.value; the lexer that produces them is not visible
    in this header - confirm against the implementation.
*/
typedef enum {
    TOK_NONE,
    TOK_END_OF_FILE,
    TOK_IDENTIFIER, /* presumably carries Tokenizer.value.identifier - confirm */
    TOK_CONST,
    TOK_VAR,
    TOK_STRING, /* presumably carries Tokenizer.value.string - confirm */
    TOK_FN,
    TOK_EQUALS,
    TOK_OPEN_PAREN,
    TOK_CLOSING_PAREN,
    TOK_COMMA,
    TOK_OPEN_BRACE,
    TOK_CLOSING_BRACE,
    TOK_IMPORT,
    TOK_NUMBER, /* presumably carries Tokenizer.value.integer or .floating, see number_is_integer - confirm */
    TOK_SEMICOLON,
    TOK_COLON,
    TOK_BINOP /* presumably carries Tokenizer.value.binop_type - confirm */
} Token;

/*
    Tokenizer state, passed as @self to the tokenizer_* functions.
    NOTE(review): the field notes marked "presumably" are inferred from the field names
    and the tokenizer_init signature; how the implementation uses them is not
    visible in this header - confirm.
*/
typedef struct {
    /* Presumably the source text being tokenized (the @code argument of tokenizer_init) - confirm */
    BufferView code;
    /* Presumably the current position within @code - confirm */
    int index;
    /* Presumably the position before the most recent token was read - confirm */
    int prev_index;
    /* Presumably the current line number, used for error reporting - confirm */
    int line;
    /* Kind of the most recently parsed token */
    Token token;
    /*
    @needs_update is an optimization for tokenizer_consume_if. If the expected token does not match
    and tokenizer_consume_if is called again, the tokenizer does not roll back to the previous token;
    it reuses the token that has already been parsed instead.
    */
    bool needs_update;
    /* Presumably a name identifying @code (the @code_name argument of tokenizer_init) - confirm */
    BufferView code_name;
    
    /*
    Payload of the current token. This is a union, so only one member is meaningful at a time.
    NOTE(review): which member is valid presumably depends on @token - confirm.
    */
    union {
        BufferView identifier;
        BufferView string;
        i64 integer;
        f64 floating;
        BinopType binop_type;
    } value;
    /* Presumably selects between value.integer and value.floating for a number token - confirm */
    bool number_is_integer;
} Tokenizer;

/*
    Error object returned by tokenizer_create_error and accepted by
    tokenizer_print_error_object.
    NOTE(review): field meanings are inferred from their names - confirm against the implementation.
*/
typedef struct {
    /* Presumably the position in the tokenized code where the error occurred - confirm */
    int index;
    /* Presumably the error message (the @err_str argument of tokenizer_create_error) - confirm */
    const char* str;
} TokenizerError;

/*
    Tokenizer API. The implementations are not part of this header.
    NOTE(review): CHECK_RESULT is defined outside this file (presumably in std/misc.h) and
    presumably marks the return value as one that must be checked. The int results are
    presumably TOKENIZER_OK on success and a negative TOKENIZER_* code on failure - confirm.
*/

/* Sets up @self for tokenizing @code; @code_name presumably identifies the code in error output - confirm */
CHECK_RESULT int tokenizer_init(Tokenizer *self, BufferView code, BufferView code_name);
/* NOTE(review): presumably reads the next token and fails unless it is @expected_token - confirm */
CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
/* 
   @result is set to 0 if the next token is equal to @expected_token,
   otherwise @result is set to 1
   NOTE(review): @result is a bool, so the 0-on-match polarity described above looks
   inverted for a "consume if" function - confirm against the implementation.
*/
CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
/* Takes a format string @fmt followed by variadic arguments; presumably printf-style - confirm */
void tokenizer_print_error(Tokenizer *self, const char *fmt, ...);
/* NOTE(review): presumably prints an error previously built with tokenizer_create_error - confirm */
void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error);
/* Returns a TokenizerError by value; presumably it records @err_str and the current position of @tokenizer - confirm */
TokenizerError tokenizer_create_error(Tokenizer *tokenizer, const char *err_str);

#endif