/* Source path: include/tokenizer.h (blob b6b401b787b6f32b391dec3a64dc8c72832fe767) */
#ifndef AMALGAM_TOKENIZER_H
#define AMALGAM_TOKENIZER_H

#include "std/buffer_view.h"
#include "std/misc.h"
#include "std/defs.h"
#include "defs.h"
#include "binop_type.h"
#include "compiler_options.h"
#include <stdarg.h>

/*
    Integer status codes for the tokenizer API.
    Negative replacement lists are parenthesized so that the macros always
    expand to a single primary expression, whatever the surrounding context.
*/
/* Success */
#define TOKENIZER_OK 0
/* General error */
#define TOKENIZER_ERR (-1)
/* NOTE(review): presumably reported when the next token is not the expected one - confirm in the implementation */
#define TOKENIZER_UNEXPECTED_TOKEN (-2)

/*
    The kinds of token the tokenizer reports.
    Enumerators are numbered consecutively, starting at 0 with TOK_NONE and
    ending at 25 with TOK_RETURN.
    NOTE(review): TOK_IDENTIFIER, TOK_STRING, TOK_NUMBER and TOK_BINOP presumably
    have their payload stored in Tokenizer::value - confirm in the implementation.
*/
typedef enum {
    TOK_NONE = 0,
    TOK_END_OF_FILE,
    TOK_IDENTIFIER,
    TOK_CONST,
    TOK_VAR,
    TOK_STRING,
    TOK_FN,
    TOK_STRUCT,
    TOK_EQUALS,
    TOK_OPEN_PAREN,
    TOK_CLOSING_PAREN,
    TOK_COMMA,
    TOK_OPEN_BRACE,
    TOK_CLOSING_BRACE,
    TOK_IMPORT,
    TOK_NUMBER,
    TOK_SEMICOLON,
    TOK_COLON,
    TOK_BINOP,
    TOK_PUB,
    TOK_IF,
    TOK_ELSE,
    TOK_WHILE,
    TOK_EXTERN,
    TOK_EXPORT,
    TOK_RETURN
} Token;

/*
    State of one tokenizer instance.
    NOTE(review): @code, @code_name, @allocator and @compiler_options share their names
    with the parameters of tokenizer_init and are presumably set from them - confirm
    in the implementation.
*/
struct Tokenizer {
    /* NOTE(review): presumably the text being tokenized - confirm */
    BufferView code;
    /* NOTE(review): @index and @prev_index are presumably positions within @code - confirm */
    int index;
    int prev_index;
    /* NOTE(review): presumably the most recently parsed token - confirm */
    Token token;
    /*
    @needs_update is an optimization for tokenizer_consume_if. If the expected token
    does not match and tokenizer_consume_if is called again, the tokenizer does not
    roll back to the previous token; it reuses the already parsed token instead.
    */
    bool needs_update;
    /* NOTE(review): presumably a name identifying where @code came from - confirm */
    BufferView code_name;
    
    /*
    Payload accompanying the token. Only one member is meaningful at a time.
    NOTE(review): which member applies presumably depends on @token - confirm.
    */
    union {
        BufferView identifier;
        BufferView string;
        i64 integer;
        f64 floating;
        BinopType binop_type;
    } value;
    /* NOTE(review): presumably selects between value.integer and value.floating for a number token - confirm */
    bool number_is_integer;
    ArenaAllocator *allocator; /* borrowed */
    const amal_compiler_options *compiler_options; /* borrowed */
};

/*
    An error value: returned by tokenizer_create_error and accepted by
    tokenizer_print_error_object.
*/
typedef struct {
    /* NOTE(review): presumably the position in the code that the error refers to - confirm */
    int index;
    /* NOTE(review): presumably the error message; ownership is not visible in this header - confirm */
    char *str;
} TokenizerError;

/*
    Initializes @self.
    @allocator and @compiler_options are marked as borrowed in struct Tokenizer.
    Returns an int status.
    NOTE(review): the status is presumably one of the TOKENIZER_* codes - confirm.
*/
CHECK_RESULT int tokenizer_init(Tokenizer *self, ArenaAllocator *allocator, BufferView code, BufferView code_name, const amal_compiler_options *compiler_options);
/*
    NOTE(review): presumably requires the next token to be @expected_token and reports
    a non-zero status otherwise - confirm in the implementation.
*/
CHECK_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
/* 
   @result is set to 0 if the next token is equal to @expected_token,
   otherwise @result is set to 1
   NOTE(review): @result is a bool, and 0 (false) on a match is an unusual polarity -
   verify this description against the implementation.
*/
CHECK_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
/*
    Same as tokenizer_print_error, but takes the variadic arguments as a va_list.
    NOTE(review): @fmt is presumably a printf-style format string - confirm.
*/
void tokenizer_print_error_args(Tokenizer *self, int index, const char *fmt, va_list args);
/*
    NOTE(review): presumably prints an error message, built from @fmt and the variadic
    arguments, for position @index in the code - confirm.
*/
void tokenizer_print_error(Tokenizer *self, int index, const char *fmt, ...);
/* NOTE(review): presumably prints an error previously made with tokenizer_create_error - confirm */
void tokenizer_print_error_object(Tokenizer *self, TokenizerError *error);
/*
    Returns a TokenizerError by value.
    NOTE(review): presumably built from @index and the message formatted from @fmt;
    how the message string is allocated is not visible in this header - confirm.
*/
TokenizerError tokenizer_create_error(Tokenizer *self, int index, const char *fmt, ...);
/* NOTE(review): presumably the index at which an error should be reported - confirm */
int tokenizer_get_error_index(Tokenizer *self);
/* NOTE(review): presumably converts @ref, a pointer into the tokenizer's code, to an index - confirm */
int tokenizer_get_code_reference_index(Tokenizer *self, const char *ref);
/* NOTE(review): presumably tells whether @code_ref points into the tokenizer's code - confirm */
bool tokenizer_contains_code_reference(Tokenizer *self, const char *code_ref);

#endif