Initial commit, Function declaration work somewhat

author: dec05eba <dec05eba@protonmail.com> 2019-02-24 02:10:58 +0100
committer: dec05eba <dec05eba@protonmail.com> 2020-07-25 14:36:40 +0200
commit: 11dc4b81935e3dfee997c421d8d6fa166edd7a05 (patch)
tree: ccb08be54209a4900c740c9ed58e8f9c2910811d
23 files changed, 1068 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fcd2802
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.vscode/
+amalgam
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..9d3f9f8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+Copyright 2019 dec05eba
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ead55c9
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Amalgam is written in C89 so that it can be built for as many devices and with as many different compilers as possible. This also lets you compile Amalgam with a compiler that generates smaller (static) binaries than gcc.
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..8307dfe
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# Builds the amalgam binary from every C file in src/ with a single compiler invocation.
+# The compiler is taken from the CC environment variable and defaults to cc.
+# The binary is written to ./amalgam relative to the current working directory.
+
+set -e
+
+this_script_path=$(readlink -f "$0")
+this_script_dir=$(dirname "$this_script_path")
+
+if [ -z "$CC" ]; then
+    CC=cc
+fi
+
+CFLAGS="-Wall -Wextra -Werror -g -O0 -DDEBUG -std=c89 -pedantic"
+LIBS="-pthread"
+
+# Keep the source files in the positional parameters so that a path containing
+# whitespace stays a single argument. Only *.c is matched so stray files in src/
+# (editor backups, notes) are not handed to the compiler.
+# $CFLAGS and $LIBS are left unquoted on purpose: they hold several flags each.
+set -- "$this_script_dir"/src/*.c
+
+set -x
+time "$CC" "$@" $CFLAGS $LIBS -o amalgam
diff --git a/doc/DESIGN.md b/doc/DESIGN.md
new file mode 100644
index 0000000..b8693f1
--- /dev/null
+++ b/doc/DESIGN.md
@@ -0,0 +1,251 @@
+# Amalgam design document
+Amalgam is a simple language with few keywords but at the same time is very powerful.
+All functions are closures. Assigning a closure to a variable is how you make regular functions.
+
+## Hello world
+```
+const main = () {
+    stderr.writeln("hello, world!");
+}
+```
+
+## Conditions
+```
+const main = () {
+    var value = 23 + 50;
+    if value < 23
+        stderr.writeln("less!");
+    else
+        stderr.writeln("more!");
+    
+    while value > 0 {
+        stderr.writeln("value: {}", value);
+        value -= 1;
+    }
+}
+```
+
+## Data types
+```
+const main = () !void {
+    var v1: i32 = 50;
+    var v2: u32 = 50;
+
+    v1 = v2; // error, v2 can't be implicitly cast to v1 because i32 can't represent the same values as u32
+    v1 = @cast(i32, v2); // ok, explicitly cast u32 to i32
+
+    var str1 = "hello";
+    var str2 = "world";
+    var str3 = try str1 + " " + str2;
+    var str4 = try str1 + 20; // error, can't add a number to a string. Prefer str.fmt or an explicit cast to string
+    var str5 = try str1 + str(20); // ok, number explicitly cast to string
+
+    const str6 = "hello";
+    const str7 = "world";
+    const str8 = str6 + " " + str7; // ok, all variables involved are const. They can be combined at compile-time
+
+    stderr.writeln("{}, {} | {}", str1, str2, str3); // prints hello, world | hello world
+}
+```
+
+## Dynamic allocation (array)
+```
+const ArrayList = @import("std.array.ArrayList");
+
+const main = () !void {
+    var list = ArrayList(i32);
+    try list.add(23);
+    try list.add(50);
+    var value = list.get(40);
+
+    for val in list {
+        stdout.writeln("value: {}", val);
+    }
+}
+```
+
+## Structures and instances
+```
+struct User {
+    name: str,
+    age: i32,
+    level = 1 // default value is 1 and type is i32
+}
+
+const levelUp = (self: &User) {
+    self.level += 1;
+}
+
+const main = () {
+    const user1 = User {
+        name: "John",
+        age: 24
+    }
+
+    var user2 = User {
+        name: "Titor",
+        age: 50,
+        level: 100
+    }
+
+    levelUp(user2);
+    // syntax sugar for calling a function with the first argument as
+    // the variable before the dot (same thing as levelUp(user2))
+    user2.levelUp();
+}
+```
+
+## Named parameters
+Functions can be called with arguments in the position that matches the parameters or by using the names of the parameters.
+```
+struct User {
+    name: str,
+    age: i32,
+    level: i32
+}
+
+const createUser = (name: str, age: i32, level: i32 = 1) User {
+    return User {
+        name: name,
+        age: age,
+        level: level
+    }
+}
+
+const main = () {
+    createUser(name: "John", level: 30, age: 30);
+    createUser(age: 40, name: "Titor");
+}
+```
+
+## Closure
+```
+const apply = (func: () bool) {
+    const result = func();
+}
+
+const main = () {
+    // Return type is automatically deduced. If a function returns multiple different types at different points, then you get an error and are required to specify the return type
+    apply((){
+        return true;
+    });
+
+    apply(() bool {
+        return true;
+    });
+
+    // Or store in a variable and use it
+    const func = () {
+        return true;
+    }
+    apply(func);
+}
+```
+
+## Generic programming
+```
+const add = (comptime T: type, a: T, b: T) !T {
+    return try a + b;
+}
+
+const main = () {
+    var numberValue = add(20, 40);
+    var stringValue = add("hello", "world");
+}
+```
+
+## Ownership
+Like Rust, Amalgam has a concept of ownership but with less cumbersome syntax.
+There is one issue with ownership and that is references to data that gets reallocated.
+Rust doesn't handle this but Amalgam does it using #reallocatable(instance).
+Reallocatable should be ignored if the reference taken from the reallocatable memory
+doesn't change location after realloc, which would be the case for pointers.
+```
+const ArrayList = @import("std.array.ArrayList");
+
+struct User {
+    name: str,
+    level: i32
+}
+
+const addUserToList = (list: &ArrayList(User), user: User) {
+    // this is not actually needed for ArrayList because ArrayList uses #reallocatable internally for list.add and list.remove
+    @reallocatable list.add(move user);
+}
+
+const main = () {
+    var users = ArrayList(User);
+    users.add(User {
+        name: "John",
+        level: 34
+    });
+
+    const user1 = User {
+        name: "David",
+        level: 55
+    }
+    // error, addUserToList expects user1 to be moved or copied
+    // addUserToList(users, user1);
+
+    // addUserToList(users, clone user1); // ok, user1 has been copied to function scope
+    addUserToList(users, move user1); // ok, user1 has been moved to function scope
+
+    next(move users);
+}
+
+const getUserAtIndex = (list: &ArrayList(User), index: usize) User {
+    return list.get(index);
+}
+
+const next = (users: ArrayList(User)) {
+    const user = getUserAtIndex(users, 0);
+
+    // Reallocatable example:
+    addUserToList(users, User {
+        name: "John",
+        level: 34
+    });
+
+    // error, "user" can't be safely used because addUserToList on line XXX can reallocate "users" which "user" belongs to
+    stdout.writeln("user name: {}", user.name);
+}
+```
+
+## Table (inspired by lua)
+```
+const main = () {
+    const values = {
+        "name": "John",
+        "age": 42,
+        "dogs": [
+            "spot",
+            "doggy"
+        ]
+    }
+
+    printTable(values); // stdout.writeln("{}", values) can also be used directly as it supports tables
+}
+
+const printTable = (value: TableValue) {
+    switch @type(value) {
+        array => {
+            // value type is automatically cast to array here, same with other cases in the switch
+            for index, val in value {
+                stdout.write("[{}] = ", index);
+                printTable(val);
+                stdout.writeln(",");
+            }
+        }
+        map => {
+            stdout.writeln("{");
+            for key, val in value {
+                stdout.write("'{}': ", key);
+                printTable(val);
+                stdout.writeln(",");
+            }
+            stdout.writeln("}");
+        }
+        else => stdout.write(value);
+    }
+}
+```
+\ No newline at end of file
diff --git a/doc/IMPLEMENTED.md b/doc/IMPLEMENTED.md
new file mode 100644
index 0000000..2583774
--- /dev/null
+++ b/doc/IMPLEMENTED.md
@@ -0,0 +1,2 @@
+const main = () {
+}
+\ No newline at end of file
diff --git a/include/alloc.h b/include/alloc.h
new file mode 100644
index 0000000..35223b3
--- /dev/null
+++ b/include/alloc.h
@@ -0,0 +1,14 @@
+#ifndef AMALGAM_ALLOC_H
+#define AMALGAM_ALLOC_H
+
+#include "types.h"
+#include "misc.h"
+
+/* Status codes returned by am_malloc and am_realloc */
+#define ALLOC_OK 0
+#define ALLOC_FAIL -1
+
+/*
+    Allocates @size bytes. On success the new pointer is written to @mem and
+    ALLOC_OK is returned. On failure ALLOC_FAIL is returned and @mem is not written.
+*/
+WARN_UNUSED_RESULT int am_malloc(usize size, void **mem);
+/*
+    Resizes the allocation @mem to @new_size bytes. On success the resulting pointer
+    is written to @new_mem and ALLOC_OK is returned. On failure ALLOC_FAIL is returned
+    and @new_mem is not written.
+*/
+WARN_UNUSED_RESULT int am_realloc(void *mem, usize new_size, void **new_mem);
+/* Releases memory that was obtained with am_malloc or am_realloc */
+void am_free(void *mem);
+
+#endif
diff --git a/include/ast.h b/include/ast.h
new file mode 100644
index 0000000..edbe70f
--- /dev/null
+++ b/include/ast.h
@@ -0,0 +1,57 @@
+#ifndef AMALGAM_AST_H
+#define AMALGAM_AST_H
+
+#include "buffer_view.h"
+#include "buffer.h"
+#include "misc.h"
+
+/* Node types are forward declared because Ast refers to them by pointer and LhsExpr embeds an Ast */
+typedef struct FunctionDecl FunctionDecl;
+typedef struct FunctionCall FunctionCall;
+typedef struct LhsExpr LhsExpr;
+
+/* Pointer to the node data. Which member is meaningful is told by the AstType stored next to it in Ast */
+typedef union {
+    FunctionDecl *func_decl;
+    FunctionCall *func_call;
+    LhsExpr *lhs_expr;
+} AstValue;
+
+/* Tag that selects the active member of AstValue. AST_NONE means that there is no node */
+typedef enum {
+    AST_NONE,
+    AST_FUNCTION_DECL,
+    AST_FUNCTION_CALL,
+    AST_LHS
+} AstType;
+
+/* A tagged ast node: @type tells which pointer in @value is in use */
+typedef struct {
+    AstValue value;
+    AstType type;
+} Ast;
+
+/* A function declaration. @body is a Buffer that stores Ast values, one per parsed body statement */
+struct FunctionDecl {
+    BufferView name;
+    Buffer body;
+};
+
+/* A call to the function called @name. @name is a view, the characters are not copied */
+struct FunctionCall {
+    BufferView name;
+};
+
+/*
+    A variable declaration: "const name = rhs" or "var name = rhs".
+    @isConst is non-zero for const and zero for var.
+*/
+struct LhsExpr {
+    int isConst;
+    BufferView var_name;
+    Ast rhs_expr;
+};
+
+/* Returns an Ast with type AST_NONE and a NULL value */
+Ast ast_none();
+/* Meant to release what @ast refers to. The implementation is still a TODO and does nothing */
+void ast_deinit(Ast *ast);
+
+/* Gives @self a null name and an empty body */
+void funcdecl_init(FunctionDecl *self);
+/* Releases the storage of the body buffer */
+void funcdecl_deinit(FunctionDecl *self);
+/* Copies @ast into the body buffer. Returns BUFFER_OK, or a non-zero buffer error code */
+WARN_UNUSED_RESULT int funcdecl_add_to_body(FunctionDecl *self, Ast ast);
+
+/* Stores @name in @self */
+void funccall_init(FunctionCall *self, BufferView name);
+
+/* Stores @isConst and @var_name in @self and sets the right-hand side to ast_none() */
+void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name);
+/* Calls ast_deinit on the right-hand side expression */
+void lhsexpr_deinit(LhsExpr *self);
+
+#endif
diff --git a/include/buffer.h b/include/buffer.h
new file mode 100644
index 0000000..57efe9e
--- /dev/null
+++ b/include/buffer.h
@@ -0,0 +1,22 @@
+#ifndef AMALGAM_BUFFER_H
+#define AMALGAM_BUFFER_H
+
+#include "types.h"
+#include "misc.h"
+
+/* Status codes returned by the buffer functions that can fail */
+#define BUFFER_OK 0
+#define BUFFER_ALLOC_FAIL -1
+
+/*
+    A growable byte buffer.
+    @data is the storage (NULL while nothing has been allocated),
+    @size is the number of bytes in use and @capacity is the number of bytes allocated.
+*/
+typedef struct {
+    char* data;
+    usize size;
+    usize capacity;
+} Buffer;
+
+/* Sets @self to the empty state: no storage, size 0 and capacity 0 */
+void buffer_init(Buffer *self);
+/* Frees the storage and puts @self back into the empty state */
+void buffer_deinit(Buffer *self);
+
+/*
+    Copies @size bytes from @data into the buffer at byte offset self->size,
+    growing the storage first if needed.
+    Returns BUFFER_OK, or BUFFER_ALLOC_FAIL if the storage could not be grown.
+*/
+WARN_UNUSED_RESULT int buffer_append(Buffer *self, void *data, usize size);
+/*
+    Returns a pointer to the element at @index when the buffer is viewed as an array
+    of elements that are @type_size bytes each. The position is checked with assert.
+*/
+void* buffer_get(Buffer *self, usize index, usize type_size);
+
+#endif
+\ No newline at end of file
diff --git a/include/buffer_view.h b/include/buffer_view.h
new file mode 100644
index 0000000..4993dc2
--- /dev/null
+++ b/include/buffer_view.h
@@ -0,0 +1,14 @@
+#ifndef AMALGAM_BUFFER_VIEW_H
+#define AMALGAM_BUFFER_VIEW_H
+
+#include "types.h"
+
+/* A read-only view of @size bytes starting at @data. The view only stores the pointer, it does not copy the bytes */
+typedef struct {
+    const char* data;
+    usize size;
+} BufferView;
+
+/* Returns a view with a NULL data pointer and size 0 */
+BufferView create_buffer_view_null();
+/* Returns a view of the @size bytes starting at @data */
+BufferView create_buffer_view(const char *data, usize size);
+
+#endif
diff --git a/include/mem.h b/include/mem.h
new file mode 100644
index 0000000..bad6353
--- /dev/null
+++ b/include/mem.h
@@ -0,0 +1,10 @@
+#ifndef AMALGAM_MEM_H
+#define AMALGAM_MEM_H
+
+#include "types.h"
+#include "misc.h"
+
+/* Copies @size bytes from @src to @dest (implemented with memcpy) */
+void am_memcpy(void *dest, const void *src, usize size);
+/* Returns bool_true if the first @size bytes of @lhs and @rhs are equal, otherwise bool_false */
+bool am_memeql(const void *lhs, const void *rhs, usize size);
+
+#endif
+\ No newline at end of file
diff --git a/include/misc.h b/include/misc.h
new file mode 100644
index 0000000..c83eeb2
--- /dev/null
+++ b/include/misc.h
@@ -0,0 +1,19 @@
+#ifndef AMALGAM_MISC_H
+#define AMALGAM_MISC_H
+
+/*
+    Evaluates @result exactly once and, if it is non-zero, returns that value
+    from the enclosing function. Only usable in functions that return int.
+*/
+#define return_if_error(result) \
+do { \
+    int return_if_result; \
+    return_if_result = (result); \
+    if((return_if_result) != 0) \
+        return return_if_result; \
+} while(0)
+/*
+    Evaluates @result and, if it is non-zero, jumps to the label named cleanup,
+    which the enclosing function has to define. The error value itself is discarded.
+*/
+#define cleanup_if_error(result) do { if((result) != 0) goto cleanup; } while(0)
+/*
+    Asks the compiler to warn when a caller ignores the return value of a function.
+    __attribute__ is a GNU extension (warn_unused_result exists since gcc 3.4), so
+    compilers that do not identify as such get an empty macro instead of a syntax error.
+*/
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+
+/* Boolean type for C89, which has none built in. bool_false is 0 and bool_true is 1 */
+typedef enum {
+    bool_false,
+    bool_true
+} bool;
+
+#endif
+\ No newline at end of file
diff --git a/include/parser.h b/include/parser.h
new file mode 100644
index 0000000..8207381
--- /dev/null
+++ b/include/parser.h
@@ -0,0 +1,23 @@
+#ifndef AMALGAM_PARSER_H
+#define AMALGAM_PARSER_H
+
+#include "buffer.h"
+#include "buffer_view.h"
+#include "tokenizer.h"
+
+/* Status codes returned by the parser functions */
+#define PARSER_OK 0
+/* General error */
+#define PARSER_ERR -1
+#define PARSER_UNEXPECTED_TOKEN -2
+
+/*
+    Parser state.
+    @ast_objects is a Buffer that stores the top-level Ast values in the order they were parsed.
+    @tokenizer is (re)initialized by every call to parser_parse_buffer.
+*/
+typedef struct {
+    Buffer ast_objects;
+    Tokenizer tokenizer;
+} Parser;
+
+/* Initializes the list of ast objects. Returns PARSER_OK */
+WARN_UNUSED_RESULT int parser_init(Parser *self);
+/* Deinitializes the stored ast objects and frees the list that holds them */
+void parser_deinit(Parser *self);
+
+/*
+    Parses all of @code_buffer and adds the parsed top-level ast objects to @self.
+    Returns PARSER_OK on success and a non-zero error code on failure.
+*/
+WARN_UNUSED_RESULT int parser_parse_buffer(Parser *self, BufferView code_buffer);
+
+#endif
diff --git a/include/tokenizer.h b/include/tokenizer.h
new file mode 100644
index 0000000..7dd377f
--- /dev/null
+++ b/include/tokenizer.h
@@ -0,0 +1,46 @@
+#ifndef AMALGAM_TOKENIZER_H
+#define AMALGAM_TOKENIZER_H
+
+#include "buffer_view.h"
+#include "misc.h"
+
+/* Status codes returned by the tokenizer functions */
+#define TOKENIZER_OK 0
+#define TOKENIZER_UNEXPECTED_TOKEN -1
+
+/* The kinds of tokens that the tokenizer produces. TOK_NONE means "no token" */
+typedef enum {
+    TOK_NONE,
+    TOK_END_OF_FILE,
+    TOK_IDENTIFIER,
+    TOK_CONST,
+    TOK_VAR,
+    TOK_EQUALS,
+    TOK_OPEN_PAREN,
+    TOK_CLOSING_PAREN,
+    TOK_OPEN_BRACE,
+    TOK_CLOSING_BRACE
+} Token;
+
+/*
+    Tokenizer state.
+    @code is the text that is being tokenized.
+    @index is the position in @code of the next character to read.
+    @prev_index is the position where the most recently read token starts,
+    it is used to point at the token in error messages.
+    @line is the current line number, starting at 1.
+    @value.identifier views the characters of the most recently read identifier or keyword.
+*/
+typedef struct {
+    BufferView code;
+    int index;
+    int prev_index;
+    int line;
+    
+    union {
+        BufferView identifier;
+    } value;
+} Tokenizer;
+
+/* Starts tokenizing @code from its first character. The size of @code must fit in an int */
+WARN_UNUSED_RESULT int tokenizer_init(Tokenizer *self, BufferView code);
+/* Currently has nothing to release */
+void tokenizer_deinit(Tokenizer *self);
+
+/*
+    Reads the next token into @token. The end of the code is reported as TOK_END_OF_FILE.
+    Returns TOKENIZER_UNEXPECTED_TOKEN (after printing an error) for an unknown character.
+*/
+WARN_UNUSED_RESULT int tokenizer_next(Tokenizer *self, Token *token);
+/*
+    Reads the next token and returns TOKENIZER_OK if it is @expected_token.
+    Otherwise an error is printed and TOKENIZER_UNEXPECTED_TOKEN is returned.
+*/
+WARN_UNUSED_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
+/*
+   @result is set to bool_true if the next token is equal to @expected_token,
+   and the token is then consumed. Otherwise @result is set to bool_false
+   and the tokenizer position is restored so that the token can be read again.
+*/
+WARN_UNUSED_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
+/*
+    Prints a printf-style error message to stderr, followed by the line of code
+    that contains the most recently read token and a marker under that token.
+*/
+void tokenizer_print_error(Tokenizer *self, const char *fmt, ...);
+
+#endif
diff --git a/include/types.h b/include/types.h
new file mode 100644
index 0000000..68e2d0f
--- /dev/null
+++ b/include/types.h
@@ -0,0 +1,20 @@
+#ifndef AMALGAM_TYPES_H
+#define AMALGAM_TYPES_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Short names for the fixed-width signed integers from <stdint.h> */
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+
+/* Short names for the fixed-width unsigned integers from <stdint.h> */
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+/* Pointer-sized integers from <stddef.h>: isize is signed, usize is unsigned and used for sizes */
+typedef ptrdiff_t isize;
+typedef size_t usize;
+
+#endif
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..c9ca7c3
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,25 @@
+#include "../include/alloc.h"
+#include <stdlib.h>
+
+/*
+    Allocates @size bytes with malloc.
+    On success the pointer is stored in @mem and ALLOC_OK is returned.
+    On failure ALLOC_FAIL is returned and @mem is left untouched.
+*/
+int am_malloc(usize size, void **mem) {
+    void *block;
+
+    block = malloc(size);
+    if(block != NULL) {
+        *mem = block;
+        return ALLOC_OK;
+    }
+    return ALLOC_FAIL;
+}
+
+/*
+    Resizes @mem to @new_size bytes with realloc.
+    On success the resulting pointer is stored in @new_mem and ALLOC_OK is returned.
+    On failure ALLOC_FAIL is returned and @new_mem is left untouched.
+*/
+int am_realloc(void *mem, usize new_size, void **new_mem) {
+    void *resized;
+
+    resized = realloc(mem, new_size);
+    if(resized != NULL) {
+        *new_mem = resized;
+        return ALLOC_OK;
+    }
+    return ALLOC_FAIL;
+}
+
+/* Releases @mem with free, the counterpart of am_malloc and am_realloc */
+void am_free(void *mem) {
+    free(mem);
+}
+
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..719d48e
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,41 @@
+#include "../include/ast.h"
+
+/* Builds the placeholder Ast: its type is AST_NONE and its value pointer is NULL */
+Ast ast_none() {
+    Ast none;
+    none.type = AST_NONE;
+    none.value.func_decl = NULL;
+    return none;
+}
+
+/*
+    Placeholder for releasing what @ast refers to.
+    It does nothing yet, so the node that the value points to is not freed here.
+*/
+void ast_deinit(Ast *ast) {
+    /* TODO: Cleanup the different types of ast */
+    /* The cast only marks the parameter as used */
+    (void)ast;
+}
+
+/* Puts @self in its initial state: a null name and an empty body buffer */
+void funcdecl_init(FunctionDecl *self) {
+    self->name = create_buffer_view_null();
+    buffer_init(&self->body);
+}
+
+/* Releases the storage of the body buffer. The Ast values stored in it are not deinitialized individually */
+void funcdecl_deinit(FunctionDecl *self) {
+    buffer_deinit(&self->body);
+}
+
+/*
+    Copies @ast by value into the body buffer of @self.
+    Returns BUFFER_OK on success, otherwise the error code from buffer_append.
+*/
+int funcdecl_add_to_body(FunctionDecl *self, Ast ast) {
+    int append_result;
+
+    append_result = buffer_append(&self->body, &ast, sizeof(ast));
+    if(append_result != 0)
+        return append_result;
+    return BUFFER_OK;
+}
+
+/* Stores the name of the called function. Only the view is stored, the characters are not copied */
+void funccall_init(FunctionCall *self, BufferView name) {
+    self->name = name;
+}
+
+/*
+    Initializes a variable declaration with its name and constness.
+    The right-hand side starts out as ast_none() and is filled in by the parser later.
+*/
+void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
+    self->rhs_expr = ast_none();
+    self->var_name = var_name;
+    self->isConst = isConst;
+}
+
+/* Deinitializes the right-hand side expression of @self */
+void lhsexpr_deinit(LhsExpr *self) {
+    ast_deinit(&self->rhs_expr);
+}
+\ No newline at end of file
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..4bd3b68
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,56 @@
+#include "../include/buffer.h"
+#include "../include/alloc.h"
+#include "../include/mem.h"
+#include <assert.h>
+
+/* Puts @self in the empty state: nothing allocated, nothing in use */
+void buffer_init(Buffer *self) {
+    self->capacity = 0;
+    self->size = 0;
+    self->data = NULL;
+}
+
+/* Frees the storage of @self and resets it to the empty state, so it can be reused or deinitialized again */
+void buffer_deinit(Buffer *self) {
+    am_free(self->data);
+    /* The empty state is exactly what buffer_init produces */
+    buffer_init(self);
+}
+
+/*
+    Makes sure that at least @new_capacity bytes are allocated for @self.
+    An empty buffer is allocated with exactly @new_capacity bytes, otherwise the
+    capacity grows in steps of 1.5x until it is large enough.
+    Returns BUFFER_OK, or BUFFER_ALLOC_FAIL if the allocation failed, in which
+    case @self is unchanged.
+*/
+static WARN_UNUSED_RESULT int buffer_ensure_capacity(Buffer *self, usize new_capacity) {
+    usize capacity;
+    usize grown;
+    void *new_mem;
+
+    if(self->capacity >= new_capacity)
+        return BUFFER_OK;
+
+    capacity = self->capacity;
+    if(capacity == 0) {
+        capacity = new_capacity;
+    } else {
+        while(capacity < new_capacity) {
+            /* Integer form of capacity * 1.5 */
+            grown = capacity + capacity / 2;
+            /*
+                No progress (a capacity of 1 grows by 0) or unsigned wrap-around:
+                stop growing geometrically and use the requested capacity,
+                otherwise this loop would never end.
+            */
+            if(grown <= capacity) {
+                capacity = new_capacity;
+                break;
+            }
+            capacity = grown;
+        }
+    }
+
+    if(am_realloc(self->data, capacity, &new_mem) != ALLOC_OK)
+        return BUFFER_ALLOC_FAIL;
+
+    self->data = new_mem;
+    self->capacity = capacity;
+    return BUFFER_OK;
+}
+
+/*
+    Appends @size bytes from @data to the end of the buffer, growing the storage if needed.
+    Returns BUFFER_OK on success. Returns BUFFER_ALLOC_FAIL if the storage could not
+    be grown, in which case the buffer content is unchanged.
+*/
+int buffer_append(Buffer *self, void *data, usize size) {
+    usize new_size;
+
+    /* Nothing to copy; also avoids handing a NULL storage pointer to am_memcpy */
+    if(size == 0)
+        return BUFFER_OK;
+
+    new_size = self->size + size;
+    /* Unsigned wrap-around: the resulting size can't be represented, let alone allocated */
+    if(new_size < self->size)
+        return BUFFER_ALLOC_FAIL;
+
+    return_if_error(buffer_ensure_capacity(self, new_size));
+    am_memcpy(self->data + self->size, data, size);
+    /* Advance the size, otherwise the next append would overwrite the bytes that were just copied */
+    self->size = new_size;
+    return BUFFER_OK;
+}
+
+/*
+    Returns a pointer to element @index when the buffer is viewed as an array of
+    elements that are @type_size bytes each.
+    @type_size must not be 0 and the whole element has to be inside the used part
+    of the buffer, both are checked with assert.
+*/
+void* buffer_get(Buffer *self, usize index, usize type_size) {
+    assert(type_size != 0);
+    /*
+        Comparing against the element count checks that the complete element fits
+        (not just its first byte) and can't overflow like index * type_size could.
+    */
+    assert(index < self->size / type_size);
+    return self->data + index * type_size;
+}
+\ No newline at end of file
diff --git a/src/buffer_view.c b/src/buffer_view.c
new file mode 100644
index 0000000..96b0dd7
--- /dev/null
+++ b/src/buffer_view.c
@@ -0,0 +1,15 @@
+#include "../include/buffer_view.h"
+
+/* Returns the "null" view: no data and a size of 0 */
+BufferView create_buffer_view_null() {
+    BufferView null_view;
+    null_view.size = 0;
+    null_view.data = NULL;
+    return null_view;
+}
+
+/* Returns a view of the @size bytes that start at @data. The bytes are not copied */
+BufferView create_buffer_view(const char *data, usize size) {
+    BufferView view;
+    view.size = size;
+    view.data = data;
+    return view;
+}
+\ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..f3147bb
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <string.h>
+#include "../include/parser.h"
+
+/*
+    Test driver: parses a hard-coded Amalgam snippet.
+    Returns 0 if the snippet was parsed, and 1 (after printing a message to stderr)
+    if the parser could not be initialized or the snippet failed to parse.
+*/
+int main() {
+    Parser parser;
+    BufferView code_view;
+    const char *code =
+    "const main = () {\n"
+    "   var hello = () {\n"
+    "       \n"
+    "   }\n"
+    "   hello()\n"
+    "}\n"
+    "const print = () {\n"
+    "   \n"
+    "}";
+
+    if(parser_init(&parser) != PARSER_OK) {
+        fprintf(stderr, "Failed to initialize parser\n");
+        return 1;
+    }
+
+    code_view = create_buffer_view(code, strlen(code));
+    if(parser_parse_buffer(&parser, code_view) != PARSER_OK) {
+        fprintf(stderr, "Failed to parse\n");
+        return 1;
+    }
+
+    /* parser_deinit(&parser) is skipped on purpose, the program is exiting anyway */
+    return 0;
+}
diff --git a/src/mem.c b/src/mem.c
new file mode 100644
index 0000000..acd2ebd
--- /dev/null
+++ b/src/mem.c
@@ -0,0 +1,10 @@
+#include "../include/mem.h"
+#include <string.h>
+
+/* Copies @size bytes from @src to @dest with memcpy */
+void am_memcpy(void *dest, const void *src, usize size) {
+    memcpy(dest, src, size);
+}
+
+/* Returns bool_true if the first @size bytes of @lhs and @rhs are identical, otherwise bool_false */
+bool am_memeql(const void *lhs, const void *rhs, usize size) {
+    if(memcmp(lhs, rhs, size) != 0)
+        return bool_false;
+    return bool_true;
+}
+\ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..ddf4a18
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,187 @@
+#include "../include/parser.h"
+#include "../include/ast.h"
+#include "../include/misc.h"
+#include "../include/alloc.h"
+#include <stdio.h>
+
+static WARN_UNUSED_RESULT int parser_parse_body(Parser *self, Ast *ast);
+
+/*
+    Initializes the list of parsed ast objects. Always returns PARSER_OK.
+    The tokenizer member is not touched here, parser_parse_buffer initializes it.
+*/
+int parser_init(Parser *self) {
+    buffer_init(&self->ast_objects);
+    return PARSER_OK;
+}
+
+/* Deinitializes every stored Ast and then frees the list that holds them */
+void parser_deinit(Parser *self) {
+    Ast *ast_objects;
+    usize num_ast_objects;
+    usize i;
+
+    /*
+        The buffer stores bytes, so it has to be viewed as an array of Ast before
+        indexing. Indexing the byte pointer directly would step one byte at a time
+        instead of one Ast at a time.
+    */
+    ast_objects = (Ast*)self->ast_objects.data;
+    num_ast_objects = self->ast_objects.size / sizeof(Ast);
+    for(i = 0; i < num_ast_objects; ++i) {
+        ast_deinit(&ast_objects[i]);
+    }
+    buffer_deinit(&self->ast_objects);
+}
+
+/*
+    Parses the optional left-hand side of a statement: "const name" or "var name".
+    If the statement starts with neither keyword, @result is set to NULL and
+    PARSER_OK is returned without consuming anything.
+    Otherwise @result is set to a newly allocated LhsExpr that the caller owns.
+    Returns a non-zero error code if tokenizing or the allocation fails.
+*/
+static WARN_UNUSED_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) {
+    bool is_const;
+    bool is_var;
+    BufferView name;
+    LhsExpr *lhs;
+
+    *result = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &is_const));
+    if(!is_const) {
+        return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &is_var));
+        if(!is_var)
+            return PARSER_OK;
+    }
+
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
+    name = self->tokenizer.value.identifier;
+    fprintf(stderr, "var name: %.*s\n", (int)name.size, name.data);
+
+    return_if_error(am_malloc(sizeof(LhsExpr), (void**)&lhs));
+    lhsexpr_init(lhs, is_const, name);
+    *result = lhs;
+    return PARSER_OK;
+}
+
+/*
+    Parses a function declaration: "() { body }".
+    If the next token is not an opening parenthesis, @func_decl is set to NULL and
+    PARSER_OK is returned without consuming anything.
+    On success @func_decl is set to a newly allocated FunctionDecl that the caller owns.
+    If parsing fails before the declaration is allocated the failing error code is returned,
+    if it fails inside the body the declaration is freed and PARSER_ERR is returned.
+    @func_decl is NULL after every failure.
+*/
+static WARN_UNUSED_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
+    bool matched;
+    FunctionDecl *decl;
+    Ast statement;
+
+    *func_decl = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &matched));
+    if(!matched)
+        return PARSER_OK;
+
+    /* TODO: Parse parameters */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+    /* TODO: Parse return types */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
+
+    return_if_error(am_malloc(sizeof(FunctionDecl), (void**)&decl));
+    funcdecl_init(decl);
+
+    /* Parse statements until the closing brace of the function body is consumed */
+    cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &matched));
+    while(!matched) {
+        cleanup_if_error(parser_parse_body(self, &statement));
+        cleanup_if_error(funcdecl_add_to_body(decl, statement));
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &matched));
+    }
+
+    *func_decl = decl;
+    return PARSER_OK;
+
+    cleanup:
+    /* Only reached after the allocation succeeded, @func_decl itself is still NULL here */
+    funcdecl_deinit(decl);
+    am_free(decl);
+    return PARSER_ERR;
+}
+
+/*
+    Parses a function call: "name()".
+    If the next token is not an identifier, @func_call is set to NULL and PARSER_OK
+    is returned without consuming anything.
+    On success @func_call is set to a newly allocated FunctionCall that the caller owns.
+    Returns a non-zero error code if the parentheses are missing or the allocation fails.
+*/
+static WARN_UNUSED_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) {
+    bool is_identifier;
+    BufferView callee_name;
+    FunctionCall *call;
+
+    *func_call = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &is_identifier));
+    if(!is_identifier)
+        return PARSER_OK;
+
+    /* Save the name now, the tokenizer value is overwritten when the next identifier is read */
+    callee_name = self->tokenizer.value.identifier;
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
+    /* TODO: Parse arguments */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+
+    return_if_error(am_malloc(sizeof(FunctionCall), (void**)&call));
+    funccall_init(call, callee_name);
+    *func_call = call;
+    return PARSER_OK;
+}
+
+/*
+    Parses the right-hand side of a statement, which is either a function declaration
+    or a function call, and stores it in @rhs_expr.
+    Returns PARSER_OK on success.
+    Returns PARSER_UNEXPECTED_TOKEN (after printing an error) if the code continues
+    with something that is neither, and PARSER_ERR if one of the sub-parsers fails.
+*/
+static WARN_UNUSED_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) {
+    FunctionDecl *func_decl;
+    FunctionCall *func_call;
+    Token token;
+    func_decl = NULL;
+    func_call = NULL;
+
+    cleanup_if_error(parser_parse_function_decl(self, &func_decl));
+    if(func_decl) {
+        rhs_expr->type = AST_FUNCTION_DECL;
+        rhs_expr->value.func_decl = func_decl;
+        return PARSER_OK;
+    }
+
+    cleanup_if_error(parser_parse_function_call(self, &func_call));
+    if(func_call) {
+        rhs_expr->type = AST_FUNCTION_CALL;
+        rhs_expr->value.func_call = func_call;
+        return PARSER_OK;
+    }
+
+    /* Read the offending token so that the error message can report it and point at it */
+    return_if_error(tokenizer_next(&self->tokenizer, &token));
+    /* TODO: Convert token to string */
+    /* The token has to be passed along, a %d without a matching argument is undefined behavior */
+    tokenizer_print_error(&self->tokenizer, "Expected function declaration or function call, got token: %d", (int)token);
+    return PARSER_UNEXPECTED_TOKEN;
+
+    cleanup:
+    if(func_decl) {
+        funcdecl_deinit(func_decl);
+        am_free(func_decl);
+    }
+    if(func_call) {
+        /*funccall_deinit(func_call);*/
+        am_free(func_call);
+    }
+    return PARSER_ERR;
+}
+
+/*
+    Parses one statement: an optional "const name =" or "var name =" followed by a
+    right-hand side expression.
+    With a left-hand side, @ast becomes an AST_LHS node that owns the right-hand side,
+    otherwise @ast is the right-hand side itself.
+    Returns PARSER_OK on success. On failure a non-zero error code is returned and
+    the left-hand side that was allocated for this statement (if any) is freed.
+*/
+int parser_parse_body(Parser *self, Ast *ast) {
+    LhsExpr *lhs_expr;
+    Ast rhs_expr;
+
+    return_if_error(parser_parse_lhs(self, &lhs_expr));
+    if(lhs_expr) {
+        /*
+            Go through cleanup instead of returning directly: lhs_expr is already
+            allocated at this point and would leak if the '=' is missing.
+        */
+        cleanup_if_error(tokenizer_accept(&self->tokenizer, TOK_EQUALS));
+    }
+
+    cleanup_if_error(parser_parse_rhs(self, &rhs_expr));
+    if(lhs_expr) {
+        lhs_expr->rhs_expr = rhs_expr;
+        ast->type = AST_LHS;
+        ast->value.lhs_expr = lhs_expr;
+    } else {
+        *ast = rhs_expr;
+    }
+    return PARSER_OK;
+
+    cleanup:
+    if(lhs_expr) {
+        lhsexpr_deinit(lhs_expr);
+        am_free(lhs_expr);
+    }
+    return PARSER_ERR;
+}
+
+/*
+    Tokenizes and parses all of @code_buffer. Every parsed top-level statement is
+    appended to the list of ast objects in @self.
+    Returns PARSER_OK when the end of the code is reached without errors.
+    Returns the tokenizer_init error code if that fails, and PARSER_ERR for any
+    error while parsing.
+*/
+int parser_parse_buffer(Parser *self, BufferView code_buffer) {
+    Ast pending;
+    bool reached_eof;
+    int status;
+
+    pending = ast_none();
+    status = PARSER_ERR;
+    return_if_error(tokenizer_init(&self->tokenizer, code_buffer));
+
+    for(;;) {
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_END_OF_FILE, &reached_eof));
+        if(reached_eof) {
+            status = PARSER_OK;
+            break;
+        }
+
+        cleanup_if_error(parser_parse_body(self, &pending));
+        cleanup_if_error(buffer_append(&self->ast_objects, &pending, sizeof(pending)));
+        /* The list holds this ast now. Reset it so that cleanup only handles an ast that was not stored */
+        pending = ast_none();
+    }
+
+    cleanup:
+    tokenizer_deinit(&self->tokenizer);
+    if(status != PARSER_OK)
+        ast_deinit(&pending);
+    return status;
+}
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..f1763a5
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,186 @@
+#include "../include/tokenizer.h"
+#include "../include/mem.h"
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/* Returns 1 if @c is an ASCII letter (a-z or A-Z), otherwise 0 */
+static int isAlpha(int c) {
+    if(c >= 'a' && c <= 'z')
+        return 1;
+    return c >= 'A' && c <= 'Z';
+}
+
+/* Returns 1 if @c is an ASCII decimal digit (0-9), otherwise 0 */
+static int isDigit(int c) {
+    if(c < '0')
+        return 0;
+    return c <= '9';
+}
+
+/* Returns 1 if @c is an ASCII letter or an ASCII decimal digit, otherwise 0 */
+static int isAlphaDigit(int c) {
+    if(isAlpha(c))
+        return 1;
+    return isDigit(c);
+}
+
+/*
+    Prepares @self for tokenizing @code from its first character, on line 1.
+    Positions are stored as int, so the size of @code must not exceed INT_MAX (checked with assert).
+    Always returns TOKENIZER_OK.
+*/
+int tokenizer_init(Tokenizer *self, BufferView code) {
+    assert(code.size <= INT_MAX);
+    self->line = 1;
+    self->prev_index = 0;
+    self->index = 0;
+    self->code = code;
+    return TOKENIZER_OK;
+}
+
+/* Counterpart of tokenizer_init. There is nothing to release, the cast only marks the parameter as used */
+void tokenizer_deinit(Tokenizer *self) {
+    (void)self;    
+}
+
+/* Returns the character at the current position, which must be inside the code (checked with assert) */
+static int tokenizer_get_char(Tokenizer *self) {
+    const char *current;
+    assert(self->index >= 0 && self->index < (int)self->code.size);
+    current = self->code.data + self->index;
+    return *current;
+}
+
+/*
+    Advances past spaces, tabs, carriage returns and newlines, counting a line for every newline.
+    Returns TOK_END_OF_FILE if the end of the code was reached, otherwise TOK_NONE
+    with the position at the first character that is not whitespace.
+*/
+static Token tokenizer_skip_whitespace(Tokenizer *self) {
+    int c;
+    for(;;) {
+        if(self->index >= (int)self->code.size)
+            return TOK_END_OF_FILE;
+
+        c = self->code.data[self->index];
+        switch(c) {
+            case '\n':
+                ++self->line;
+                /* fallthrough */
+            case '\r':
+                /*
+                    Skipped without counting a line, so "\r\n" counts as one line.
+                    The error reporting already treats '\r' as a line break, without
+                    this case such code would fail with "Unexpected symbol".
+                */
+            case ' ':
+            case '\t':
+                break;
+            default:
+                return TOK_NONE;
+        }
+        ++self->index;
+    }
+}
+
+/*
+    Reads the next token into @token, skipping leading whitespace first.
+    The end of the code is reported as TOK_END_OF_FILE.
+    For identifiers and keywords, self->value.identifier views the characters of the word.
+    Returns TOKENIZER_OK on success. For a character that can't start any token an
+    error is printed and TOKENIZER_UNEXPECTED_TOKEN is returned, @token is not written then.
+*/
+int tokenizer_next(Tokenizer *self, Token *token) {
+    Token last_token;
+    int c;
+
+    last_token = tokenizer_skip_whitespace(self);
+    if(last_token == TOK_END_OF_FILE) {
+        *token = TOK_END_OF_FILE;
+        return TOKENIZER_OK;
+    }
+
+    /* Remember where this token starts, tokenizer_print_error points at this position */
+    self->prev_index = self->index;
+    c = tokenizer_get_char(self);
+    if(isAlpha(c) || c == '_') {
+        int identifier_start;
+        usize identifier_size;
+        identifier_start = self->index;
+        ++self->index;
+
+        while(self->index < (int)self->code.size) {
+            c = tokenizer_get_char(self);
+            if(isAlphaDigit(c) || c == '_')
+                ++self->index;
+            else
+                break;
+        }
+
+        identifier_size = (usize)(self->index - identifier_start);
+        self->value.identifier = create_buffer_view(self->code.data + identifier_start, identifier_size);
+
+        /*
+            The length has to match as well. Comparing only the first bytes would turn
+            identifiers such as "constant" or "variable" into keywords, and would read
+            past the end of identifiers that are shorter than the keyword.
+        */
+        if(identifier_size == 5 && am_memeql(self->value.identifier.data, "const", 5))
+            *token = TOK_CONST;
+        else if(identifier_size == 3 && am_memeql(self->value.identifier.data, "var", 3))
+            *token = TOK_VAR;
+        else
+            *token = TOK_IDENTIFIER;
+    } else if(c == '=') {
+        ++self->index;
+        *token = TOK_EQUALS;
+    } else if(c == '(') {
+        ++self->index;
+        *token = TOK_OPEN_PAREN;
+    } else if(c == ')') {
+        ++self->index;
+        *token = TOK_CLOSING_PAREN;
+    } else if(c == '{') {
+        ++self->index;
+        *token = TOK_OPEN_BRACE;
+    } else if(c == '}') {
+        ++self->index;
+        *token = TOK_CLOSING_BRACE;
+    } else {
+        /*self.printError("Unexpected symbol '{c}'", c);*/
+        tokenizer_print_error(self, "Unexpected symbol '%c'", c);
+        return TOKENIZER_UNEXPECTED_TOKEN;
+    }
+    return TOKENIZER_OK;
+}
+
+/*
+    Reads the next token and requires it to be @expected_token.
+    Returns TOKENIZER_OK if it is. Otherwise an error is printed and
+    TOKENIZER_UNEXPECTED_TOKEN is returned. Errors from tokenizer_next are passed on.
+*/
+int tokenizer_accept(Tokenizer *self, Token expected_token) {
+    Token token;
+
+    return_if_error(tokenizer_next(self, &token));
+    if(token != expected_token) {
+        /* Todo: convert token to string */
+        tokenizer_print_error(self, "Expected %d, got %d", expected_token, token);
+        return TOKENIZER_UNEXPECTED_TOKEN;
+    }
+    return TOKENIZER_OK;
+}
+
+/*
+    Reads the next token and consumes it only if it is @expected_token.
+    @result is set to bool_true if the token was consumed. Otherwise @result is set
+    to bool_false and the position and line are restored, so the token is read again
+    by the next call.
+    Returns TOKENIZER_OK in both cases. Errors from tokenizer_next are passed on and
+    leave @result unwritten.
+*/
+int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) {
+    int saved_index;
+    int saved_line;
+    Token token;
+
+    saved_index = self->index;
+    saved_line = self->line;
+    return_if_error(tokenizer_next(self, &token));
+
+    if(token != expected_token) {
+        /* No need to restore self.prev_index as it's updated on the next call to tokenizer_next */
+        self->index = saved_index;
+        self->line = saved_line;
+        *result = bool_false;
+        return TOKENIZER_OK;
+    }
+
+    *result = bool_true;
+    return TOKENIZER_OK;
+}
+
+/*
+    Returns the position of the first character of the line that contains @index:
+    the position right after the closest line break at or before @index, or 0 if
+    there is none.
+*/
+static int tokenizer_get_start_of_line_from_index(Tokenizer *self, int index) {
+    int c;
+
+    /*
+        Never read at or past the end of the code, for example position 0 of empty code.
+        The end-of-line counterpart of this function has the same upper bound.
+    */
+    if(index >= (int)self->code.size)
+        index = (int)self->code.size - 1;
+
+    while(index >= 0) {
+        c = self->code.data[(usize)index];
+        if(c == '\n' || c == '\r') {
+            return index + 1;
+        }
+        --index;
+    }
+    return 0;
+}
+
+/*
+    Returns the position of the first line break at or after @index,
+    or the size of the code if there is no line break left.
+*/
+static int tokenizer_get_end_of_line_from_index(Tokenizer *self, int index) {
+    int code_size;
+    char current;
+
+    code_size = (int)self->code.size;
+    for(; index < code_size; ++index) {
+        current = self->code.data[(usize)index];
+        if(current == '\n' || current == '\r')
+            return index;
+    }
+    return index;
+}
+
+/*
+    Prints an error report to stderr:
+    a "file:line:column: error:" header followed by the printf-style message in @fmt,
+    then the line of code that contains the most recently read token,
+    then a '^' marker under the first character of that token.
+    The file name is still hard-coded to "file.am".
+*/
+void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
+    va_list args;
+    int line_start;
+    int line_length;
+    int column;
+    int remaining;
+
+    line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
+    line_length = tokenizer_get_end_of_line_from_index(self, self->prev_index) - line_start;
+    column = self->prev_index - line_start;
+
+    fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + column);
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fprintf(stderr, "\n%.*s\n", line_length, self->code.data + line_start);
+    /* Indent the marker so that it ends up under the start of the token */
+    for(remaining = column; remaining > 0; --remaining)
+        fprintf(stderr, " ");
+    fprintf(stderr, "\x1b[1;32m^\x1b[0m\n");
+}
+\ No newline at end of file
author	dec05eba <dec05eba@protonmail.com>	2019-02-24 02:10:58 +0100
committer	dec05eba <dec05eba@protonmail.com>	2020-07-25 14:36:40 +0200
commit	11dc4b81935e3dfee997c421d8d6fa166edd7a05 (patch)
tree	ccb08be54209a4900c740c9ed58e8f9c2910811d