From 11dc4b81935e3dfee997c421d8d6fa166edd7a05 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sun, 24 Feb 2019 02:10:58 +0100 Subject: Initial commit, Function declaration work somewhat --- .gitignore | 2 + LICENSE | 13 +++ README.md | 1 + build.sh | 17 ++++ doc/DESIGN.md | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++ doc/IMPLEMENTED.md | 2 + include/alloc.h | 14 +++ include/ast.h | 57 ++++++++++++ include/buffer.h | 22 +++++ include/buffer_view.h | 14 +++ include/mem.h | 10 ++ include/misc.h | 19 ++++ include/parser.h | 23 +++++ include/tokenizer.h | 46 +++++++++ include/types.h | 20 ++++ src/alloc.c | 25 +++++ src/ast.c | 41 +++++++++ src/buffer.c | 56 +++++++++++ src/buffer_view.c | 15 +++ src/main.c | 37 ++++++++ src/mem.c | 10 ++ src/parser.c | 187 +++++++++++++++++++++++++++++++++++++ src/tokenizer.c | 186 +++++++++++++++++++++++++++++++++++++ 23 files changed, 1068 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100755 build.sh create mode 100644 doc/DESIGN.md create mode 100644 doc/IMPLEMENTED.md create mode 100644 include/alloc.h create mode 100644 include/ast.h create mode 100644 include/buffer.h create mode 100644 include/buffer_view.h create mode 100644 include/mem.h create mode 100644 include/misc.h create mode 100644 include/parser.h create mode 100644 include/tokenizer.h create mode 100644 include/types.h create mode 100644 src/alloc.c create mode 100644 src/ast.c create mode 100644 src/buffer.c create mode 100644 src/buffer_view.c create mode 100644 src/main.c create mode 100644 src/mem.c create mode 100644 src/parser.c create mode 100644 src/tokenizer.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fcd2802 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode/ +amalgam diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9d3f9f8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright 2019 dec05eba + +Licensed under the Apache License, 
Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..ead55c9 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +Amalgam is written in c89 C standard to work on as many devices as possible and with many different compilers, which would allow you to compile amalgam with a compiler that generates smaller (static) binaries than gcc diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..8307dfe --- /dev/null +++ b/build.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -e + +this_script_path=$(readlink -f "$0") +this_script_dir=$(dirname "$this_script_path") +source_files=$(readlink -f "$this_script_dir/src/"*) + +if [ -z "$CC" ]; then + CC=cc +fi + +CFLAGS="-Wall -Wextra -Werror -g -O0 -DDEBUG -std=c89 -pedantic" +LIBS="-pthread" + +set -x +time "$CC" $source_files $CFLAGS $LIBS -o amalgam diff --git a/doc/DESIGN.md b/doc/DESIGN.md new file mode 100644 index 0000000..b8693f1 --- /dev/null +++ b/doc/DESIGN.md @@ -0,0 +1,251 @@ +# Amalgam design document +Amalgam is a simple language with few keywords but at the same time is very powerful. +All functions are closures. Assigning a closure to a variable is how you make regular functions. 
+ +## Hello world +``` +const main = () { + stderr.writeln("hello, world!"); +} +``` + +## Conditions +``` +const main = () { + var value = 23 + 50; + if value < 23 + stderr.writeln("less!"); + else + stderr.writeln("more!"); + + while value > 0 { + stderr.writeln("value: {}", value); + value -= 1; + } +} +``` + +## Data types +``` +const main = () !void { + var v1: i32 = 50; + var v2: u32 = 50; + + v1 = v2; // error, v2 can't be implicitly cast to v1 because i32 can't represent the same values as u32 + v1 = @cast(i32, v2); // ok, explicitly cast u32 to i32 + + var str1 = "hello"; + var str2 = "world"; + var str3 = try str1 + " " + str2; + var str4 = try str1 + 20; // error, can't add number to string. Preferably use str.fmt or explicitly cast to string + var str5 = try str1 + str(20); // ok, number explicitly cast to string + + const str6 = "hello"; + const str7 = "world"; + const str8 = str6 + " " + str7; // ok, all variables involved are const. They can be combined at compile-time + + stderr.writeln("{}, {} | {}", str1, str2, str3); // prints hello, world | hello world +} +``` + +## Dynamic allocation (array) +``` +const ArrayList = @import("std.array.ArrayList"); + +const main = () !void { + var list = ArrayList(i32); + try list.add(23); + try list.add(50); + var value = list.get(40); + + for val in list { + stdout.writeln("value: {}", val); + } +} +``` + +## Structures and instances +``` +struct User { + name: str, + age: i32, + level = 1 // default value is 1 and type is i32 +} + +const levelUp = (self: &User) { + self.level += 1; +} + +const main = () { + const user1 = User { + name: "John", + age: 24 + } + + var user2 = User { + name: "Titor", + age: 50, + level: 100 + } + + levelUp(user2); + // syntax sugar for calling a function with the first argument as + // the variable before the dot (same thing as levelUp(user2)) + user2.levelUp(); +} +``` + +## Named parameters +Functions can be called with arguments in positions that match the parameters or by
using the names of the parameters. +``` +struct User { + name: str, + age: i32, + level: i32 +} + +const createUser = (name: str, age: i32, level: i32 = 1) User { + return User { + name: name, + age: age, + level: level + } +} + +const main = () { + createUser(name: "John", level: 30, age: 30); + createUser(age: 40, name: "Titor"); +} +``` + +## Closure +``` +const apply = (func: () bool) { + const result = func(); +} + +const main = () { + // Return type is automatically deduced. If a function returns multiple different types at different points, then you get an error and are required to specify the return type + apply((){ + return true; + }); + + apply(() bool { + return true; + }); + + // Or store in a variable and use it + const func = () { + return true; + } + apply(func); +} +``` + +## Generic programming +``` +const add = (comptime T: type, a: T, b: T) !T { + return try a + b; +} + +const main = () { + var numberValue = add(20, 40); + var stringValue = add("hello", "world"); +} +``` + +## Ownership +Like Rust, Amalgam has a concept of ownership but with less cumbersome syntax. +There is one issue with ownership and that is references to data that gets reallocated. +Rust doesn't handle this but Amalgam does it using #reallocatable(instance). +Reallocatable should be ignored if the reference taken from the reallocatable memory +doesn't change location after realloc, which would be the case for pointers.
+``` +const ArrayList = @import("std.array.ArrayList"); + +struct User { + name: str, + level: i32 +} + +const addUserToList = (list: &ArrayList(User), user: User) { + // this is not actually needed for ArrayList because ArrayList uses #reallocatable internally for list.add and list.remove + @reallocatable list.add(move user); +} + +const main = () { + var users = ArrayList(User); + users.add(User { + name: "John", + level: 34 + }); + + const user1 = User { + name: "David", + level: 55 + } + // error, addUserToList expects user1 to be moved or copied + // addUserToList(users, user1); + + // addUserToList(users, clone user1); // ok, user1 has been copied to function scope + addUserToList(users, move user1); // ok, user1 has been moved to function scope + + next(move users); +} + +const getUserAtIndex = (list: &ArrayList(User), index: usize) User { + return list.get(index); +} + +const next = (users: ArrayList(User)) { + const user = getUserAtIndex(users, 0); + + // Reallocatable example: + addUserToList(users, User { + name: "John", + level: 34 + }); + + // error, "user" can't be safely used because addUserToList on line XXX can reallocate "users" which "user" belongs to + stdout.writeln("user name: {}", user.name); +} +``` + +## Table (inspired by lua) +``` +const main = () { + const values = { + "name": "John", + "age": 42, + "dogs": [ + "spot", + "doggy" + ] + } + + printMap(values); // stdout.writeln("{}", values) can also be used directly as it supports tables +} + +const printTable = (value: TableValue) { + switch @type(value) { + array => { + // value type is automatically cast to array here, same with other cases in the switch + for index, val in value { + stdout.write("[{}] = ", index); + printTable(val); + stdout.writeln(","); + } + } + map => { + stdout.writeln("{"); + for key, val in value { + stdout.write("'{}': ", key); + printTable(val); + stdout.writeln(","); + } + stdout.writeln("}"); + } + else => stdout.write(value); + } +} +``` \ No newline at 
end of file
diff --git a/doc/IMPLEMENTED.md b/doc/IMPLEMENTED.md
new file mode 100644
index 0000000..2583774
--- /dev/null
+++ b/doc/IMPLEMENTED.md
@@ -0,0 +1,2 @@
+const main = () {
+}
\ No newline at end of file
diff --git a/include/alloc.h b/include/alloc.h
new file mode 100644
index 0000000..35223b3
--- /dev/null
+++ b/include/alloc.h
@@ -0,0 +1,17 @@
+#ifndef AMALGAM_ALLOC_H
+#define AMALGAM_ALLOC_H
+
+#include "types.h"
+#include "misc.h"
+
+/* Result codes shared by am_malloc and am_realloc */
+#define ALLOC_OK 0
+#define ALLOC_FAIL (-1)
+
+/* Stores the new allocation in @mem. @mem is left untouched on ALLOC_FAIL */
+WARN_UNUSED_RESULT int am_malloc(usize size, void **mem);
+/* Stores the resized allocation in @new_mem. @new_mem is left untouched on ALLOC_FAIL */
+WARN_UNUSED_RESULT int am_realloc(void *mem, usize new_size, void **new_mem);
+void am_free(void *mem);
+
+#endif
diff --git a/include/ast.h b/include/ast.h
new file mode 100644
index 0000000..edbe70f
--- /dev/null
+++ b/include/ast.h
@@ -0,0 +1,60 @@
+#ifndef AMALGAM_AST_H
+#define AMALGAM_AST_H
+
+#include "buffer_view.h"
+#include "buffer.h"
+#include "misc.h"
+
+typedef struct FunctionDecl FunctionDecl;
+typedef struct FunctionCall FunctionCall;
+typedef struct LhsExpr LhsExpr;
+
+/* Payload of an Ast. The AstType stored next to it tells which member is in use */
+typedef union {
+    FunctionDecl *func_decl;
+    FunctionCall *func_call;
+    LhsExpr *lhs_expr;
+} AstValue;
+
+typedef enum {
+    AST_NONE,
+    AST_FUNCTION_DECL,
+    AST_FUNCTION_CALL,
+    AST_LHS
+} AstType;
+
+typedef struct {
+    AstValue value;
+    AstType type;
+} Ast;
+
+/* @body is a Buffer of Ast, filled with funcdecl_add_to_body */
+struct FunctionDecl {
+    BufferView name;
+    Buffer body;
+};
+
+struct FunctionCall {
+    BufferView name;
+};
+
+struct LhsExpr {
+    int isConst;
+    BufferView var_name;
+    Ast rhs_expr;
+};
+
+/* Returns an Ast with type AST_NONE and a NULL value */
+Ast ast_none(void);
+void ast_deinit(Ast *ast);
+
+void funcdecl_init(FunctionDecl *self);
+void funcdecl_deinit(FunctionDecl *self);
+WARN_UNUSED_RESULT int funcdecl_add_to_body(FunctionDecl *self, Ast ast);
+
+void funccall_init(FunctionCall *self, BufferView name);
+
+void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name);
+void lhsexpr_deinit(LhsExpr *self);
+
+#endif
diff --git a/include/buffer.h b/include/buffer.h
new file mode 100644
index 0000000..57efe9e
--- /dev/null
+++
b/include/buffer.h
@@ -0,0 +1,25 @@
+#ifndef AMALGAM_BUFFER_H
+#define AMALGAM_BUFFER_H
+
+#include "types.h"
+#include "misc.h"
+
+#define BUFFER_OK 0
+#define BUFFER_ALLOC_FAIL (-1)
+
+/* Growable byte array. @size and @capacity are in bytes */
+typedef struct {
+    char* data;
+    usize size;
+    usize capacity;
+} Buffer;
+
+void buffer_init(Buffer *self);
+void buffer_deinit(Buffer *self);
+
+/* Copies @size bytes from @data to the end of the buffer, growing it if needed */
+WARN_UNUSED_RESULT int buffer_append(Buffer *self, void *data, usize size);
+/* Returns a pointer to the element at @index, where every element is @type_size bytes */
+void* buffer_get(Buffer *self, usize index, usize type_size);
+
+#endif
\ No newline at end of file
diff --git a/include/buffer_view.h b/include/buffer_view.h
new file mode 100644
index 0000000..4993dc2
--- /dev/null
+++ b/include/buffer_view.h
@@ -0,0 +1,15 @@
+#ifndef AMALGAM_BUFFER_VIEW_H
+#define AMALGAM_BUFFER_VIEW_H
+
+#include "types.h"
+
+/* Non-owning view of @size bytes starting at @data */
+typedef struct {
+    const char* data;
+    usize size;
+} BufferView;
+
+BufferView create_buffer_view_null(void);
+BufferView create_buffer_view(const char *data, usize size);
+
+#endif
diff --git a/include/mem.h b/include/mem.h
new file mode 100644
index 0000000..bad6353
--- /dev/null
+++ b/include/mem.h
@@ -0,0 +1,11 @@
+#ifndef AMALGAM_MEM_H
+#define AMALGAM_MEM_H
+
+#include "types.h"
+#include "misc.h"
+
+void am_memcpy(void *dest, const void *src, usize size);
+/* Returns bool_true if the first @size bytes of @lhs and @rhs are equal */
+bool am_memeql(const void *lhs, const void *rhs, usize size);
+
+#endif
\ No newline at end of file
diff --git a/include/misc.h b/include/misc.h
new file mode 100644
index 0000000..c83eeb2
--- /dev/null
+++ b/include/misc.h
@@ -0,0 +1,27 @@
+#ifndef AMALGAM_MISC_H
+#define AMALGAM_MISC_H
+
+/* Returns @result from the calling function if it is not 0. @result is evaluated once */
+#define return_if_error(result) \
+do { \
+    int return_if_result; \
+    return_if_result = (result); \
+    if((return_if_result) != 0) \
+        return return_if_result; \
+} while(0)
+/* Jumps to the label "cleanup", which has to exist in the calling function, if @result is not 0 */
+#define cleanup_if_error(result) do { if((result) != 0) goto cleanup; } while(0)
+
+/* The attribute is a GNU extension, other compilers get an empty definition */
+#if defined(__GNUC__) || defined(__clang__)
+#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+
+typedef enum {
+    bool_false,
+    bool_true
+} bool;
+
+#endif
\ No newline at end of file
diff --git a/include/parser.h b/include/parser.h
new file mode 100644
index 0000000..8207381
--- /dev/null
+++ b/include/parser.h
@@ -0,0 +1,24 @@
+#ifndef AMALGAM_PARSER_H
+#define AMALGAM_PARSER_H
+
+#include "buffer.h"
+#include "buffer_view.h"
+#include "tokenizer.h"
+
+#define PARSER_OK 0
+/* General error */
+#define PARSER_ERR (-1)
+#define PARSER_UNEXPECTED_TOKEN (-2)
+
+/* @ast_objects is a Buffer of Ast, parser_parse_buffer appends one per parsed top-level statement */
+typedef struct {
+    Buffer ast_objects;
+    Tokenizer tokenizer;
+} Parser;
+
+WARN_UNUSED_RESULT int parser_init(Parser *self);
+void parser_deinit(Parser *self);
+
+WARN_UNUSED_RESULT int parser_parse_buffer(Parser *self, BufferView code_buffer);
+
+#endif
diff --git a/include/tokenizer.h b/include/tokenizer.h
new file mode 100644
index 0000000..7dd377f
--- /dev/null
+++ b/include/tokenizer.h
@@ -0,0 +1,48 @@
+#ifndef AMALGAM_TOKENIZER_H
+#define AMALGAM_TOKENIZER_H
+
+#include "buffer_view.h"
+#include "misc.h"
+
+#define TOKENIZER_OK 0
+#define TOKENIZER_UNEXPECTED_TOKEN (-1)
+
+typedef enum {
+    TOK_NONE,
+    TOK_END_OF_FILE,
+    TOK_IDENTIFIER,
+    TOK_CONST,
+    TOK_VAR,
+    TOK_EQUALS,
+    TOK_OPEN_PAREN,
+    TOK_CLOSING_PAREN,
+    TOK_OPEN_BRACE,
+    TOK_CLOSING_BRACE
+} Token;
+
+/* @index is the read position in @code, @prev_index is where the last read token starts */
+typedef struct {
+    BufferView code;
+    int index;
+    int prev_index;
+    int line;
+
+    union {
+        BufferView identifier;
+    } value;
+} Tokenizer;
+
+WARN_UNUSED_RESULT int tokenizer_init(Tokenizer *self, BufferView code);
+void tokenizer_deinit(Tokenizer *self);
+
+WARN_UNUSED_RESULT int tokenizer_next(Tokenizer *self, Token *token);
+/* Reads the next token and fails with TOKENIZER_UNEXPECTED_TOKEN if it is not @expected_token */
+WARN_UNUSED_RESULT int tokenizer_accept(Tokenizer *self, Token expected_token);
+/*
+    @result is set to bool_true if the next token is equal to @expected_token,
+    otherwise @result is set to bool_false and the token is not consumed
+*/
+WARN_UNUSED_RESULT int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result);
+void tokenizer_print_error(Tokenizer *self, const char *fmt, ...);
+
+#endif
diff --git a/include/types.h b/include/types.h
new file mode 100644
index 0000000..68e2d0f
--- /dev/null
+++ b/include/types.h
@@ -0,0 +1,20 @@
+#ifndef AMALGAM_TYPES_H
+#define AMALGAM_TYPES_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef ptrdiff_t isize;
+typedef size_t usize;
+
+#endif
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..c9ca7c3
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,25 @@
+#include "../include/alloc.h"
+#include <stdlib.h>
+
+int am_malloc(usize size, void **mem) {
+    void *allocated_data = malloc(size);
+    if(!allocated_data)
+        return ALLOC_FAIL;
+
+    *mem = allocated_data;
+    return ALLOC_OK;
+}
+
+int am_realloc(void *mem, usize new_size, void **new_mem) {
+    void *new_allocated_data = realloc(mem, new_size);
+    if(!new_allocated_data)
+        return ALLOC_FAIL;
+
+    *new_mem = new_allocated_data;
+    return ALLOC_OK;
+}
+
+void am_free(void *mem) {
+    free(mem);
+}
+
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..719d48e
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,41 @@
+#include "../include/ast.h"
+
+Ast ast_none(void) {
+    Ast ast;
+    ast.value.func_decl = NULL;
+    ast.type = AST_NONE;
+    return ast;
+}
+
+void ast_deinit(Ast *ast) {
+    /* TODO: Cleanup the different types of ast */
+    (void)ast;
+}
+
+void funcdecl_init(FunctionDecl *self) {
+    self->name = create_buffer_view_null();
+    buffer_init(&self->body);
+}
+
+void funcdecl_deinit(FunctionDecl *self) {
+    buffer_deinit(&self->body);
+}
+
+int funcdecl_add_to_body(FunctionDecl *self, Ast ast) {
+    return_if_error(buffer_append(&self->body, &ast, sizeof(ast)));
+    return BUFFER_OK;
+}
+
+void funccall_init(FunctionCall *self, BufferView name) {
+    self->name = name;
+}
+
+void lhsexpr_init(LhsExpr *self, int isConst, BufferView var_name) {
+    self->isConst = isConst;
+    self->var_name = var_name;
+    self->rhs_expr = ast_none();
+}
+
+void lhsexpr_deinit(LhsExpr *self) {
+    ast_deinit(&self->rhs_expr);
+}
\ No newline at end of file
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..4bd3b68
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,69 @@
+#include "../include/buffer.h"
+#include "../include/alloc.h"
+#include "../include/mem.h"
+#include <assert.h>
+
+void buffer_init(Buffer *self) {
+    self->data = NULL;
+    self->size = 0;
+    self->capacity = 0;
+}
+
+void buffer_deinit(Buffer *self) {
+    am_free(self->data);
+    self->data = NULL;
+    self->size = 0;
+    self->capacity = 0;
+}
+
+/* Grows the allocation so that it can hold at least @new_capacity bytes */
+static WARN_UNUSED_RESULT int buffer_ensure_capacity(Buffer *self, usize new_capacity) {
+    usize capacity;
+    void *new_mem;
+    int alloc_result;
+
+    if(self->capacity >= new_capacity)
+        return BUFFER_OK;
+
+    capacity = self->capacity;
+    if(capacity == 0) {
+        capacity = new_capacity;
+    } else {
+        while(capacity < new_capacity) {
+            /*
+                Grow by 50% with integer math. The +1 guarantees progress:
+                multiplying a capacity of 1 by 1.5 truncates back to 1 and never terminates
+            */
+            capacity += capacity / 2 + 1;
+        }
+    }
+
+    alloc_result = am_realloc(self->data, capacity, &new_mem);
+    if(alloc_result != ALLOC_OK)
+        return BUFFER_ALLOC_FAIL;
+
+    self->data = new_mem;
+    self->capacity = capacity;
+    return BUFFER_OK;
+}
+
+/* Copies @size bytes from @data to the end of the buffer, growing it if needed */
+int buffer_append(Buffer *self, void *data, usize size) {
+    /* Nothing to copy, and self->data can still be NULL here */
+    if(size == 0)
+        return BUFFER_OK;
+
+    return_if_error(buffer_ensure_capacity(self, self->size + size));
+    am_memcpy(self->data + self->size, data, size);
+    /* Without this every append would overwrite the previous one */
+    self->size += size;
+    return BUFFER_OK;
+}
+
+/* Returns a pointer to the element at @index, where every element is @type_size bytes */
+void* buffer_get(Buffer *self, usize index, usize type_size) {
+    usize real_index;
+    real_index = index * type_size;
+    assert(real_index < self->size);
+    return &self->data[real_index];
+}
\ No newline at end of file
diff --git a/src/buffer_view.c b/src/buffer_view.c
new file mode 100644
index 0000000..96b0dd7
--- /dev/null
+++ b/src/buffer_view.c
@@ -0,0 +1,15 @@
+#include "../include/buffer_view.h"
+
+BufferView create_buffer_view_null(void) {
+    BufferView buffer_view;
+    buffer_view.data = NULL;
+    buffer_view.size = 0;
+    return buffer_view;
+}
+
+BufferView create_buffer_view(const char *data, usize size) {
+    BufferView buffer_view;
+    buffer_view.data = data;
+    buffer_view.size = size;
+    return buffer_view;
+}
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..f3147bb
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,37
@@
+#include <stdio.h>
+#include <string.h>
+#include "../include/parser.h"
+
+int main() {
+    const char *code;
+    Parser parser;
+    BufferView code_view;
+    int result;
+
+    code =
+        "const main = () {\n"
+        " var hello = () {\n"
+        " \n"
+        " }\n"
+        " hello()\n"
+        "}\n"
+        "const print = () {\n"
+        " \n"
+        "}";
+    result = parser_init(&parser);
+    if(result != PARSER_OK) {
+        fprintf(stderr, "Failed to initialize parser\n");
+        return 1;
+    }
+
+    code_view = create_buffer_view(code, strlen(code));
+    result = parser_parse_buffer(&parser, code_view);
+    if(result != PARSER_OK) {
+        fprintf(stderr, "Failed to parse\n");
+        return 1;
+    }
+
+    /* No need to do this here as the program is exiting */
+    /* parser_deinit(&parser); */
+    return 0;
+}
diff --git a/src/mem.c b/src/mem.c
new file mode 100644
index 0000000..acd2ebd
--- /dev/null
+++ b/src/mem.c
@@ -0,0 +1,10 @@
+#include "../include/mem.h"
+#include <string.h>
+
+void am_memcpy(void *dest, const void *src, usize size) {
+    memcpy(dest, src, size);
+}
+
+bool am_memeql(const void *lhs, const void *rhs, usize size) {
+    return memcmp(lhs, rhs, size) == 0;
+}
\ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..ddf4a18
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,209 @@
+#include "../include/parser.h"
+#include "../include/ast.h"
+#include "../include/misc.h"
+#include "../include/alloc.h"
+#include <stdio.h>
+
+static WARN_UNUSED_RESULT int parser_parse_body(Parser *self, Ast *ast);
+
+int parser_init(Parser *self) {
+    buffer_init(&self->ast_objects);
+    return PARSER_OK;
+}
+
+void parser_deinit(Parser *self) {
+    Ast *ast_objects;
+    usize num_ast_objects;
+    usize i;
+
+    /* The buffer stores bytes, it has to be indexed as Ast and not as char */
+    ast_objects = (Ast*)self->ast_objects.data;
+    num_ast_objects = self->ast_objects.size / sizeof(Ast);
+    for(i = 0; i < num_ast_objects; ++i) {
+        ast_deinit(&ast_objects[i]);
+    }
+    buffer_deinit(&self->ast_objects);
+}
+
+/*
+    LHS = ('const'|'var') IDENTIFIER
+    @result is set to NULL if the next token is neither 'const' nor 'var'
+*/
+static WARN_UNUSED_RESULT int parser_parse_lhs(Parser *self, LhsExpr **result) {
+    bool isConst;
+    BufferView var_name;
+    *result = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CONST, &isConst));
+    if(!isConst) {
+        bool isVar;
+        return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_VAR, &isVar));
+        if(!isVar)
+            return PARSER_OK;
+    }
+
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_IDENTIFIER));
+    var_name = self->tokenizer.value.identifier;
+    fprintf(stderr, "var name: %.*s\n", (int)var_name.size, var_name.data);
+    return_if_error(am_malloc(sizeof(LhsExpr), (void**)result));
+    lhsexpr_init(*result, isConst, var_name);
+    return PARSER_OK;
+}
+
+/*
+    FUNC_DECL = '(' ')' '{' BODY* '}'
+    @func_decl is set to NULL if the next token is not '('
+*/
+static WARN_UNUSED_RESULT int parser_parse_function_decl(Parser *self, FunctionDecl **func_decl) {
+    bool result;
+    *func_decl = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_OPEN_PAREN, &result));
+    if(!result)
+        return PARSER_OK;
+
+    /* TODO: Parse parameters */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+    /* TODO: Parse return types */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_BRACE));
+
+    return_if_error(am_malloc(sizeof(FunctionDecl), (void**)func_decl));
+    funcdecl_init(*func_decl);
+
+    for(;;) {
+        Ast body_obj;
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_CLOSING_BRACE, &result));
+        if(result)
+            break;
+
+        cleanup_if_error(parser_parse_body(self, &body_obj));
+        cleanup_if_error(funcdecl_add_to_body(*func_decl, body_obj));
+    }
+    return PARSER_OK;
+
+    cleanup:
+    if(*func_decl) {
+        funcdecl_deinit(*func_decl);
+        am_free(*func_decl);
+        *func_decl = NULL;
+    }
+    return PARSER_ERR;
+}
+
+/*
+    FUNC_CALL = IDENTIFIER '(' ')'
+    @func_call is set to NULL if the next token is not an identifier
+*/
+static WARN_UNUSED_RESULT int parser_parse_function_call(Parser *self, FunctionCall **func_call) {
+    bool result;
+    BufferView func_name;
+    *func_call = NULL;
+
+    return_if_error(tokenizer_consume_if(&self->tokenizer, TOK_IDENTIFIER, &result));
+    if(!result)
+        return PARSER_OK;
+
+    func_name = self->tokenizer.value.identifier;
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_OPEN_PAREN));
+    /* TODO: Parse arguments */
+    return_if_error(tokenizer_accept(&self->tokenizer, TOK_CLOSING_PAREN));
+
+    return_if_error(am_malloc(sizeof(FunctionCall), (void**)func_call));
+    funccall_init(*func_call, func_name);
+    return PARSER_OK;
+}
+
+/* RHS = FUNC_DECL | FUNC_CALL */
+static WARN_UNUSED_RESULT int parser_parse_rhs(Parser *self, Ast *rhs_expr) {
+    FunctionDecl *func_decl;
+    FunctionCall *func_call;
+    Token token;
+    func_decl = NULL;
+    func_call = NULL;
+
+    cleanup_if_error(parser_parse_function_decl(self, &func_decl));
+    if(func_decl) {
+        rhs_expr->type = AST_FUNCTION_DECL;
+        rhs_expr->value.func_decl = func_decl;
+        return PARSER_OK;
+    }
+
+    cleanup_if_error(parser_parse_function_call(self, &func_call));
+    if(func_call) {
+        rhs_expr->type = AST_FUNCTION_CALL;
+        rhs_expr->value.func_call = func_call;
+        return PARSER_OK;
+    }
+
+    return_if_error(tokenizer_next(&self->tokenizer, &token));
+    /* TODO: Convert token to string */
+    tokenizer_print_error(&self->tokenizer, "Expected function declaration or function call, got token: %d", (int)token);
+    return PARSER_UNEXPECTED_TOKEN;
+
+    cleanup:
+    if(func_decl) {
+        funcdecl_deinit(func_decl);
+        am_free(func_decl);
+    }
+    if(func_call) {
+        /*funccall_deinit(func_call);*/
+        am_free(func_call);
+    }
+    return PARSER_ERR;
+}
+
+/* BODY = [LHS '='] RHS. @ast is only written to on success */
+int parser_parse_body(Parser *self, Ast *ast) {
+    LhsExpr *lhs_expr;
+    Ast rhs_expr;
+
+    return_if_error(parser_parse_lhs(self, &lhs_expr));
+    if(lhs_expr) {
+        /* lhs_expr is already allocated here, so errors have to go through cleanup */
+        cleanup_if_error(tokenizer_accept(&self->tokenizer, TOK_EQUALS));
+    }
+
+    cleanup_if_error(parser_parse_rhs(self, &rhs_expr));
+    if(lhs_expr) {
+        lhs_expr->rhs_expr = rhs_expr;
+        ast->type = AST_LHS;
+        ast->value.lhs_expr = lhs_expr;
+    } else {
+        *ast = rhs_expr;
+    }
+    return PARSER_OK;
+
+    cleanup:
+    if(lhs_expr) {
+        lhsexpr_deinit(lhs_expr);
+        am_free(lhs_expr);
+    }
+    return PARSER_ERR;
+}
+
+int parser_parse_buffer(Parser *self, BufferView code_buffer) {
+    Ast ast;
+    ast = ast_none();
+    return_if_error(tokenizer_init(&self->tokenizer, code_buffer));
+
+    for(;;) {
+        bool isEof;
+        cleanup_if_error(tokenizer_consume_if(&self->tokenizer, TOK_END_OF_FILE, &isEof));
+        if(isEof)
+            goto cleanup_noerr;
+
+        cleanup_if_error(parser_parse_body(self, &ast));
+        cleanup_if_error(buffer_append(&self->ast_objects, &ast, sizeof(ast)));
+        /* For cleanup, we only want to cleanup the last created ast after parser_parse_body */
+        ast = ast_none();
+    }
+
+    cleanup_noerr:
+    tokenizer_deinit(&self->tokenizer);
+    return PARSER_OK;
+
+    cleanup:
+    tokenizer_deinit(&self->tokenizer);
+    ast_deinit(&ast);
+    return PARSER_ERR;
+}
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..f1763a5
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,194 @@
+#include "../include/tokenizer.h"
+#include "../include/mem.h"
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+static int isAlpha(int c) {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+static int isDigit(int c) {
+    return c >= '0' && c <= '9';
+}
+
+static int isAlphaDigit(int c) {
+    return isAlpha(c) || isDigit(c);
+}
+
+/* Compares the whole identifier, a common prefix is not enough ("constant" is not "const") */
+static bool identifier_equals(BufferView identifier, const char *keyword, usize keyword_size) {
+    return identifier.size == keyword_size && am_memeql(identifier.data, keyword, keyword_size);
+}
+
+int tokenizer_init(Tokenizer *self, BufferView code) {
+    assert(code.size <= INT_MAX);
+    self->code = code;
+    self->index = 0;
+    self->prev_index = 0;
+    self->line = 1;
+    return TOKENIZER_OK;
+}
+
+void tokenizer_deinit(Tokenizer *self) {
+    (void)self;
+}
+
+static int tokenizer_get_char(Tokenizer *self) {
+    assert(self->index >= 0 && self->index < (int)self->code.size);
+    return self->code.data[self->index];
+}
+
+/* Returns TOK_END_OF_FILE if the end of the code was reached, otherwise TOK_NONE */
+static Token tokenizer_skip_whitespace(Tokenizer *self) {
+    int c;
+    for(;;) {
+        if(self->index >= (int)self->code.size)
+            return TOK_END_OF_FILE;
+
+        c = self->code.data[self->index];
+        switch(c) {
+            case '\n':
+                ++self->line;
+                /* fallthrough */
+            case ' ':
+            case '\t':
+            case '\r':
+                break;
+            default:
+                return TOK_NONE;
+        }
+        ++self->index;
+    }
+}
+
+int tokenizer_next(Tokenizer *self, Token *token) {
+    Token last_token;
+    int c;
+
+    last_token = tokenizer_skip_whitespace(self);
+    if(last_token == TOK_END_OF_FILE) {
+        *token = TOK_END_OF_FILE;
+        return TOKENIZER_OK;
+    }
+
+    self->prev_index = self->index;
+    c = tokenizer_get_char(self);
+    if(isAlpha(c) || c == '_') {
+        int identifier_start;
+        identifier_start = self->index;
+        ++self->index;
+
+        while(self->index < (int)self->code.size) {
+            c = tokenizer_get_char(self);
+            if(isAlphaDigit(c) || c == '_')
+                ++self->index;
+            else
+                break;
+        }
+
+        self->value.identifier = create_buffer_view(self->code.data + identifier_start, self->index - identifier_start);
+
+        if(identifier_equals(self->value.identifier, "const", 5))
+            *token = TOK_CONST;
+        else if(identifier_equals(self->value.identifier, "var", 3))
+            *token = TOK_VAR;
+        else
+            *token = TOK_IDENTIFIER;
+    } else if(c == '=') {
+        ++self->index;
+        *token = TOK_EQUALS;
+    } else if(c == '(') {
+        ++self->index;
+        *token = TOK_OPEN_PAREN;
+    } else if(c == ')') {
+        ++self->index;
+        *token = TOK_CLOSING_PAREN;
+    } else if(c == '{') {
+        ++self->index;
+        *token = TOK_OPEN_BRACE;
+    } else if(c == '}') {
+        ++self->index;
+        *token = TOK_CLOSING_BRACE;
+    } else {
+        /*self.printError("Unexpected symbol '{c}'", c);*/
+        tokenizer_print_error(self, "Unexpected symbol '%c'", c);
+        return TOKENIZER_UNEXPECTED_TOKEN;
+    }
+    return TOKENIZER_OK;
+}
+
+int tokenizer_accept(Tokenizer *self, Token expected_token) {
+    Token actual_token;
+    return_if_error(tokenizer_next(self, &actual_token));
+    if(actual_token == expected_token)
+        return TOKENIZER_OK;
+
+    /* Todo: convert token to string */
+    tokenizer_print_error(self, "Expected %d, got %d", expected_token, actual_token);
+    return TOKENIZER_UNEXPECTED_TOKEN;
+}
+
+int tokenizer_consume_if(Tokenizer *self, Token expected_token, bool *result) {
+    int index;
+    int line;
+    Token actual_token;
+
+    index = self->index;
+    line = self->line;
+    return_if_error(tokenizer_next(self, &actual_token));
+    if(actual_token == expected_token) {
+        *result = bool_true;
+    } else {
+        /* No need to restore self.prev_index as it's updated on the next call to tokenizer_next */
+        self->index = index;
+        self->line = line;
+        *result = bool_false;
+    }
+    return TOKENIZER_OK;
+}
+
+static int tokenizer_get_start_of_line_from_index(Tokenizer *self, int index) {
+    int c;
+    while(index >= 0) {
+        c = self->code.data[(usize)index];
+        if(c == '\n' || c == '\r') {
+            return index + 1;
+        }
+        --index;
+    }
+    return 0;
+}
+
+static int tokenizer_get_end_of_line_from_index(Tokenizer *self, int index) {
+    int c;
+    while(index < (int)self->code.size) {
+        c = self->code.data[(usize)index];
+        if(c == '\n' || c == '\r')
+            break;
+        ++index;
+    }
+    return index;
+}
+
+/* Prints the message followed by the line of the last read token with a ^ under its first character */
+void tokenizer_print_error(Tokenizer *self, const char *fmt, ...) {
+    va_list args;
+    int line_start;
+    int line_end;
+    int prev_column;
+    int i;
+
+    va_start(args, fmt);
+    line_start = tokenizer_get_start_of_line_from_index(self, self->prev_index);
+    line_end = tokenizer_get_end_of_line_from_index(self, self->prev_index);
+    prev_column = self->prev_index - line_start;
+    fprintf(stderr, "\x1b[1;37m%s:%d:%d:\x1b[0m \x1b[1;31merror:\x1b[0m ", "file.am", self->line, 1 + prev_column);
+    vfprintf(stderr, fmt, args);
+    fprintf(stderr, "\n%.*s\n", line_end - line_start, self->code.data + line_start);
+    for(i = 0; i < prev_column; ++i)
+        fprintf(stderr, " ");
+    fprintf(stderr, "\x1b[1;32m^\x1b[0m\n");
+    va_end(args);
+}
\ No newline at end of file
--
cgit v1.2.3