From fb2072deb3e50afdb062570a3a80ec1afb5bfb56 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 9 Dec 2017 01:14:41 +0100 Subject: Finished project config file parsing --- src/Conf.cpp | 336 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 330 insertions(+), 6 deletions(-) (limited to 'src/Conf.cpp') diff --git a/src/Conf.cpp b/src/Conf.cpp index d535c60..cea1b01 100644 --- a/src/Conf.cpp +++ b/src/Conf.cpp @@ -1,18 +1,342 @@ #include "../include/Conf.hpp" #include "../include/FileUtil.hpp" +#include "../external/utf8/unchecked.h" +#include using namespace std; +using u8string = utf8::unchecked::iterator; namespace sibs { - Result readConf(const char *filepath) + u32 min(u32 a, u32 b) { return a < b ? a : b; } + + class UnexpectedTokenException : public std::runtime_error + { + public: + UnexpectedTokenException(const string &errMsg) : runtime_error(errMsg) + { + + } + }; + + enum class Token + { + NONE, + END_OF_FILE, + IDENTIFIER, + OPEN_BRACKET, + CLOSING_BRACKET, + EQUALS, + STRING + }; + + const char *getTokenName(Token token) + { + switch(token) + { + case Token::NONE: return "NONE"; + case Token::END_OF_FILE: return ""; + case Token::IDENTIFIER: return "identifier"; + case Token::OPEN_BRACKET: return "["; + case Token::CLOSING_BRACKET: return "]"; + case Token::EQUALS: return "="; + case Token::STRING: return "string"; + default: return "Unknown"; + } + } + + class Tokenizer + { + public: + Tokenizer(const char *_code) : + currentToken(Token::NONE), + code((char*)_code) + { + + } + + Token nextToken() + { + u32 c = *code; + while(isWhitespace(c)) + { + ++code; + c = *code; + } + + if(isAlpha(c) || c == '_') + { + char *startOfIdentifier = code.base(); + ++code; + c = *code; + while(isAlpha(c) || c == '_' || isDigit(c)) + { + ++code; + c = *code; + } + char *endOfIdentifier = code.base(); + + identifier = StringView(startOfIdentifier, endOfIdentifier - startOfIdentifier); + return Token::IDENTIFIER; + } + else if(c == '[') + { + ++code; + return Token::OPEN_BRACKET; + } + else if(c == ']') + { + ++code; + return Token::CLOSING_BRACKET; + } + else if(c == '=') + { + ++code; + return Token::EQUALS; + } + else if(c == '"') + { + u32 escapeCount = 0; + + ++code; + char *startOfStr = code.base(); + while(escapeCount > 0 || *code != '"') + { + c = *code; + if(c == '\0') + return Token::END_OF_FILE; + else if(c == '\\') + ++escapeCount; + else + escapeCount = min(0, escapeCount - 1); + ++code; + } + + str = StringView(startOfStr, code.base() - startOfStr); + ++code; + return Token::STRING; + } + else if(c == '\0') + { + return Token::END_OF_FILE; + } + else + { + string errMsg = "Unexpected token: "; + errMsg += (char)c; + throw UnexpectedTokenException(errMsg); + } + } + + StringView getIdentifier() const { return identifier; } + StringView getString() const { return str; } + private: + bool isWhitespace(u32 c) + { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + } + + bool isAlpha(u32 c) + { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + bool isDigit(u32 c) + { + return c >= '0' && c <= '9'; + } + private: + Token currentToken; + u8string code; + + union + { + StringView identifier; + StringView str; + }; + }; + + class ParserException : public std::runtime_error { - Result fileContentResult = getFileContent(filepath); + public: + ParserException(const string &errMsg) : runtime_error(errMsg) + { + + } + }; + + class Parser + { + public: + static Result parse(const char *code, const ConfigCallback &callback) + { + try + { + Parser parser(code, (ConfigCallback*)&callback); + parser.parse(); + return Result::Ok(true); + } + catch (const UnexpectedTokenException &e) + { + return Result::Err(e.what()); + } + catch (const ParserException &e) + { + return Result::Err(e.what()); + } + } + private: + Parser(const char *code, ConfigCallback *_callback) : + tokenizer(code), + callback(_callback), + objectDefined(false) + { + + } + + void parse() + { + while(true) + { + Token token = tokenizer.nextToken(); + switch(token) + { + case Token::IDENTIFIER: + { + parseConfigField(); + break; + } + case Token::OPEN_BRACKET: + { + parseConfigObject(); + break; + } + case Token::END_OF_FILE: + { + callback->finished(); + return; + } + default: + { + string errMsg = "Expected identifier or object, got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + } + } + + void parseConfigField() + { + StringView fieldName = tokenizer.getIdentifier(); + if (!objectDefined) + { + string errMsg = "An object has to be the first element defined in a config file"; + throw ParserException(errMsg); + } + + Token token = tokenizer.nextToken(); + if(token == Token::EQUALS) + { + parseConfigFieldRhs(fieldName); + } + else + { + string errMsg = "Expected '=' after identifier, got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + + void parseConfigFieldRhs(StringView fieldName) + { + Token token = tokenizer.nextToken(); + if(token == Token::STRING) + { + callback->processField(fieldName, tokenizer.getString()); + } + else if(token == Token::OPEN_BRACKET) + { + token = tokenizer.nextToken(); + if(token == Token::STRING) + { + StringView str = tokenizer.getString(); + token = tokenizer.nextToken(); + if(token == Token::CLOSING_BRACKET) + { + vector values; + values.push_back(str); + callback->processField(fieldName, values); + } + else + { + string errMsg = "Expected ']' to close value list, got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + else + { + string errMsg = "Expected string value inside list in field definition, got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + else + { + string errMsg = "Expected string on right-hand side of field '"; + errMsg += string(fieldName.data, fieldName.size); + errMsg += "', got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + + void parseConfigObject() + { + Token token = tokenizer.nextToken(); + if(token == Token::IDENTIFIER) + { + StringView objectName = tokenizer.getIdentifier(); + token = tokenizer.nextToken(); + if(token == Token::CLOSING_BRACKET) + { + objectDefined = true; + callback->processObject(objectName); + } + else + { + string errMsg = "Expected ']' after identifier to close object definition, got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + else + { + string errMsg = "Expected identifier after '[', got: "; + errMsg += getTokenName(token); + throw ParserException(errMsg); + } + } + private: + Tokenizer tokenizer; + ConfigCallback *callback; + bool objectDefined; + }; + + Result Config::readFromFile(const char *filepath, const ConfigCallback &callback) + { + Result fileContentResult = getFileContent(filepath); if(fileContentResult.isErr()) - return fileContentResult; + return Result::Err(fileContentResult.getErrMsg()); + + const char *code = fileContentResult.unwrap().data; + if(!utf8::is_valid(code, code + fileContentResult.unwrap().size)) + return Result::Err("File is not in valid utf8 format"); + + if(fileContentResult.unwrap().size >= 3 && utf8::is_bom(code)) + code += 3; - string configData; - printf("file content:\n%s\n", fileContentResult.unwrap().c_str()); - return Result::Ok(std::move(configData)); + return Parser::parse(code, callback); } } \ No newline at end of file -- cgit v1.2.3