#include "../include/NetUtils.hpp"
#include "../include/StringUtils.hpp"
#include <array>
#include <sstream>
#include <iomanip>

namespace QuickMedia {
    // One entry of the replacement table used by html_escape_sequences:
    // a single raw character paired with the text that is written in its place.
    struct HtmlEscapeSequence {
        // The raw character to search for
        char unescape_char;
        // The text that replaces |unescape_char|
        std::string escape_sequence;
    };

    void html_escape_sequences(std::string &str) {
        const std::array<HtmlEscapeSequence, 6> escape_sequences = {
            HtmlEscapeSequence { '&', "&amp;" }, // This should be first, to not accidentally replace a new sequence caused by replacing this
            HtmlEscapeSequence { '"', "&quot;" },
            HtmlEscapeSequence { '\'', "&#39;" },
            HtmlEscapeSequence { '<', "&lt;" },
            HtmlEscapeSequence { '>', "&gt;" },
            HtmlEscapeSequence { '\n', "<br>" }
        };

        for(const HtmlEscapeSequence &escape_sequence : escape_sequences) {
            string_replace_all(str, escape_sequence.unescape_char, escape_sequence.escape_sequence);
        }
    }

    // One entry of the replacement table used by html_unescape_sequences:
    // an HTML entity paired with the plain text it is replaced by.
    struct HtmlUnescapeSequence {
        // The entity to search for, for example "&lt;"
        std::string escape_sequence;
        // The text that replaces |escape_sequence|
        std::string unescaped_str;
    };

    void html_unescape_sequences(std::string &str) {
        const std::array<HtmlUnescapeSequence, 6> unescape_sequences = {
            HtmlUnescapeSequence { "&quot;", "\"" },
            HtmlUnescapeSequence { "&#039;", "'" },
            HtmlUnescapeSequence { "&#39;", "'" },
            HtmlUnescapeSequence { "&lt;", "<" },
            HtmlUnescapeSequence { "&gt;", ">" },
            HtmlUnescapeSequence { "&amp;", "&" } // This should be last, to not accidentally replace a new sequence caused by replacing this
        };

        for(const HtmlUnescapeSequence &unescape_sequence : unescape_sequences) {
            string_replace_all(str, unescape_sequence.escape_sequence, unescape_sequence.unescaped_str);
        }
    }

    // Returns true if |c| lies in 'a'..'z' or 'A'..'Z'. Does not consult the locale.
    static bool is_alpha(char c) {
        const bool is_lowercase = 'a' <= c && c <= 'z';
        const bool is_uppercase = 'A' <= c && c <= 'Z';
        return is_lowercase || is_uppercase;
    }

    // Returns true if |c| lies in '0'..'9'. Does not consult the locale.
    static bool is_digit(char c) {
        if(c < '0')
            return false;
        return c <= '9';
    }

    // Returns a percent-encoded copy of |param|.
    // Letters, digits and the characters - _ . ~ are copied unchanged. Every other byte is written as
    // '%' followed by its value as two uppercase hexadecimal digits, for example ' ' becomes "%20".
    std::string url_param_encode(const std::string &param) {
        std::ostringstream encoded;
        // These flags only affect how the integer byte values below are printed, not the characters copied as-is
        encoded << std::hex << std::uppercase << std::setfill('0');

        for(const char ch : param) {
            const bool keep_as_is = is_alpha(ch) || is_digit(ch) || ch == '-' || ch == '_' || ch == '.' || ch == '~';
            if(keep_as_is) {
                encoded << ch;
                continue;
            }

            // Go through unsigned char first so bytes >= 0x80 are not sign-extended into negative numbers.
            // std::setw only applies to the next output, so it has to be repeated for every escaped byte.
            encoded << '%' << std::setw(2) << static_cast<int>(static_cast<unsigned char>(ch));
        }

        return encoded.str();
    }

    // Returns true if |c| may be the first character of an url candidate: a letter or a digit.
    static bool is_url_start_char(char c) {
        if(is_digit(c))
            return true;
        return is_alpha(c);
    }

    // Returns true if |c| may appear inside an url candidate: a letter, a digit, '%',
    // or one of the reserved/unreserved punctuation characters listed below.
    static bool is_url_character(char c) {
        if(is_alpha(c) || is_digit(c))
            return true;

        switch(c) {
            // Start of a percent-encoded byte
            case '%':
            // Reserved, general delimiters
            case ':': case '/': case '?': case '#': case '[': case ']': case '@':
            // Reserved, sub-delimiters
            case '!': case '$': case '&': case '\'': case '(': case ')':
            case '*': case '+': case ',': case ';': case '=':
            // Unreserved punctuation
            case '-': case '.': case '_': case '~':
                return true;
            default:
                return false;
        }
    }

    // Finds url-like substrings in |str| and appends them to |urls| in the order they appear. |urls| is not cleared first.
    //
    // A candidate starts at a letter or digit and extends over every following character accepted by is_url_character.
    // The accepted character set follows the URI standard in general: https://tools.ietf.org/html/rfc3986#section-2.2.
    // A candidate is only reported if it contains a dot and its first dot is not its last character,
    // for example "example.com" is an url but "example." is not. No scheme such as "https://" is required.
    //
    // One trailing '.' or ',' is removed, and so is a trailing ')' when the parentheses inside the candidate are not balanced.
    // This allows text such as: (see: example.com/) to produce "example.com/" while "example.com/a_(b)" keeps its last parenthesis.
    void extract_urls(const std::string &str, std::vector<std::string> &urls) {
        int parentheses_depth = 0;
        size_t url_start = std::string::npos;
        size_t url_dot_index = std::string::npos;

        // Going one past the last character is intentional: for a const std::string, str[str.size()] is the null terminator.
        // It is not an url character, so it ends an url that runs up to the end of the string.
        for(size_t i = 0; i <= str.size(); ++i) {
            const char c = str[i];

            // Not inside a candidate: the only thing that can happen is that one starts here
            if(url_start == std::string::npos) {
                if(is_url_start_char(c))
                    url_start = i;
                continue;
            }

            // Still inside the candidate: remember the first dot and keep track of the parentheses balance
            if(is_url_character(c)) {
                if(c == '.' && url_dot_index == std::string::npos)
                    url_dot_index = i;
                else if(c == '(')
                    ++parentheses_depth;
                else if(c == ')')
                    --parentheses_depth;
                continue;
            }

            // |c| ends the candidate, which covers [url_start, i).
            // Its only an url if there is a dot and the dot is not the last character in the url.
            if(url_dot_index != std::string::npos && url_dot_index != i - 1) {
                size_t url_length = i - url_start;
                const char last_char = str[i - 1];
                // Remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
                // and the punctuation that belongs to the sentence should not become part of the link.
                if(last_char == '.' || last_char == ',')
                    --url_length;
                else if(last_char == ')' && parentheses_depth != 0)
                    --url_length;
                if(url_length > 0)
                    urls.push_back(str.substr(url_start, url_length));
            }

            url_start = std::string::npos;
            url_dot_index = std::string::npos;
            // The balance has to start over for every candidate. Otherwise an unmatched ')' in one url would make a later url
            // such as "b.com/(y)" look unbalanced and lose its closing parenthesis.
            parentheses_depth = 0;
        }
    }
}