1 files changed, 140 insertions, 0 deletions
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
new file mode 100644
index 0000000..e87c42c
--- /dev/null
+++ b/src/NetUtils.cpp
@@ -0,0 +1,140 @@
+#include "../include/NetUtils.hpp"
+#include "../include/StringUtils.hpp"
+#include <array>
+#include <sstream>
+#include <iomanip>
+
+namespace QuickMedia {
+    struct HtmlEscapeSequence { // One row of the replacement table that html_escape_sequences walks through
+        char unescape_char; // The plain character that is handed to string_replace_all as the thing to replace
+        std::string escape_sequence; // The text that is handed to string_replace_all as the replacement for |unescape_char|
+    };
+
+    // Escapes |str| in place so it can be embedded in HTML: every character in the table below is replaced with the text next to it.
+    // Note that a newline is turned into a "<br>" tag and not into a character reference.
+    // This relies on string_replace_all replacing every occurrence of the character (declared in StringUtils.hpp, not visible here).
+    void html_escape_sequences(std::string &str) {
+        // The order of the rows matters:
+        // * '&' has to be first, otherwise the '&' at the start of every sequence inserted by the other rows would be escaped again.
+        // * '\n' has to come after '<' and '>', otherwise the inserted "<br>" would itself be escaped.
+        const HtmlEscapeSequence replacements[] = {
+            { '&', "&amp;" },
+            { '"', "&quot;" },
+            { '\'', "&#39;" },
+            { '<', "&lt;" },
+            { '>', "&gt;" },
+            { '\n', "<br>" }
+        };
+
+        for(const HtmlEscapeSequence &replacement : replacements)
+            string_replace_all(str, replacement.unescape_char, replacement.escape_sequence);
+    }
+
+    struct HtmlUnescapeSequence { // One row of the replacement table that html_unescape_sequences walks through
+        std::string escape_sequence; // The HTML sequence that is handed to string_replace_all as the thing to replace
+        std::string unescaped_str; // The plain text that is handed to string_replace_all as the replacement for |escape_sequence|
+    };
+
+    // Turns the HTML character references listed in the table below back into plain characters. |str| is modified in place.
+    // Both spellings of the apostrophe reference ("&#039;" and "&#39;") are recognized. Other references and tags such as "<br>" are left as they are.
+    // This relies on string_replace_all replacing every occurrence of the sequence (declared in StringUtils.hpp, not visible here).
+    void html_unescape_sequences(std::string &str) {
+        // "&amp;" has to be the last row. If it was replaced earlier then a text such as "&amp;lt;" would first turn into "&lt;"
+        // and after that into "<", which unescapes the text twice.
+        const HtmlUnescapeSequence replacements[] = {
+            { "&quot;", "\"" },
+            { "&#039;", "'" },
+            { "&#39;", "'" },
+            { "&lt;", "<" },
+            { "&gt;", ">" },
+            { "&amp;", "&" }
+        };
+
+        for(const HtmlUnescapeSequence &replacement : replacements)
+            string_replace_all(str, replacement.escape_sequence, replacement.unescaped_str);
+    }
+
+    // Percent-encodes |param| so it can be used as the value of an url query parameter and returns the encoded copy.
+    // The RFC 3986 unreserved characters (ASCII letters, ASCII digits, '-', '_', '.' and '~') are copied as they are,
+    // every other byte is written as '%' followed by its value as two uppercase hexadecimal digits, for example ' ' becomes "%20".
+    std::string url_param_encode(const std::string &param) {
+        std::ostringstream result;
+        result << std::hex << std::uppercase << std::setfill('0');
+
+        for(const char c : param) {
+            // The ranges are spelled out instead of calling isalnum: passing a negative value to isalnum (which is what any
+            // non-ascii byte is when char is signed) is undefined behavior, and the result of isalnum depends on the locale.
+            const bool is_unreserved =
+                (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ||
+                c == '-' || c == '_' || c == '.' || c == '~';
+
+            if(is_unreserved) {
+                result << c;
+            } else {
+                // The width set by setw only applies to the next value that is written, so it has to be set for every byte.
+                // The cast to unsigned char keeps bytes >= 0x80 from being sign extended into a negative int
+                result << '%' << std::setw(2) << static_cast<int>(static_cast<unsigned char>(c));
+            }
+        }
+
+        return result.str();
+    }
+
+    // Returns true if |c| is an ASCII letter (a-z or A-Z). No locale is involved, bytes outside of ASCII are never letters.
+    static bool is_alpha(char c) {
+        const bool is_lowercase = 'a' <= c && c <= 'z';
+        const bool is_uppercase = 'A' <= c && c <= 'Z';
+        return is_lowercase || is_uppercase;
+    }
+
+    // Returns true if |c| is one of the ASCII digits 0-9
+    static bool is_digit(char c) {
+        if(c < '0')
+            return false;
+        return c <= '9';
+    }
+
+    // Returns true if |c| is allowed to be part of an url: an ASCII letter, an ASCII digit or one of the symbols in the table below.
+    // Every other character, including the NUL character, gives false.
+    static bool is_url_character(char c) {
+        // In order: '%', the reserved characters and then the unreserved symbols
+        static const char url_symbols[] = "%" ":/?#[]@!$&'()*+,;=" "-._~";
+
+        // The scan stops at the terminating NUL character without comparing against it, so NUL is never reported as a symbol
+        for(const char *symbol = url_symbols; *symbol != '\0'; ++symbol) {
+            if(*symbol == c)
+                return true;
+        }
+
+        return is_alpha(c) || is_digit(c);
+    }
+
+    // Finds everything in |str| that looks like an url and returns the matches in the order they appear in |str|.
+    // A candidate is a run of consecutive url characters, see is_url_character. The set of url characters follows
+    // the URI standard: https://tools.ietf.org/html/rfc3986#section-2.2
+    // All '.' and ',' at the end of a candidate are seen as punctuation of the surrounding text and are not part of the candidate.
+    // A candidate is only an url if it has a '.' that is neither its first nor its last character,
+    // for example "example.com" is an url but "example.", ".com" and "wait..." are not.
+    // A scheme such as "https://" is not required and the candidate is not validated in any other way.
+    // TODO: Maybe check if the TLD only contains valid characters (is_alpha)?
+    std::vector<std::string> extract_urls(const std::string &str) {
+        std::vector<std::string> urls;
+
+        size_t url_start = std::string::npos;
+        size_t url_dot_index = std::string::npos;
+        // Going up to and including str.size() is fine: str[str.size()] is the NUL character, which is not an url character,
+        // so an url at the very end of the string is terminated by it
+        for(size_t i = 0; i <= str.size(); ++i) {
+            const char c = str[i];
+            if(is_url_character(c)) {
+                if(url_start == std::string::npos)
+                    url_start = i;
+                else if(c == '.' && url_dot_index == std::string::npos)
+                    url_dot_index = i; // Only the first dot after the first character is remembered
+                continue;
+            }
+
+            // Not inside a candidate, nothing to finish
+            if(url_start == std::string::npos)
+                continue;
+
+            // We want to remove every . and , at the end because the string could contain for example
+            // "click on this link: example.com. There you can..." or "example.com..." and we want those links to work
+            size_t url_end = i;
+            while(url_end > url_start && (str[url_end - 1] == '.' || str[url_end - 1] == ','))
+                --url_end;
+
+            // Its only an url if there is a dot and something is left after the dot once the trailing punctuation is gone
+            if(url_dot_index != std::string::npos && url_dot_index + 1 < url_end)
+                urls.push_back(str.substr(url_start, url_end - url_start));
+
+            url_start = std::string::npos;
+            url_dot_index = std::string::npos;
+        }
+        return urls;
+    }
+}
+\ No newline at end of file