diff options
Diffstat (limited to 'src/NetUtils.cpp')
-rw-r--r-- | src/NetUtils.cpp | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp new file mode 100644 index 0000000..e87c42c --- /dev/null +++ b/src/NetUtils.cpp @@ -0,0 +1,140 @@ +#include "../include/NetUtils.hpp" +#include "../include/StringUtils.hpp" +#include <array> +#include <sstream> +#include <iomanip> + +namespace QuickMedia { + struct HtmlEscapeSequence { + char unescape_char; + std::string escape_sequence; + }; + + void html_escape_sequences(std::string &str) { + const std::array<HtmlEscapeSequence, 6> escape_sequences = { + HtmlEscapeSequence { '&', "&" }, // This should be first, to not accidentally replace a new sequence caused by replacing this + HtmlEscapeSequence { '"', """ }, + HtmlEscapeSequence { '\'', "'" }, + HtmlEscapeSequence { '<', "<" }, + HtmlEscapeSequence { '>', ">" }, + HtmlEscapeSequence { '\n', "<br>" } + }; + + for(const HtmlEscapeSequence &escape_sequence : escape_sequences) { + string_replace_all(str, escape_sequence.unescape_char, escape_sequence.escape_sequence); + } + } + + struct HtmlUnescapeSequence { + std::string escape_sequence; + std::string unescaped_str; + }; + + void html_unescape_sequences(std::string &str) { + const std::array<HtmlUnescapeSequence, 6> unescape_sequences = { + HtmlUnescapeSequence { """, "\"" }, + HtmlUnescapeSequence { "'", "'" }, + HtmlUnescapeSequence { "'", "'" }, + HtmlUnescapeSequence { "<", "<" }, + HtmlUnescapeSequence { ">", ">" }, + HtmlUnescapeSequence { "&", "&" } // This should be last, to not accidentally replace a new sequence caused by replacing this + }; + + for(const HtmlUnescapeSequence &unescape_sequence : unescape_sequences) { + string_replace_all(str, unescape_sequence.escape_sequence, unescape_sequence.unescaped_str); + } + } + + std::string url_param_encode(const std::string ¶m) { + std::ostringstream result; + result.fill('0'); + result << std::hex; + + for(char c : param) { + if(isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') { + result << c; + } else { + result << std::uppercase; + result << "%" << std::setw(2) << (int)(unsigned char)(c); + } + } + + return result.str(); + } + + static bool is_alpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + static bool is_digit(char c) { + return c >= '0' && c <= '9'; + } + + static bool is_url_character(char c) { + switch(c) { + case '%': + // Reserved + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + // Unreserved: + case '-': + case '.': + case '_': + case '~': + return true; + default: + return is_alpha(c) || is_digit(c); + } + } + + // Implementation follows URI standard: https://tools.ietf.org/html/rfc3986#section-2.2 + // TODO: Maybe check if the TLD only contains valid characters (is_alpha)? + std::vector<std::string> extract_urls(const std::string &str) { + std::vector<std::string> urls; + + size_t url_start = std::string::npos; + size_t url_dot_index = std::string::npos; + // str.size() is fine, we want to include the NULL character so we can extract url at the end of the string + for(size_t i = 0; i < (size_t)str.size() + 1; ++i) { + char c = str[i]; + if(c == '.' && url_start != std::string::npos && url_dot_index == std::string::npos) + url_dot_index = i; + if(is_url_character(c)) { + if(url_start == std::string::npos) + url_start = i; + } else { + if(url_start != std::string::npos) { + // Its only an url if there is a dot and the dot is not the last character in the url, for example "example.com" is an url but "example." is not. + if(url_dot_index != std::string::npos && url_dot_index != i - 1) { + size_t url_length = i - url_start; + char prev_char = str[i - 1]; + // We want to remove the last . or , because the string could contain for example "click on this like: example.com. There you can..." + // and we want those links to work, I guess? + if(prev_char == '.' || prev_char == ',') + --url_length; + urls.push_back(str.substr(url_start, url_length)); + } + url_start = std::string::npos; + url_dot_index = std::string::npos; + } + } + } + return urls; + } +}
\ No newline at end of file |