about summary refs log tree commit diff
path: root/src/NetUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/NetUtils.cpp')
-rw-r--r-- src/NetUtils.cpp 140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
new file mode 100644
index 0000000..e87c42c
--- /dev/null
+++ b/src/NetUtils.cpp
@@ -0,0 +1,140 @@
+#include "../include/NetUtils.hpp"
+#include "../include/StringUtils.hpp"
+#include <array>
+#include <sstream>
+#include <iomanip>
+
+namespace QuickMedia {
+ // One row of the escape table: a raw character and the HTML text that takes its place.
+ struct HtmlEscapeSequence {
+     char unescape_char;          // Raw character searched for in the input
+     std::string escape_sequence; // Replacement text handed to string_replace_all
+ };
+
+ // Escapes |str| in place for embedding in HTML.
+ // Each table row is passed, in table order, to string_replace_all (declared outside this file;
+ // presumably it replaces every occurrence of the character with the given text).
+ void html_escape_sequences(std::string &str) {
+     // Row order matters:
+     //  - '&' goes first, otherwise the '&' that starts every entity produced by later rows would be escaped again.
+     //  - '\n' goes after '<' and '>', otherwise the "<br>" it introduces would itself be turned into "&lt;br&gt;".
+     const std::array<HtmlEscapeSequence, 6> replacements = {
+         HtmlEscapeSequence { '&', "&amp;" },
+         HtmlEscapeSequence { '"', "&quot;" },
+         HtmlEscapeSequence { '\'', "&#39;" },
+         HtmlEscapeSequence { '<', "&lt;" },
+         HtmlEscapeSequence { '>', "&gt;" },
+         HtmlEscapeSequence { '\n', "<br>" }
+     };
+
+     for(size_t row = 0; row < replacements.size(); ++row) {
+         const HtmlEscapeSequence &replacement = replacements[row];
+         string_replace_all(str, replacement.unescape_char, replacement.escape_sequence);
+     }
+ }
+
+ // One row of the unescape table: an HTML entity and the plain text it stands for.
+ struct HtmlUnescapeSequence {
+     std::string escape_sequence; // Entity searched for in the input, for example "&lt;"
+     std::string unescaped_str;   // Plain text written in its place
+ };
+
+ // Reverses a small set of HTML entities in |str|, in place.
+ // Handles &quot;, &#039;, &#39;, &lt;, &gt; and &amp;. Each row is passed, in table order, to
+ // string_replace_all (declared outside this file; presumably it replaces every occurrence).
+ // Note that "<br>" is not turned back into a newline here.
+ void html_unescape_sequences(std::string &str) {
+     // "&amp;" must stay the last row: decoding it earlier would create fresh '&' characters that
+     // the following rows could combine with the surrounding text and decode a second time.
+     const std::array<HtmlUnescapeSequence, 6> replacements = {
+         HtmlUnescapeSequence { "&quot;", "\"" },
+         HtmlUnescapeSequence { "&#039;", "'" }, // Both spellings of the apostrophe entity are accepted
+         HtmlUnescapeSequence { "&#39;", "'" },
+         HtmlUnescapeSequence { "&lt;", "<" },
+         HtmlUnescapeSequence { "&gt;", ">" },
+         HtmlUnescapeSequence { "&amp;", "&" }
+     };
+
+     for(auto row = replacements.begin(); row != replacements.end(); ++row) {
+         string_replace_all(str, row->escape_sequence, row->unescaped_str);
+     }
+ }
+
+ // Percent-encodes |param| so it can be used as a URL parameter value.
+ // ASCII letters, ASCII digits and '-', '_', '.', '~' (the RFC 3986 "unreserved" set) are copied
+ // unchanged. Every other byte, including each byte of a multi-byte UTF-8 sequence, is written as
+ // '%' followed by two uppercase hexadecimal digits.
+ // Returns the encoded string; an empty input gives an empty result.
+ std::string url_param_encode(const std::string &param) {
+     static const char hex_digits[] = "0123456789ABCDEF";
+
+     std::string result;
+     result.reserve(param.size());
+
+     for(const char c : param) {
+         // Classify with explicit ASCII ranges instead of isalnum(): passing a negative char to isalnum() is
+         // undefined behaviour and its answer depends on the current locale, so non-ASCII bytes could be
+         // misclassified and left unescaped.
+         const bool is_unreserved =
+             (c >= 'a' && c <= 'z') ||
+             (c >= 'A' && c <= 'Z') ||
+             (c >= '0' && c <= '9') ||
+             c == '-' || c == '_' || c == '.' || c == '~';
+
+         if(is_unreserved) {
+             result += c;
+         } else {
+             // Go through unsigned char so bytes >= 0x80 index the table correctly
+             const unsigned char byte = static_cast<unsigned char>(c);
+             result += '%';
+             result += hex_digits[byte >> 4];
+             result += hex_digits[byte & 0x0F];
+         }
+     }
+
+     return result;
+ }
+
+ // Returns true when |c| is an ASCII letter ('a'-'z' or 'A'-'Z'). Independent of the current locale.
+ static bool is_alpha(char c) {
+     const bool is_lowercase = 'a' <= c && c <= 'z';
+     const bool is_uppercase = 'A' <= c && c <= 'Z';
+     return is_lowercase || is_uppercase;
+ }
+
+ // Returns true when |c| is an ASCII decimal digit ('0'-'9'). Independent of the current locale.
+ static bool is_digit(char c) {
+     if(c < '0')
+         return false;
+     return c <= '9';
+ }
+
+ // Returns true when |c| is a character that extract_urls accepts inside a URL:
+ // an ASCII letter or digit, '%', or one of the reserved / unreserved punctuation characters listed below.
+ // Everything else, including the NUL character and whitespace, gives false.
+ static bool is_url_character(char c) {
+     if(is_alpha(c) || is_digit(c))
+         return true;
+
+     static const char url_punctuation[] = {
+         '%',
+         // Reserved
+         ':', '/', '?', '#', '[', ']', '@',
+         '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=',
+         // Unreserved
+         '-', '.', '_', '~'
+     };
+
+     for(const char allowed : url_punctuation) {
+         if(c == allowed)
+             return true;
+     }
+     return false;
+ }
+
+ // Extracts everything in |str| that looks like an url and returns the matches in order of appearance.
+ // The set of accepted characters follows the URI standard: https://tools.ietf.org/html/rfc3986#section-2.2
+ //
+ // A candidate is a maximal run of characters accepted by is_url_character. Trailing '.' and ',' are
+ // removed from the candidate, since text such as "click on this link: example.com. There you can..."
+ // should yield "example.com". What remains is reported as an url only if it contains a '.' that is
+ // neither its first nor its last character, so "example.com" is an url but "example.", "example.," and
+ // "..." are not.
+ // TODO: Maybe check if the TLD only contains valid characters (is_alpha)?
+ std::vector<std::string> extract_urls(const std::string &str) {
+     std::vector<std::string> urls;
+
+     size_t url_start = std::string::npos;
+     size_t url_dot_index = std::string::npos;
+     // Deliberately iterate one past the last character: reading str[str.size()] on a const std::string
+     // gives the NUL character, which is not an url character and therefore closes an url that reaches
+     // the end of the string.
+     for(size_t i = 0; i <= str.size(); ++i) {
+         const char c = str[i];
+
+         // Remember the first dot that comes after the first character of the current candidate.
+         // This runs before url_start is set below, so a leading dot is never recorded.
+         if(c == '.' && url_start != std::string::npos && url_dot_index == std::string::npos)
+             url_dot_index = i;
+
+         if(is_url_character(c)) {
+             if(url_start == std::string::npos)
+                 url_start = i;
+             continue;
+         }
+
+         if(url_start == std::string::npos)
+             continue;
+
+         // The candidate spans [url_start, i). Drop every trailing '.' and ',' before validating, so the
+         // "dot is not the last character" rule is applied to the text that is actually returned.
+         size_t url_end = i;
+         while(url_end > url_start && (str[url_end - 1] == '.' || str[url_end - 1] == ','))
+             --url_end;
+
+         if(url_dot_index != std::string::npos && url_dot_index + 1 < url_end)
+             urls.push_back(str.substr(url_start, url_end - url_start));
+
+         url_start = std::string::npos;
+         url_dot_index = std::string::npos;
+     }
+     return urls;
+ }
+} \ No newline at end of file