aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2020-10-18 18:27:11 +0200
committerdec05eba <dec05eba@protonmail.com>2020-10-18 18:27:11 +0200
commit4fe0a037c82332e84b16a6f0e2847a2f9a0bd5d7 (patch)
tree1789706873ccc7095414f44b1a85fe64b6e62345
parentf5dc7e2c85436877606af46a011c2fba112185a9 (diff)
Matrix: add a proper URI parser instead of regex for opening links. Show related body item image when pressing enter (for showing replied to media)
-rw-r--r--include/NetUtils.hpp11
-rw-r--r--plugins/Plugin.hpp5
-rw-r--r--src/NetUtils.cpp140
-rw-r--r--src/QuickMedia.cpp34
-rw-r--r--src/plugins/Fourchan.cpp1
-rw-r--r--src/plugins/Mangadex.cpp1
-rw-r--r--src/plugins/Manganelo.cpp1
-rw-r--r--src/plugins/Mangatown.cpp1
-rw-r--r--src/plugins/Matrix.cpp1
-rw-r--r--src/plugins/NyaaSi.cpp1
-rw-r--r--src/plugins/Plugin.cpp61
-rw-r--r--src/plugins/Pornhub.cpp1
-rw-r--r--src/plugins/Youtube.cpp1
-rw-r--r--tests/main.cpp38
14 files changed, 215 insertions, 82 deletions
diff --git a/include/NetUtils.hpp b/include/NetUtils.hpp
new file mode 100644
index 0000000..84b9d18
--- /dev/null
+++ b/include/NetUtils.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace QuickMedia { // String helpers for HTML escaping, url parameter encoding and url extraction
+ void html_escape_sequences(std::string &str); // Modifies |str| in place: & " ' < > become HTML entities and newline becomes <br>
+ void html_unescape_sequences(std::string &str); // Modifies |str| in place: &quot; &#039; &#39; &lt; &gt; &amp; become the characters they stand for
+ std::string url_param_encode(const std::string &param); // Returns |param| percent-encoded; alphanumerics and - _ . ~ are kept as-is
+ std::vector<std::string> extract_urls(const std::string &str); // Returns the url-looking tokens (they must contain a dot) found in |str|, in order of appearance
+} \ No newline at end of file
diff --git a/plugins/Plugin.hpp b/plugins/Plugin.hpp
index 1427233..d754445 100644
--- a/plugins/Plugin.hpp
+++ b/plugins/Plugin.hpp
@@ -3,7 +3,6 @@
#include "../include/Body.hpp"
#include "../include/DownloadUtils.hpp"
#include <stddef.h>
-#include <string>
namespace QuickMedia {
enum class PluginResult {
@@ -36,10 +35,6 @@ namespace QuickMedia {
size_t index;
};
- void html_escape_sequences(std::string &str);
- void html_unescape_sequences(std::string &str);
- std::string url_param_encode(const std::string &param);
-
SuggestionResult download_result_to_suggestion_result(DownloadResult download_result);
PluginResult download_result_to_plugin_result(DownloadResult download_result);
SearchResult download_result_to_search_result(DownloadResult download_result);
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
new file mode 100644
index 0000000..e87c42c
--- /dev/null
+++ b/src/NetUtils.cpp
@@ -0,0 +1,140 @@
+#include "../include/NetUtils.hpp"
+#include "../include/StringUtils.hpp"
+#include <array>
+#include <sstream>
+#include <iomanip>
+
+namespace QuickMedia {
+ struct HtmlEscapeSequence { // One row of the replacement table used by html_escape_sequences
+ char unescape_char; // The raw character to search for
+ std::string escape_sequence; // The text that is inserted in place of the raw character
+ };
+
+ // Escapes the characters that have a special meaning in HTML, modifying |str| in place.
+ // Newlines are converted to <br> tags.
+ void html_escape_sequences(std::string &str) {
+     // '&' has to be the first entry: every other replacement introduces a new '&',
+     // which would otherwise be escaped a second time.
+     const std::array<HtmlEscapeSequence, 6> replacements = {
+         HtmlEscapeSequence { '&', "&amp;" },
+         HtmlEscapeSequence { '"', "&quot;" },
+         HtmlEscapeSequence { '\'', "&#39;" },
+         HtmlEscapeSequence { '<', "&lt;" },
+         HtmlEscapeSequence { '>', "&gt;" },
+         HtmlEscapeSequence { '\n', "<br>" }
+     };
+
+     for(size_t i = 0; i < replacements.size(); ++i) {
+         const HtmlEscapeSequence &replacement = replacements[i];
+         string_replace_all(str, replacement.unescape_char, replacement.escape_sequence);
+     }
+ }
+
+ struct HtmlUnescapeSequence { // One row of the replacement table used by html_unescape_sequences
+ std::string escape_sequence; // The HTML entity to search for
+ std::string unescaped_str; // The text that is inserted in place of the entity
+ };
+
+ // Converts the HTML entities for " ' < > and & back to the characters they stand for, modifying |str| in place.
+ void html_unescape_sequences(std::string &str) {
+     // "&amp;" has to be the last entry: the '&' it produces must not be combined with the
+     // text that follows it and then be unescaped a second time.
+     const std::array<HtmlUnescapeSequence, 6> replacements = {
+         HtmlUnescapeSequence { "&quot;", "\"" },
+         HtmlUnescapeSequence { "&#039;", "'" },
+         HtmlUnescapeSequence { "&#39;", "'" },
+         HtmlUnescapeSequence { "&lt;", "<" },
+         HtmlUnescapeSequence { "&gt;", ">" },
+         HtmlUnescapeSequence { "&amp;", "&" }
+     };
+
+     for(auto it = replacements.begin(); it != replacements.end(); ++it)
+         string_replace_all(str, it->escape_sequence, it->unescaped_str);
+ }
+
+ // Percent-encodes |param| so it can be used as the value of an url query parameter.
+ // The unreserved characters (ASCII letters, ASCII digits, '-', '_', '.' and '~') are copied as-is,
+ // every other byte is written as %XX where XX is the byte value in uppercase hex.
+ std::string url_param_encode(const std::string &param) {
+     std::ostringstream result;
+     result.fill('0');
+     result << std::hex << std::uppercase;
+
+     for(const char c : param) {
+         // Explicit ASCII ranges are used instead of isalnum(): passing a negative char (any byte >= 0x80
+         // when char is signed) to isalnum is undefined behavior, and its result depends on the current locale.
+         const bool is_unreserved =
+             (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ||
+             c == '-' || c == '_' || c == '.' || c == '~';
+
+         if(is_unreserved)
+             result << c;
+         else
+             result << '%' << std::setw(2) << static_cast<int>(static_cast<unsigned char>(c));
+     }
+
+     return result.str();
+ }
+
+ // Returns true if |c| is an ASCII letter (a-z or A-Z).
+ static bool is_alpha(char c) {
+     const bool is_lower = c >= 'a' && c <= 'z';
+     const bool is_upper = c >= 'A' && c <= 'Z';
+     return is_lower || is_upper;
+ }
+
+ // Returns true if |c| is an ASCII digit (0-9).
+ static bool is_digit(char c) {
+     if(c < '0')
+         return false;
+     return c <= '9';
+ }
+
+ // Returns true if |c| may appear in an URI: ASCII letters and digits, '%',
+ // the reserved characters :/?#[]@!$&'()*+,;= and the unreserved characters -._~
+ static bool is_url_character(char c) {
+     if(is_alpha(c) || is_digit(c))
+         return true;
+
+     // '%', followed by the reserved characters, followed by the unreserved characters
+     static const char allowed_symbols[] = "%" ":/?#[]@!$&'()*+,;=" "-._~";
+     // The loop stops at the null terminator, so '\0' itself is never accepted
+     for(const char *symbol = allowed_symbols; *symbol != '\0'; ++symbol) {
+         if(*symbol == c)
+             return true;
+     }
+     return false;
+ }
+
+ // Finds everything in |str| that looks like an url and returns the matches in order of appearance.
+ // A candidate is a maximal run of characters that are allowed in an URI, see https://tools.ietf.org/html/rfc3986#section-2.2
+ // Trailing '.' and ',' are treated as sentence punctuation and are removed from the candidate. What remains is only
+ // reported as an url if it contains a dot (other than as its first character) that is followed by at least one more
+ // character, so "example.com" is an url but "example.", "example.," and "wait..." are not.
+ // TODO: Maybe check if the TLD only contains valid characters (is_alpha)?
+ std::vector<std::string> extract_urls(const std::string &str) {
+     std::vector<std::string> urls;
+
+     size_t url_start = std::string::npos;
+     size_t url_dot_index = std::string::npos;
+     // Iterating one past the last character is intentional: str[str.size()] is the null character, which is not
+     // an url character and therefore terminates an url that is located at the end of the string.
+     for(size_t i = 0; i <= str.size(); ++i) {
+         const char c = str[i];
+         if(is_url_character(c)) {
+             if(url_start == std::string::npos)
+                 url_start = i;
+             else if(c == '.' && url_dot_index == std::string::npos)
+                 url_dot_index = i; // The first dot that comes after the first character of the candidate
+             continue;
+         }
+
+         if(url_start == std::string::npos)
+             continue;
+
+         // Remove every trailing '.' and ',' because the string could contain for example
+         // "click on this link: example.com. There you can..." or "example.com..." and we want those links to work.
+         size_t url_end = i;
+         while(url_end > url_start && (str[url_end - 1] == '.' || str[url_end - 1] == ','))
+             --url_end;
+
+         // It's only an url if there is a dot and the dot is not the last character of the trimmed url
+         if(url_dot_index != std::string::npos && url_dot_index + 1 < url_end)
+             urls.push_back(str.substr(url_start, url_end - url_start));
+
+         url_start = std::string::npos;
+         url_dot_index = std::string::npos;
+     }
+     return urls;
+ }
+} \ No newline at end of file
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index 070b8fe..e374dfb 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -18,12 +18,12 @@
#include "../include/ImageUtils.hpp"
#include "../include/base64_url.hpp"
#include "../include/Entry.hpp"
+#include "../include/NetUtils.hpp"
#include <assert.h>
#include <cmath>
#include <string.h>
#include <signal.h>
-#include <regex>
#include <SFML/Graphics/RectangleShape.hpp>
#include <SFML/Window/Clipboard.hpp>
@@ -3225,8 +3225,6 @@ namespace QuickMedia {
const float chat_input_padding_x = 15.0f;
const float chat_input_padding_y = 15.0f;
- std::regex url_extract_regex("(http(s?):\\/\\/)?([a-zA-Z0-9\\-_]+\\.)+[a-zA-Z]+[^\\s.,]+");
-
Body url_selection_body(this, font.get(), bold_font.get(), cjk_font.get());
sf::Clock read_marker_timer;
@@ -3476,36 +3474,40 @@ namespace QuickMedia {
if(tabs[selected_tab].type == ChatTabType::MESSAGES && event.key.code == sf::Keyboard::Enter) {
BodyItem *selected = tabs[selected_tab].body->get_selected();
if(selected) {
- if(!selected->url.empty()) {
- const char *content_type = link_get_content_type(selected->url);
+ std::string selected_url = selected->url;
+ if(selected_url.empty() && selected->embedded_item)
+ selected_url = selected->embedded_item->url;
+ if(!selected_url.empty()) {
+ const char *content_type = link_get_content_type(selected_url);
if(content_type && (strcmp(content_type, "audio") == 0 || strcmp(content_type, "video") == 0 || strcmp(content_type, "image") == 0)) {
page_stack.push(PageType::CHAT);
watched_videos.clear();
current_page = PageType::VIDEO_CONTENT;
+ bool is_audio = strcmp(content_type, "audio") == 0;
+ bool prev_no_video = no_video;
+ no_video = is_audio;
// TODO: Add title
- video_content_page(video_page.get(), selected->url, "No title");
+ video_content_page(video_page.get(), selected_url, "No title");
+ no_video = prev_no_video;
redraw = true;
continue;
}
- launch_url(selected->url.c_str());
+ launch_url(selected_url);
continue;
}
// TODO: If content type is a file, show file-manager prompt where it should be saved and asynchronously save it instead
- // TODO: Change this when messages are not stored in the description
const std::string &message_str = selected->get_description();
- auto urls_begin = std::sregex_iterator(message_str.begin(), message_str.end(), url_extract_regex);
- auto urls_end = std::sregex_iterator();
- size_t num_urls = std::distance(urls_begin, urls_end);
- if(num_urls == 1) {
- launch_url(urls_begin->str());
- } else if(num_urls > 1) {
+ std::vector<std::string> urls = extract_urls(message_str);
+ if(urls.size() == 1) {
+ launch_url(urls[0]);
+ } else if(urls.size() > 1) {
chat_state = ChatState::URL_SELECTION;
url_selection_body.clear_items();
- for(auto it = urls_begin; it != urls_end; ++it) {
- auto body_item = BodyItem::create(it->str());
+ for(const std::string &url : urls) {
+ auto body_item = BodyItem::create(url);
url_selection_body.items.push_back(std::move(body_item));
}
}
diff --git a/src/plugins/Fourchan.cpp b/src/plugins/Fourchan.cpp
index 1d3681a..2938319 100644
--- a/src/plugins/Fourchan.cpp
+++ b/src/plugins/Fourchan.cpp
@@ -2,6 +2,7 @@
#include "../../include/DataView.hpp"
#include "../../include/Storage.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <json/reader.h>
#include <string.h>
#include <tidy.h>
diff --git a/src/plugins/Mangadex.cpp b/src/plugins/Mangadex.cpp
index f0e163e..0ccecdd 100644
--- a/src/plugins/Mangadex.cpp
+++ b/src/plugins/Mangadex.cpp
@@ -2,6 +2,7 @@
#include "../../include/Storage.hpp"
#include "../../include/Notification.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <rapidjson/document.h>
#include <rapidjson/reader.h>
#include <quickmedia/HtmlSearch.h>
diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp
index e96bc65..b260dea 100644
--- a/src/plugins/Manganelo.cpp
+++ b/src/plugins/Manganelo.cpp
@@ -1,6 +1,7 @@
#include "../../plugins/Manganelo.hpp"
#include "../../include/Notification.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <quickmedia/HtmlSearch.h>
namespace QuickMedia {
diff --git a/src/plugins/Mangatown.cpp b/src/plugins/Mangatown.cpp
index 5d6f97f..992e0cc 100644
--- a/src/plugins/Mangatown.cpp
+++ b/src/plugins/Mangatown.cpp
@@ -1,6 +1,7 @@
#include "../../plugins/Mangatown.hpp"
#include "../../include/Notification.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <quickmedia/HtmlSearch.h>
static const std::string mangatown_url = "https://www.mangatown.com";
diff --git a/src/plugins/Matrix.cpp b/src/plugins/Matrix.cpp
index 5cf4611..d81c53c 100644
--- a/src/plugins/Matrix.cpp
+++ b/src/plugins/Matrix.cpp
@@ -1,6 +1,7 @@
#include "../../plugins/Matrix.hpp"
#include "../../include/Storage.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <rapidjson/document.h>
#include <rapidjson/writer.h>
#include <rapidjson/stringbuffer.h>
diff --git a/src/plugins/NyaaSi.cpp b/src/plugins/NyaaSi.cpp
index dc6e19f..860eb71 100644
--- a/src/plugins/NyaaSi.cpp
+++ b/src/plugins/NyaaSi.cpp
@@ -3,6 +3,7 @@
#include "../../include/Storage.hpp"
#include "../../include/Notification.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <quickmedia/HtmlSearch.h>
namespace QuickMedia {
diff --git a/src/plugins/Plugin.cpp b/src/plugins/Plugin.cpp
index 3f76b4c..0b554ae 100644
--- a/src/plugins/Plugin.cpp
+++ b/src/plugins/Plugin.cpp
@@ -1,67 +1,6 @@
#include "../../plugins/Plugin.hpp"
-#include "../../include/StringUtils.hpp"
-#include <sstream>
-#include <iomanip>
-#include <array>
namespace QuickMedia {
- struct HtmlEscapeSequence {
- char unescape_char;
- std::string escape_sequence;
- };
-
- void html_escape_sequences(std::string &str) {
- const std::array<HtmlEscapeSequence, 6> escape_sequences = {
- HtmlEscapeSequence { '&', "&amp;" }, // This should be first, to not accidentally replace a new sequence caused by replacing this
- HtmlEscapeSequence { '"', "&quot;" },
- HtmlEscapeSequence { '\'', "&#39;" },
- HtmlEscapeSequence { '<', "&lt;" },
- HtmlEscapeSequence { '>', "&gt;" },
- HtmlEscapeSequence { '\n', "<br>" }
- };
-
- for(const HtmlEscapeSequence &escape_sequence : escape_sequences) {
- string_replace_all(str, escape_sequence.unescape_char, escape_sequence.escape_sequence);
- }
- }
-
- struct HtmlUnescapeSequence {
- std::string escape_sequence;
- std::string unescaped_str;
- };
-
- void html_unescape_sequences(std::string &str) {
- const std::array<HtmlUnescapeSequence, 6> unescape_sequences = {
- HtmlUnescapeSequence { "&quot;", "\"" },
- HtmlUnescapeSequence { "&#039;", "'" },
- HtmlUnescapeSequence { "&#39;", "'" },
- HtmlUnescapeSequence { "&lt;", "<" },
- HtmlUnescapeSequence { "&gt;", ">" },
- HtmlUnescapeSequence { "&amp;", "&" } // This should be last, to not accidentally replace a new sequence caused by replacing this
- };
-
- for(const HtmlUnescapeSequence &unescape_sequence : unescape_sequences) {
- string_replace_all(str, unescape_sequence.escape_sequence, unescape_sequence.unescaped_str);
- }
- }
-
- std::string url_param_encode(const std::string &param) {
- std::ostringstream result;
- result.fill('0');
- result << std::hex;
-
- for(char c : param) {
- if(isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
- result << c;
- } else {
- result << std::uppercase;
- result << "%" << std::setw(2) << (int)(unsigned char)(c);
- }
- }
-
- return result.str();
- }
-
SuggestionResult download_result_to_suggestion_result(DownloadResult download_result) { return (SuggestionResult)download_result; }
PluginResult download_result_to_plugin_result(DownloadResult download_result) { return (PluginResult)download_result; }
SearchResult download_result_to_search_result(DownloadResult download_result) { return (SearchResult)download_result; }
diff --git a/src/plugins/Pornhub.cpp b/src/plugins/Pornhub.cpp
index afdd8fc..e8df9d7 100644
--- a/src/plugins/Pornhub.cpp
+++ b/src/plugins/Pornhub.cpp
@@ -1,5 +1,6 @@
#include "../../plugins/Pornhub.hpp"
#include "../../include/StringUtils.hpp"
+#include "../../include/NetUtils.hpp"
#include <quickmedia/HtmlSearch.h>
#include <string.h>
diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp
index 1711e41..424a8d2 100644
--- a/src/plugins/Youtube.cpp
+++ b/src/plugins/Youtube.cpp
@@ -1,5 +1,6 @@
#include "../../plugins/Youtube.hpp"
#include "../../include/Storage.hpp"
+#include "../../include/NetUtils.hpp"
#include <string.h>
#include <unordered_set>
diff --git a/tests/main.cpp b/tests/main.cpp
new file mode 100644
index 0000000..38dd534
--- /dev/null
+++ b/tests/main.cpp
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include "../include/NetUtils.hpp"
+
+#define assert_fail(str) do { fprintf(stderr, "Assert failed on line %d, reason: %s\n", __LINE__, (str)); exit(1); } while(0) // Prints |str| together with the current line number to stderr and exits with status 1. NOTE(review): exit() is used but <stdlib.h> is not among the visible includes - confirm it is pulled in transitively
+#define assert_equals(a, b) do { if((a) != (b)) { fprintf(stderr, "Assert failed on line %d, %s == %s\n", __LINE__, #a, #b); exit(1); } } while(0) // When a != b: prints the line number and both expressions as written to stderr and exits with status 1
+
+int main() { // Runs the extract_urls test cases in order; assert_equals exits with status 1 on the first mismatch
+ std::vector<std::string> urls;
+
+ urls = QuickMedia::extract_urls("example.com"); // The whole input is a single url
+ assert_equals(urls.size(), 1);
+ assert_equals(urls[0], "example.com");
+
+ urls = QuickMedia::extract_urls("example.com, is where I like to go"); // A comma directly after the url is not part of the url
+ assert_equals(urls.size(), 1);
+ assert_equals(urls[0], "example.com");
+
+ urls = QuickMedia::extract_urls("The website I like to go to is example.com"); // The url is located at the very end of the input
+ assert_equals(urls.size(), 1);
+ assert_equals(urls[0], "example.com");
+
+ urls = QuickMedia::extract_urls("example.com. Is also a website"); // A period directly after the url is not part of the url
+ assert_equals(urls.size(), 1);
+ assert_equals(urls[0], "example.com");
+
+ urls = QuickMedia::extract_urls("these. are. not. websites."); // Words that merely end with a period are not urls
+ assert_equals(urls.size(), 0);
+
+ urls = QuickMedia::extract_urls("This is not an url: example."); // Same as above, with the word at the end of the input
+ assert_equals(urls.size(), 0);
+
+ urls = QuickMedia::extract_urls("the.se/~#423-_/2f.no/3df a.re considered sub.websit.es"); // Several urls in one input; path and fragment characters are kept
+ assert_equals(urls.size(), 3);
+ assert_equals(urls[0], "the.se/~#423-_/2f.no/3df");
+ assert_equals(urls[1], "a.re");
+ assert_equals(urls[2], "sub.websit.es");
+ return 0;
+}