From 6040726f92784978dd91eec4c540e92c4ca54236 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 9 Apr 2021 21:35:39 +0200 Subject: Add .onion to list of valid tld, properly parse urls protocol part --- include/Body.hpp | 1 + src/Body.cpp | 2 +- src/NetUtils.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++------ src/QuickMedia.cpp | 2 ++ tests/main.cpp | 10 ++++++++++ 5 files changed, 57 insertions(+), 7 deletions(-) diff --git a/include/Body.hpp b/include/Body.hpp index eb74837..7c8226e 100644 --- a/include/Body.hpp +++ b/include/Body.hpp @@ -263,6 +263,7 @@ namespace QuickMedia { std::function body_item_select_callback; sf::Shader *thumbnail_mask_shader; AttachSide attach_side = AttachSide::TOP; + bool title_mark_urls = false; private: void draw_item(sf::RenderWindow &window, BodyItem *item, const sf::Vector2f &pos, const sf::Vector2f &size, const float item_height, const int item_index, const Json::Value &content_progress, bool include_embedded_item = true, bool merge_with_previous = false); void update_dirty_state(BodyItem *body_item, float width); diff --git a/src/Body.cpp b/src/Body.cpp index 1e45dcb..32d70fc 100644 --- a/src/Body.cpp +++ b/src/Body.cpp @@ -789,7 +789,7 @@ namespace QuickMedia { if(body_item->title_text) body_item->title_text->setString(std::move(str)); else - body_item->title_text = std::make_unique(std::move(str), false, std::floor(16 * get_ui_scale()), width); + body_item->title_text = std::make_unique(std::move(str), false, std::floor(16 * get_ui_scale()), width, title_mark_urls); body_item->title_text->setFillColor(body_item->get_title_color()); body_item->title_text->updateGeometry(); } diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp index d5795c2..8bb5a0e 100644 --- a/src/NetUtils.cpp +++ b/src/NetUtils.cpp @@ -902,6 +902,7 @@ namespace QuickMedia { "ong", "onl", "online", + "onion", "onyourside", "ooo", "open", @@ -1583,7 +1584,11 @@ namespace QuickMedia { } static bool is_url_start_char(char c) { - return is_alpha(c) || is_digit(c); + return is_alpha(c) || is_digit(c) || c == '-' || c == '.' || c == '_' || c == '~'; + } + + static bool is_url_domain_char(char c) { + return is_url_start_char(c); } // Implementation follows URI standard in general: https://tools.ietf.org/html/rfc3986#section-2.2. @@ -1593,10 +1598,12 @@ namespace QuickMedia { int parentheses_depth = 0; bool is_valid_url = false; + bool is_domain_part = true; + bool contains_dot = false; size_t url_start = std::string::npos; // str.size() is fine, we want to include the NULL character so we can extract url at the end of the string - for(size_t i = 0; i < (size_t)str.size() + 1; ++i) { + for(size_t i = 0; i < (size_t)str.size() + 1;) { char c = str[i]; if(url_start != std::string::npos) { @@ -1606,12 +1613,16 @@ namespace QuickMedia { --parentheses_depth; } - if(url_start != std::string::npos && !is_valid_url && (is_whitespace(c) || c == '/' || c == ',' || c == ':' || c == ')' || c == '\0' || (c == '.' && i == str.size()))) { + if(url_start != std::string::npos && c == '.') { + contains_dot = true; + } + + if(url_start != std::string::npos && !is_valid_url && contains_dot && (is_whitespace(c) || c == '/' || c == ',' || c == ':' || c == ')' || c == '\0' || (c == '.' && i == str.size()))) { size_t tld_end = i - 1; char prev_char = str[i - 1]; // We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..." // and we want those links to work, I guess? - if(prev_char == '.' || prev_char == ',') + if(prev_char == '.' || prev_char == ',' || prev_char == ':') --tld_end; else if(prev_char == ')' && parentheses_depth != 0) --tld_end; @@ -1623,8 +1634,30 @@ namespace QuickMedia { --tld_start; } - if(tld_start > url_start && TLDS.find(str.substr(tld_start + 1, tld_end - tld_start)) != TLDS.end()) + if(tld_start > url_start && TLDS.find(str.substr(tld_start + 1, tld_end - tld_start)) != TLDS.end()) { is_valid_url = true; + is_domain_part = false; + } + } + + if(url_start != std::string::npos && is_domain_part && c == ':') { + if(i + 2 < (size_t)str.size() + 1 && str[i + 1] == '/' && str[i + 2] == '/') { + i += 3; + continue; + } else if(i + 1 < (size_t)str.size() + 1 && is_whitespace(str[i + 1])) { + i += 1; + } else { + url_start = std::string::npos; + is_valid_url = false; + is_domain_part = true; + contains_dot = false; + } + } + + if(url_start != std::string::npos && is_domain_part && !is_url_domain_char(c)) { + url_start = std::string::npos; + is_valid_url = false; + contains_dot = false; } if(url_start == std::string::npos && is_url_start_char(c)) { @@ -1636,7 +1669,7 @@ namespace QuickMedia { char prev_char = str[i - 1]; // We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..." // and we want those links to work, I guess? - if(prev_char == '.' || prev_char == ',') + if(prev_char == '.' || prev_char == ',' || prev_char == ':') --url_length; else if(prev_char == ')' && parentheses_depth != 0) --url_length; @@ -1646,7 +1679,11 @@ namespace QuickMedia { url_start = std::string::npos; is_valid_url = false; + is_domain_part = true; + contains_dot = false; } + + ++i; } return ranges; diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index 4598742..a5318be 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -2722,6 +2722,8 @@ namespace QuickMedia { VIEWING_ATTACHED_IMAGE }; + thread_body->title_mark_urls = true; + NavigationStage navigation_stage = NavigationStage::VIEWING_COMMENTS; AsyncTask captcha_request_future; AsyncTask captcha_post_solution_future; diff --git a/tests/main.cpp b/tests/main.cpp index c5138e3..306cdf2 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -28,6 +28,11 @@ int main() { assert_equals(urls.size(), 1); assert_equals(urls[0], "example.com"); + str = "example.com: the best test website"; + urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str)); + assert_equals(urls.size(), 1); + assert_equals(urls[0], "example.com"); + str = "these. are. not. websites."; urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str)); assert_equals(urls.size(), 0); @@ -47,5 +52,10 @@ int main() { urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str)); assert_equals(urls.size(), 1); assert_equals(urls[0], "https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/"); + + str = "[sneed](https://sneedville.com)"; + urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str)); + assert_equals(urls.size(), 1); + assert_equals(urls[0], "https://sneedville.com"); return 0; } -- cgit v1.2.3