about summary refs log tree commit diff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-04-09 21:35:39 +0200
committer dec05eba <dec05eba@protonmail.com> 2021-04-09 21:45:28 +0200
commit 6040726f92784978dd91eec4c540e92c4ca54236 (patch)
tree ce37ba29b40bf100c5bd577834ea4b6c90f4ab86
parent 20a437763e56e5429ebd7f38940c4107418e3dee (diff)
Add .onion to list of valid tld, properly parse urls protocol part
-rw-r--r-- include/Body.hpp 1
-rw-r--r-- src/Body.cpp 2
-rw-r--r-- src/NetUtils.cpp 49
-rw-r--r-- src/QuickMedia.cpp 2
-rw-r--r-- tests/main.cpp 10
5 files changed, 57 insertions, 7 deletions
diff --git a/include/Body.hpp b/include/Body.hpp
index eb74837..7c8226e 100644
--- a/include/Body.hpp
+++ b/include/Body.hpp
@@ -263,6 +263,7 @@ namespace QuickMedia {
std::function<void(BodyItem*)> body_item_select_callback;
sf::Shader *thumbnail_mask_shader;
AttachSide attach_side = AttachSide::TOP;
+ bool title_mark_urls = false;
private:
void draw_item(sf::RenderWindow &window, BodyItem *item, const sf::Vector2f &pos, const sf::Vector2f &size, const float item_height, const int item_index, const Json::Value &content_progress, bool include_embedded_item = true, bool merge_with_previous = false);
void update_dirty_state(BodyItem *body_item, float width);
diff --git a/src/Body.cpp b/src/Body.cpp
index 1e45dcb..32d70fc 100644
--- a/src/Body.cpp
+++ b/src/Body.cpp
@@ -789,7 +789,7 @@ namespace QuickMedia {
if(body_item->title_text)
body_item->title_text->setString(std::move(str));
else
- body_item->title_text = std::make_unique<Text>(std::move(str), false, std::floor(16 * get_ui_scale()), width);
+ body_item->title_text = std::make_unique<Text>(std::move(str), false, std::floor(16 * get_ui_scale()), width, title_mark_urls);
body_item->title_text->setFillColor(body_item->get_title_color());
body_item->title_text->updateGeometry();
}
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
index d5795c2..8bb5a0e 100644
--- a/src/NetUtils.cpp
+++ b/src/NetUtils.cpp
@@ -902,6 +902,7 @@ namespace QuickMedia {
"ong",
"onl",
"online",
+ "onion",
"onyourside",
"ooo",
"open",
@@ -1583,7 +1584,11 @@ namespace QuickMedia {
}
static bool is_url_start_char(char c) {
- return is_alpha(c) || is_digit(c);
+ return is_alpha(c) || is_digit(c) || c == '-' || c == '.' || c == '_' || c == '~';
+ }
+
+ static bool is_url_domain_char(char c) {
+ return is_url_start_char(c);
}
// Implementation follows URI standard in general: https://tools.ietf.org/html/rfc3986#section-2.2.
@@ -1593,10 +1598,12 @@ namespace QuickMedia {
int parentheses_depth = 0;
bool is_valid_url = false;
+ bool is_domain_part = true;
+ bool contains_dot = false;
size_t url_start = std::string::npos;
// str.size() is fine, we want to include the NULL character so we can extract url at the end of the string
- for(size_t i = 0; i < (size_t)str.size() + 1; ++i) {
+ for(size_t i = 0; i < (size_t)str.size() + 1;) {
char c = str[i];
if(url_start != std::string::npos) {
@@ -1606,12 +1613,16 @@ namespace QuickMedia {
--parentheses_depth;
}
- if(url_start != std::string::npos && !is_valid_url && (is_whitespace(c) || c == '/' || c == ',' || c == ':' || c == ')' || c == '\0' || (c == '.' && i == str.size()))) {
+ if(url_start != std::string::npos && c == '.') {
+ contains_dot = true;
+ }
+
+ if(url_start != std::string::npos && !is_valid_url && contains_dot && (is_whitespace(c) || c == '/' || c == ',' || c == ':' || c == ')' || c == '\0' || (c == '.' && i == str.size()))) {
size_t tld_end = i - 1;
char prev_char = str[i - 1];
// We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
// and we want those links to work, I guess?
- if(prev_char == '.' || prev_char == ',')
+ if(prev_char == '.' || prev_char == ',' || prev_char == ':')
--tld_end;
else if(prev_char == ')' && parentheses_depth != 0)
--tld_end;
@@ -1623,8 +1634,30 @@ namespace QuickMedia {
--tld_start;
}
- if(tld_start > url_start && TLDS.find(str.substr(tld_start + 1, tld_end - tld_start)) != TLDS.end())
+ if(tld_start > url_start && TLDS.find(str.substr(tld_start + 1, tld_end - tld_start)) != TLDS.end()) {
is_valid_url = true;
+ is_domain_part = false;
+ }
+ }
+
+ if(url_start != std::string::npos && is_domain_part && c == ':') {
+ if(i + 2 < (size_t)str.size() + 1 && str[i + 1] == '/' && str[i + 2] == '/') {
+ i += 3;
+ continue;
+ } else if(i + 1 < (size_t)str.size() + 1 && is_whitespace(str[i + 1])) {
+ i += 1;
+ } else {
+ url_start = std::string::npos;
+ is_valid_url = false;
+ is_domain_part = true;
+ contains_dot = false;
+ }
+ }
+
+ if(url_start != std::string::npos && is_domain_part && !is_url_domain_char(c)) {
+ url_start = std::string::npos;
+ is_valid_url = false;
+ contains_dot = false;
}
if(url_start == std::string::npos && is_url_start_char(c)) {
@@ -1636,7 +1669,7 @@ namespace QuickMedia {
char prev_char = str[i - 1];
// We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
// and we want those links to work, I guess?
- if(prev_char == '.' || prev_char == ',')
+ if(prev_char == '.' || prev_char == ',' || prev_char == ':')
--url_length;
else if(prev_char == ')' && parentheses_depth != 0)
--url_length;
@@ -1646,7 +1679,11 @@ namespace QuickMedia {
url_start = std::string::npos;
is_valid_url = false;
+ is_domain_part = true;
+ contains_dot = false;
}
+
+ ++i;
}
return ranges;
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index 4598742..a5318be 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -2722,6 +2722,8 @@ namespace QuickMedia {
VIEWING_ATTACHED_IMAGE
};
+ thread_body->title_mark_urls = true;
+
NavigationStage navigation_stage = NavigationStage::VIEWING_COMMENTS;
AsyncTask<bool> captcha_request_future;
AsyncTask<bool> captcha_post_solution_future;
diff --git a/tests/main.cpp b/tests/main.cpp
index c5138e3..306cdf2 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -28,6 +28,11 @@ int main() {
assert_equals(urls.size(), 1);
assert_equals(urls[0], "example.com");
+ str = "example.com: the best test website";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
+ assert_equals(urls.size(), 1);
+ assert_equals(urls[0], "example.com");
+
str = "these. are. not. websites.";
urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 0);
@@ -47,5 +52,10 @@ int main() {
urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 1);
assert_equals(urls[0], "https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/");
+
+ str = "[sneed](https://sneedville.com)";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
+ assert_equals(urls.size(), 1);
+ assert_equals(urls[0], "https://sneedville.com");
return 0;
}