From be20a78ab01b924fc1261ff3c71361feb440e592 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sun, 3 Oct 2021 09:01:14 +0200 Subject: xv: fix missing search results, incorrect title to thumbnail matches --- include/StringUtils.hpp | 3 +++ plugins/MediaGeneric.hpp | 3 ++- src/NetUtils.cpp | 23 +++++++++++++++++------ src/QuickMedia.cpp | 10 +++++++--- src/StringUtils.cpp | 25 +++++++++++++++++++++++++ src/plugins/MediaGeneric.cpp | 12 ++++++------ tests/main.cpp | 8 ++++++++ 7 files changed, 68 insertions(+), 16 deletions(-) diff --git a/include/StringUtils.hpp b/include/StringUtils.hpp index 6554ea7..29befc7 100644 --- a/include/StringUtils.hpp +++ b/include/StringUtils.hpp @@ -22,6 +22,9 @@ namespace QuickMedia { char to_upper(char c); bool strncase_equals(const char *str1, const char *str2, size_t length); bool strcase_equals(const char *str1, const char *str2); + // Note: does not check for overflow bool to_num(const char *str, size_t size, int &num); + // Note: does not check for overflow + bool to_num_hex(const char *str, size_t size, int &num); std::string seconds_to_relative_time_str(time_t seconds); } \ No newline at end of file diff --git a/plugins/MediaGeneric.hpp b/plugins/MediaGeneric.hpp index fce0e97..ca111a4 100644 --- a/plugins/MediaGeneric.hpp +++ b/plugins/MediaGeneric.hpp @@ -34,7 +34,7 @@ namespace QuickMedia { class MediaGenericSearchPage : public Page { public: - MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass); + MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass, std::vector extra_commands = {}); const char* get_title() const override { return "Search"; } bool search_is_filter() override { return false; } SearchResult search(const std::string &str, BodyItems &result_items) override; @@ -68,6 +68,7 @@ namespace QuickMedia { std::vector related_media_thumbnail_queries; MediaRelatedCustomHandler 
related_custom_handler = nullptr; bool cloudflare_bypass; + std::vector extra_commands; }; class MediaGenericRelatedPage : public RelatedVideosPage { diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp index 9cf88c7..4f5fce8 100644 --- a/src/NetUtils.cpp +++ b/src/NetUtils.cpp @@ -44,12 +44,23 @@ namespace QuickMedia { index += 2; size_t end_index = str.find(';', index); if(end_index != std::string::npos && end_index - index <= 3) { - const size_t num_length = end_index - index; - int num; - if(to_num(str.c_str() + index, num_length, num)) { - const char num_c = (char)num; - str.replace(index - 2, 2 + num_length + 1, &num_c, 1); - index += (-2 + 1); + if(str[index] == 'x') { + ++index; + const size_t num_length = end_index - index; + int num; + if(to_num_hex(str.c_str() + index, num_length, num)) { + const char num_c = (char)num; + str.replace(index - 3, 3 + num_length + 1, &num_c, 1); + index += (-3 + 1); + } + } else { + const size_t num_length = end_index - index; + int num; + if(to_num(str.c_str() + index, num_length, num)) { + const char num_c = (char)num; + str.replace(index - 2, 2 + num_length + 1, &num_c, 1); + index += (-2 + 1); + } } } } diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index e63a5f2..b431532 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -879,8 +879,8 @@ namespace QuickMedia { static void add_xvideos_handlers(MediaGenericSearchPage *media_generic_search_page) { media_generic_search_page->search_handler("https://www.xvideos.com/?k=%s&p=%p", 0) - .text_handler({{"//div[id='content']//div[class='thumb-under']//a", "title", "href", "/video"}}) - .thumbnail_handler({{"//div[id='content']//div[class='thumb']//img", "data-src", "/videos/"}}) + .text_handler({{"//div[id='main']//div[class='thumb-under']//a", "title", "href", "/video"}}) + .thumbnail_handler({{"//div[id='main']//div[class='thumb']//img", "data-src", "/videos"}}) .related_media_custom_handler([](const std::string &html_source) { std::vector related_items; size_t 
// Returns the value (0-15) of the hexadecimal digit |c|, or -1 if |c| is not a hexadecimal digit.
// Explicit two-sided range checks are used on purpose: a one-sided test such as
// |c - 'a' <= 5| is also true for every character below 'a' (the difference is negative),
// which would accept uppercase letters and punctuation with bogus values.
static int hex_digit_value(char c) {
    if(c >= '0' && c <= '9')
        return c - '0';
    if(c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if(c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}

// Parses the first |size| characters of |str| as a hexadecimal integer and stores the result in |num|.
// The input does not need to be null-terminated. An optional leading '-' negates the result.
// Both lowercase and uppercase digits are accepted; a "0x" prefix is not.
// Returns false (with |num| set to 0) if there are no digits or if any character is not a hexadecimal digit.
// Note: does not check for overflow. Digits are accumulated in an unsigned value so that
// too long input wraps around instead of triggering undefined behavior on a signed shift.
bool to_num_hex(const char *str, size_t size, int &num) {
    num = 0;

    size_t i = 0;
    const bool is_negative = size > 0 && str[0] == '-';
    if(is_negative)
        i = 1;

    // Reject "" and "-": without this an empty sequence would be reported as a successfully parsed 0
    if(i == size)
        return false;

    unsigned int value = 0;
    for(; i < size; ++i) {
        const int digit = hex_digit_value(str[i]);
        if(digit < 0)
            return false;
        value = (value << 4) | static_cast<unsigned int>(digit);
    }

    // Negate in unsigned arithmetic to stay well-defined for every value
    num = static_cast<int>(is_negative ? 0u - value : value);
    return true;
}
&thumbnail_queries, sf::Vector2i thumbnail_max_size, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items, bool cloudflare_bypass) { - std::vector args; + static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector &text_queries, const std::vector &thumbnail_queries, sf::Vector2i thumbnail_max_size, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items, bool cloudflare_bypass, const std::vector &extra_commands) { + std::vector args = extra_commands; if(!website_url.empty()) args.push_back({ "-H", "referer: " + website_url }); @@ -118,8 +118,8 @@ namespace QuickMedia { } } - MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass) : - Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size), cloudflare_bypass(cloudflare_bypass) + MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass, std::vector extra_commands) : + Page(program), website_url(website_url ? 
website_url : ""), thumbnail_max_size(thumbnail_max_size), cloudflare_bypass(cloudflare_bypass), extra_commands(std::move(extra_commands)) { if(!this->website_url.empty()) { if(this->website_url.back() != '/') @@ -135,7 +135,7 @@ namespace QuickMedia { std::string url = search_query.search_template; string_replace_all(url, "%s", url_param_encode(str)); string_replace_all(url, "%p", std::to_string(search_query.page_start + page)); - return fetch_page_results(url, website_url, text_queries, thumbnail_queries, thumbnail_max_size, nullptr, result_items, cloudflare_bypass); + return fetch_page_results(url, website_url, text_queries, thumbnail_queries, thumbnail_max_size, nullptr, result_items, cloudflare_bypass, extra_commands); } PluginResult MediaGenericSearchPage::submit(const std::string&, const std::string &url, std::vector &result_tabs) { @@ -144,7 +144,7 @@ namespace QuickMedia { } PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) { - return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, thumbnail_max_size, &related_custom_handler, result_items, cloudflare_bypass); + return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, thumbnail_max_size, &related_custom_handler, result_items, cloudflare_bypass, extra_commands); } MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) { diff --git a/tests/main.cpp b/tests/main.cpp index ca09d4c..32acde6 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -62,5 +62,13 @@ int main() { urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str)); assert_equals(urls.size(), 1); assert_equals(urls[0], "https://sneedville.com"); + + std::string html_unescaped_str = "hello ' world"; + QuickMedia::html_unescape_sequences(html_unescaped_str); + assert_equals(html_unescaped_str, "hello ' world"); + + html_unescaped_str = 
"hello ' world"; + QuickMedia::html_unescape_sequences(html_unescaped_str); + assert_equals(html_unescaped_str, "hello ' world"); return 0; } -- cgit v1.2.3