From 1ba58ba74a767a91b28f44eb75db41455adcaa70 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 27 Apr 2021 14:00:35 +0200 Subject: Add authors page for mangakatana and mangatown, attempt to fix image loading getting stuck, misc fixed related to async in plugins --- plugins/Manga.hpp | 2 + plugins/MangaCombined.hpp | 2 + plugins/MangaGeneric.hpp | 6 +-- plugins/Mangadex.hpp | 1 + plugins/Manganelo.hpp | 1 + plugins/Soundcloud.hpp | 2 + src/QuickMedia.cpp | 18 +++++++-- src/plugins/MangaCombined.cpp | 5 +-- src/plugins/MangaGeneric.cpp | 92 ++++++++++++++++++++++++++----------------- src/plugins/Manganelo.cpp | 6 +-- src/plugins/Soundcloud.cpp | 5 +-- 11 files changed, 88 insertions(+), 52 deletions(-) diff --git a/plugins/Manga.hpp b/plugins/Manga.hpp index 5dfa800..388ce66 100644 --- a/plugins/Manga.hpp +++ b/plugins/Manga.hpp @@ -37,6 +37,8 @@ namespace QuickMedia { // TODO: Remove and use plugin name instead virtual const char* get_service_name() const = 0; + virtual const char* get_website_url() const = 0; + const std::string manga_name; protected: std::string chapter_name; diff --git a/plugins/MangaCombined.hpp b/plugins/MangaCombined.hpp index b9627fe..56b03ba 100644 --- a/plugins/MangaCombined.hpp +++ b/plugins/MangaCombined.hpp @@ -3,6 +3,7 @@ #include "Manga.hpp" #include #include +#include "../include/AsyncTask.hpp" namespace QuickMedia { struct MangaPlugin { @@ -23,5 +24,6 @@ namespace QuickMedia { sf::Vector2i get_thumbnail_max_size() override { return sf::Vector2i(101, 141); }; private: std::vector search_pages; + std::vector>> search_threads; }; } \ No newline at end of file diff --git a/plugins/MangaGeneric.hpp b/plugins/MangaGeneric.hpp index a03756d..2dca919 100644 --- a/plugins/MangaGeneric.hpp +++ b/plugins/MangaGeneric.hpp @@ -113,9 +113,8 @@ namespace QuickMedia { MangaGenericSearchPage& description_handler(std::vector queries); // This is optional. MangaGenericSearchPage& thumbnail_handler(std::vector queries); - // If |url_contains| is null, then any matching query is added. If |title_field| is "text", then the inner text is used. // This is optional. - MangaGenericSearchPage& authors_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains); + MangaGenericSearchPage& authors_handler(std::vector queries); // If |url_contains| is null, then any matching query is added. If |title_field| is "text", then the inner text is used. // This is required. @@ -151,7 +150,7 @@ namespace QuickMedia { std::vector text_queries; std::vector description_queries; std::vector thumbnail_queries; - AuthorsQuery authors_query; + std::vector authors_queries; ListChaptersQuery list_chapters_query; ListPageQuery list_page_query; MangaIdExtractor manga_id_extractor; @@ -192,6 +191,7 @@ namespace QuickMedia { ImageResult get_number_of_images(int &num_images) override; ImageResult for_each_page_in_chapter(PageCallback callback) override; const char* get_service_name() const override { return service_name; } + const char* get_website_url() const override { return website_url.c_str(); } private: ImageResult get_page_image_urls(); private: diff --git a/plugins/Mangadex.hpp b/plugins/Mangadex.hpp index 7fd6ca0..1befa1a 100644 --- a/plugins/Mangadex.hpp +++ b/plugins/Mangadex.hpp @@ -34,6 +34,7 @@ namespace QuickMedia { ImageResult get_number_of_images(int &num_images) override; ImageResult for_each_page_in_chapter(PageCallback callback) override; const char* get_service_name() const override { return "mangadex"; } + const char* get_website_url() const override { return "https://mangadex.org/"; } private: // Cached ImageResult get_image_urls_for_chapter(const std::string &url); diff --git a/plugins/Manganelo.hpp b/plugins/Manganelo.hpp index ca44e05..8e6c2cd 100644 --- a/plugins/Manganelo.hpp +++ b/plugins/Manganelo.hpp @@ -39,6 +39,7 @@ namespace QuickMedia { ImageResult get_number_of_images(int &num_images) override; ImageResult for_each_page_in_chapter(PageCallback callback) override; const char* get_service_name() const override { return "manganelo"; } + const char* get_website_url() const override { return "https://manganelo.com/"; } private: ImageResult get_image_urls_for_chapter(const std::string &url); }; diff --git a/plugins/Soundcloud.hpp b/plugins/Soundcloud.hpp index bb23efb..24dc051 100644 --- a/plugins/Soundcloud.hpp +++ b/plugins/Soundcloud.hpp @@ -1,6 +1,7 @@ #pragma once #include "Page.hpp" +#include "../include/AsyncTask.hpp" namespace QuickMedia { class SoundcloudPage : public Page { @@ -24,6 +25,7 @@ namespace QuickMedia { private: SoundcloudPage submit_page; std::string query_urn; + std::vector> async_download_threads; }; class SoundcloudUserPage : public SoundcloudPage { diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index e2ee0ec..804d3d3 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -726,6 +726,10 @@ namespace QuickMedia { manga_generic_search_page->search_handler("https://mangatown.com/search?name=%s&page=%p", 1) .text_handler({{"//p[class='title']/a", "title", "href", "/manga/"}}) .thumbnail_handler({{"//a[class='manga_cover']/img", "src", nullptr}}) + .authors_handler({ + {"//div[class='detail_content']//a", "text", "href", "/author/"}, + {"//div[class='detail_content']//a", "text", "href", "/artist/"} + }) .list_chapters_handler("//ul[class='chapter_list']//a", "text", "href", "/manga/") .list_chapters_uploaded_time_handler("//ul[class='chapter_list']//span[class='time']", "text", nullptr) .list_page_images_pagination_handler( @@ -745,7 +749,7 @@ namespace QuickMedia { {"//div[id='book_list']//div[class='media']//img", "src", nullptr}, {"//div[id='single_book']//div[class='cover']//img", "src", nullptr} }) - .authors_handler("//div[id='single_book']//a[class='author']", "text", "href", "/author/") + .authors_handler({{"//div[id='single_book']//a[class='author']", "text", "href", "/author/"}}) .list_chapters_handler("//div[class='chapters']//div[class='chapter']//a", "text", "href", "/manga/") .list_chapters_uploaded_time_handler("//div[class='chapters']//div[class='update_time']", "text", nullptr) .list_page_images_custom_handler([](const std::string &html_source) { @@ -825,7 +829,7 @@ namespace QuickMedia { tabs.push_back(Tab{std::move(history_body), std::move(history_page), std::move(search_bar)}); } else if(strcmp(plugin_name, "manganelos") == 0) { auto search_body = create_body(); - auto search_page = std::make_unique(this, plugin_name, nullptr); + auto search_page = std::make_unique(this, plugin_name, "http://manganelos.com/"); add_manganelos_handlers(search_page.get()); tabs.push_back(Tab{std::move(search_body), std::move(search_page), create_search_bar("Search...", 400)}); @@ -863,7 +867,7 @@ namespace QuickMedia { tabs.push_back(Tab{std::move(history_body), std::move(history_page), std::move(search_bar)}); } else if(strcmp(plugin_name, "manga") == 0) { auto manganelo = std::make_unique(this); - auto manganelos = std::make_unique(this, "manganelos", nullptr); + auto manganelos = std::make_unique(this, "manganelos", "http://manganelos.com/"); add_manganelos_handlers(manganelos.get()); auto mangatown = std::make_unique(this, "mangatown", "https://www.mangatown.com"); add_mangatown_handlers(mangatown.get()); @@ -2494,6 +2498,7 @@ namespace QuickMedia { std::vector extra_args; const bool is_manganelo = (strcmp(images_page->get_service_name(), "manganelo") == 0); + const char *website_url = images_page->get_website_url(); if(is_manganelo) { extra_args = { CommandArg { "-H", "accept: image/jpeg,image/png,image/*,*/*;q=0.8" }, @@ -2504,6 +2509,13 @@ namespace QuickMedia { CommandArg { "-m", "30" }, CommandArg { "--connect-timeout", "30" } }; + } else if(website_url && website_url[0] != '\0') { + std::string website_url_str = website_url; + if(website_url_str.back() != '/') + website_url_str.push_back('/'); + extra_args = { + CommandArg { "-H", "referer: " + std::move(website_url_str) }, + }; } Path image_filepath_tmp(image_filepath.data + ".tmpz"); diff --git a/src/plugins/MangaCombined.cpp b/src/plugins/MangaCombined.cpp index e6b8028..ea6188c 100644 --- a/src/plugins/MangaCombined.cpp +++ b/src/plugins/MangaCombined.cpp @@ -1,5 +1,4 @@ #include "../../plugins/MangaCombined.hpp" -#include "../../include/AsyncTask.hpp" namespace QuickMedia { static const int SEARCH_TIMEOUT_MILLISECONDS = 6000; @@ -75,7 +74,7 @@ namespace QuickMedia { } SearchResult MangaCombinedSearchPage::search(const std::string &str, BodyItems &result_items) { - std::vector>> search_threads; + search_threads.clear(); for(auto &search_page : search_pages) { search_threads.push_back(std::make_pair(&search_page, AsyncTask([&str, &search_page]() { BodyItems search_page_body_items; @@ -89,7 +88,7 @@ namespace QuickMedia { } PluginResult MangaCombinedSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { - std::vector>> search_threads; + search_threads.clear(); for(auto &search_page : search_pages) { search_threads.push_back(std::make_pair(&search_page, AsyncTask([&str, page, &search_page]() { BodyItems search_page_body_items; diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp index 58bedf4..bf8a4c4 100644 --- a/src/plugins/MangaGeneric.cpp +++ b/src/plugins/MangaGeneric.cpp @@ -145,9 +145,13 @@ namespace QuickMedia { } PluginResult MangaGenericSearchPage::get_page(const std::string &url, BodyItems &result_items) { + std::vector args; + if(!website_url.empty()) + args.push_back({ "-H", "referer: " + website_url }); + std::string target_url; std::string website_data; - if(download_to_string(url, website_data, {}, true, fail_on_http_error) != DownloadResult::OK) + if(download_to_string(url, website_data, args, true, fail_on_http_error) != DownloadResult::OK) return PluginResult::NET_ERR; if(website_data.empty()) @@ -265,7 +269,11 @@ namespace QuickMedia { return PluginResult::ERR; } - std::vector creators; + std::vector args; + if(!website_url.empty()) + args.push_back({ "-H", "referer: " + website_url }); + + std::map creators; // key = name, value = url BodyItems chapters_items; HtmlSearchUserdata search_userdata; search_userdata.body_items = &chapters_items; @@ -274,7 +282,7 @@ namespace QuickMedia { search_userdata.field2_contains = list_chapters_query.url_contains; std::string website_data; - if(download_to_string(url, website_data, {}, true, fail_on_http_error) != DownloadResult::OK) + if(download_to_string(url, website_data, args, true, fail_on_http_error) != DownloadResult::OK) return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; @@ -298,28 +306,26 @@ namespace QuickMedia { result = html_body_item_merge(&html_search, list_chapters_query.uploaded_time_html_query, &merge_userdata); } - if(authors_query.html_query && authors_query.title_field && authors_query.url_field) { - struct HtmlAuthorsUserdata { - std::vector *creators; - AuthorsQuery *authors_query; - }; + struct HtmlAuthorsUserdata { + std::map *creators; + const AuthorsQuery *authors_query; + }; - HtmlAuthorsUserdata authors_userdata; - authors_userdata.creators = &creators; - authors_userdata.authors_query = &authors_query; + for(const AuthorsQuery &authors_query : authors_queries) { + if(authors_query.html_query && authors_query.title_field && authors_query.url_field) { + HtmlAuthorsUserdata authors_userdata; + authors_userdata.creators = &creators; + authors_userdata.authors_query = &authors_query; - quickmedia_html_find_nodes_xpath(&html_search, authors_query.html_query, - [](QuickMediaHtmlNode *node, void *userdata) { - HtmlAuthorsUserdata *authors_userdata = (HtmlAuthorsUserdata*)userdata; - const char *title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field); - const char *url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field); - if(title_value && url_value && (!authors_userdata->authors_query->url_contains || strstr(url_value, authors_userdata->authors_query->url_contains))) { - Creator creator; - creator.name = strip(title_value); - creator.url = strip(url_value); - authors_userdata->creators->push_back(std::move(creator)); - } - }, &authors_userdata); + quickmedia_html_find_nodes_xpath(&html_search, authors_query.html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlAuthorsUserdata *authors_userdata = (HtmlAuthorsUserdata*)userdata; + const char *title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field); + const char *url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field); + if(title_value && url_value && (!authors_userdata->authors_query->url_contains || strstr(url_value, authors_userdata->authors_query->url_contains))) + (*authors_userdata->creators)[strip(title_value)] = strip(url_value); + }, &authors_userdata); + } } for(auto &body_item : chapters_items) { @@ -334,11 +340,11 @@ namespace QuickMedia { body_item->thumbnail_url = website_url + body_item->thumbnail_url.substr(1); } - for(auto &creator : creators) { - if(string_starts_with(creator.url, "//")) - creator.url = "https://" + creator.url.substr(2); - else if(string_starts_with(creator.url, "/")) - creator.url = website_url + creator.url.substr(1); + for(auto &it : creators) { + if(string_starts_with(it.second, "//")) + it.second = "https://" + it.second.substr(2); + else if(string_starts_with(it.second, "/")) + it.second = website_url + it.second.substr(1); } cleanup: @@ -350,7 +356,10 @@ namespace QuickMedia { body->items = std::move(chapters_items); result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url, manga_id_extractor, service_name, website_url, &list_page_query, fail_on_http_error), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); - for(Creator &creator : creators) { + for(auto &it : creators) { + Creator creator; + creator.name = it.first; + creator.url = it.second; result_tabs.push_back(Tab{create_body(), std::make_unique(program, this, std::move(creator)), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); } @@ -443,8 +452,12 @@ namespace QuickMedia { next_page_userdata.field_name = list_page_pagination_query->next_page_field_name; next_page_userdata.field_contains = list_page_pagination_query->next_page_field_contains; + std::vector args; + if(!website_url.empty()) + args.push_back({ "-H", "referer: " + website_url }); + std::string website_data; - if(download_to_string(url, website_data, {}, true, fail_on_http_error) != DownloadResult::OK) + if(download_to_string(url, website_data, args, true, fail_on_http_error) != DownloadResult::OK) return ImageResult::NET_ERR; QuickMediaHtmlSearch html_search; @@ -547,12 +560,16 @@ namespace QuickMedia { next_page_userdata.field_name = list_page_pagination_query->next_page_field_name; next_page_userdata.field_contains = list_page_pagination_query->next_page_field_contains; + std::vector args; + if(!website_url.empty()) + args.push_back({ "-H", "referer: " + website_url }); + std::string image_src; std::string website_data; DownloadErrorHandler error_callback = [](std::string&){ return true; }; if(fail_on_http_error) error_callback = nullptr; - if(download_to_string_cache(full_url, website_data, {}, true, error_callback) != DownloadResult::OK) + if(download_to_string_cache(full_url, website_data, args, true, error_callback) != DownloadResult::OK) return ImageResult::ERR; QuickMediaHtmlSearch html_search; @@ -594,8 +611,12 @@ namespace QuickMedia { if(!chapter_image_urls.empty()) return ImageResult::OK; + std::vector args; + if(!website_url.empty()) + args.push_back({ "-H", "referer: " + website_url }); + std::string website_data; - if(download_to_string(url, website_data, {}, true, fail_on_http_error) != DownloadResult::OK) + if(download_to_string(url, website_data, args, true, fail_on_http_error) != DownloadResult::OK) return ImageResult::NET_ERR; if(list_page_query->type == ListPageQueryType::IMAGES) { @@ -678,11 +699,8 @@ namespace QuickMedia { return *this; } - MangaGenericSearchPage& MangaGenericSearchPage::authors_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains) { - authors_query.html_query = html_query; - authors_query.title_field = title_field; - authors_query.url_field = url_field; - authors_query.url_contains = url_contains; + MangaGenericSearchPage& MangaGenericSearchPage::authors_handler(std::vector queries) { + authors_queries = std::move(queries); return *this; } diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp index f8f604d..e6bdfc0 100644 --- a/src/plugins/Manganelo.cpp +++ b/src/plugins/Manganelo.cpp @@ -30,7 +30,7 @@ namespace QuickMedia { std::vector creators; std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + if(download_to_string(url, website_data, {CommandArg { "-H", "referer: https://manganelo.com/" }}, true) != DownloadResult::OK) return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; @@ -186,7 +186,7 @@ namespace QuickMedia { PluginResult ManganeloCreatorPage::lazy_fetch(BodyItems &result_items) { std::string website_data; - if(download_to_string(creator.url, website_data, {}, true) != DownloadResult::OK) + if(download_to_string(creator.url, website_data, {CommandArg { "-H", "referer: https://manganelo.com/" }}, true) != DownloadResult::OK) return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; @@ -255,7 +255,7 @@ namespace QuickMedia { return ImageResult::OK; std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + if(download_to_string(url, website_data, {CommandArg { "-H", "referer: https://manganelo.com/" }}, true) != DownloadResult::OK) return ImageResult::NET_ERR; QuickMediaHtmlSearch html_search; diff --git a/src/plugins/Soundcloud.cpp b/src/plugins/Soundcloud.cpp index 6740504..90fe144 100644 --- a/src/plugins/Soundcloud.cpp +++ b/src/plugins/Soundcloud.cpp @@ -3,7 +3,6 @@ #include "../../include/StringUtils.hpp" #include "../../include/Scale.hpp" #include -#include namespace QuickMedia { static std::string client_id; @@ -300,14 +299,14 @@ namespace QuickMedia { if(result != 0) return PluginResult::ERR; - std::vector> async_download_threads; + async_download_threads.clear(); for(std::string &script_source : script_sources) { if(string_starts_with(script_source, "//")) script_source = "https://" + script_source.substr(2); else if(string_starts_with(script_source, "/")) script_source = "https://soundcloud.com/" + script_source.substr(1); - async_download_threads.push_back(std::async(std::launch::async, [script_source]() -> std::string { + async_download_threads.push_back(AsyncTask([script_source]() -> std::string { std::string website_data; DownloadResult download_result = download_to_string(script_source, website_data, {}, true); if(download_result != DownloadResult::OK) return ""; -- cgit v1.2.3