From ba4e62d55156f9b94b569b56b6382bbcf94b7d86 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 16 Apr 2021 09:37:53 +0200 Subject: Convert mangatown and manganelos into a generic manga plugin Revert for_each_page.. processing of manga instead of getting all pages. Mangatown requires you to navigate page by page, cant predict what a specific pages image url will be. --- src/QuickMedia.cpp | 71 ++++-- src/SearchBar.cpp | 1 + src/plugins/MangaGeneric.cpp | 567 +++++++++++++++++++++++++++++++++++++++++++ src/plugins/Mangadex.cpp | 27 ++- src/plugins/Manganelo.cpp | 33 ++- src/plugins/Manganelos.cpp | 154 ------------ src/plugins/Mangatown.cpp | 217 ----------------- 7 files changed, 674 insertions(+), 396 deletions(-) create mode 100644 src/plugins/MangaGeneric.cpp delete mode 100644 src/plugins/Manganelos.cpp delete mode 100644 src/plugins/Mangatown.cpp (limited to 'src') diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index 5bdc67e..26760ec 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -1,8 +1,7 @@ #include "../include/QuickMedia.hpp" #include "../plugins/Manganelo.hpp" -#include "../plugins/Manganelos.hpp" -#include "../plugins/Mangatown.hpp" #include "../plugins/Mangadex.hpp" +#include "../plugins/MangaGeneric.hpp" #include "../plugins/Youtube.hpp" #include "../plugins/Pornhub.hpp" #include "../plugins/Spankbang.hpp" @@ -675,9 +674,45 @@ namespace QuickMedia { show_room_side_panel = false; else show_room_side_panel = true; + main_thread_id = std::this_thread::get_id(); } + static void add_manganelos_handlers(MangaGenericSearchPage *manga_generic_search_page) { + manga_generic_search_page->search_handler("http://manganelos.com/search?q=", "&page=", 1) + .text_handler("//div[class='media-left cover-manga']//a", "title", "href", "/manga/") + .thumbnail_handler("//div[class='media-left cover-manga']//img[class='media-object']", "src", "/mangaimage/") + .list_chapters_handler("//section[id='examples']//div[class='chapter-list']//a", "text", "href", nullptr) + .list_page_images_handler("//p[id='arraydata']", "text", nullptr, [](std::vector &urls) { + if(urls.size() != 1) + return; + + std::string urls_combined = urls.front(); + urls.clear(); + + string_split(urls_combined, ',', [&urls](const char *str, size_t size) { + std::string url(str, size); + url = strip(url); + urls.push_back(std::move(url)); + return true; + }); + }) + .manga_id_handler("/manga/", "?"); + } + + static void add_mangatown_handlers(MangaGenericSearchPage *manga_generic_search_page) { + manga_generic_search_page->search_handler("https://www.mangatown.com/search?name=", "&page=", 1) + .text_handler("//p[class='title']/a", "title", "href", "/manga/") + .thumbnail_handler("//a[class='manga_cover']/img", "src", nullptr) + .list_chapters_handler("//ul[class='chapter_list']//a", "text", "href", "/manga/") + .list_chapters_uploaded_time_handler("//ul[class='chapter_list']//span[class='time']", "text", nullptr) + .list_page_images_pagination_handler( + "//div[class='page_select']//option", "value", "/manga/", [](int num_pages){ return std::max(0, (num_pages /= 2) - 1); }, + "//img[id='image']", "src", nullptr, + "//a[class='next_page']", "href", nullptr) + .manga_id_handler("/manga/", "/"); + } + void Program::load_plugin_by_name(std::vector &tabs, const char *start_dir) { if(!plugin_name || plugin_name[0] == '\0') return; @@ -727,7 +762,9 @@ namespace QuickMedia { tabs.push_back(Tab{std::move(history_body), std::move(history_page), std::move(search_bar)}); } else if(strcmp(plugin_name, "manganelos") == 0) { auto search_body = create_body(); - tabs.push_back(Tab{std::move(search_body), std::make_unique(this), create_search_bar("Search...", 400)}); + auto search_page = std::make_unique(this, plugin_name, nullptr); + add_manganelos_handlers(search_page.get()); + tabs.push_back(Tab{std::move(search_body), std::move(search_page), create_search_bar("Search...", 400)}); auto history_body = create_body(); auto search_bar = create_search_bar("Search...", SEARCH_DELAY_FILTER); @@ -735,7 +772,9 @@ namespace QuickMedia { tabs.push_back(Tab{std::move(history_body), std::move(history_page), std::move(search_bar)}); } else if(strcmp(plugin_name, "mangatown") == 0) { auto search_body = create_body(); - tabs.push_back(Tab{std::move(search_body), std::make_unique(this), create_search_bar("Search...", 400)}); + auto search_page = std::make_unique(this, plugin_name, "https://www.mangatown.com"); + add_mangatown_handlers(search_page.get()); + tabs.push_back(Tab{std::move(search_body), std::move(search_page), create_search_bar("Search...", 400)}); auto history_body = create_body(); auto search_bar = create_search_bar("Search...", SEARCH_DELAY_FILTER); @@ -2321,24 +2360,24 @@ namespace QuickMedia { Path content_cache_dir_ = content_cache_dir; image_download_future = AsyncTask>([images_page, content_cache_dir_, this](std::promise num_manga_pages_promise) { - std::vector page_image_urls; - if(images_page->get_page_image_urls(page_image_urls) != ImageResult::OK) { + int num_pages = 0; + if(images_page->get_number_of_images(num_pages) != ImageResult::OK) { num_manga_pages_promise.set_value(0); if(!image_download_cancel) show_notification("QuickMedia", "Failed to fetch page images", Urgency::CRITICAL); return; } else { - num_manga_pages_promise.set_value(page_image_urls.size()); - image_upscale_status.resize(page_image_urls.size(), 0); + num_manga_pages_promise.set_value(num_pages); + image_upscale_status.resize(num_pages, 0); } - if(page_image_urls.empty()) + if(num_pages == 0) return; // TODO: Download images in parallel int page = 1; - for(const std::string &url : page_image_urls) { + images_page->for_each_page_in_chapter([this, images_page, &page, content_cache_dir_](const std::string &url) { if(image_download_cancel) - return; + return false; int image_index = page - 1; @@ -2355,7 +2394,7 @@ namespace QuickMedia { } if(get_file_type(image_filepath) != FileType::FILE_NOT_FOUND && upscaled_ok) - continue; + return true; std::vector extra_args; const bool is_manganelo = (strcmp(images_page->get_service_name(), "manganelo") == 0); @@ -2376,7 +2415,7 @@ namespace QuickMedia { size_t file_size = 0; if(download_to_file(url, image_filepath_tmp.data, extra_args, true) != DownloadResult::OK || (is_manganelo && file_get_size(image_filepath_tmp, &file_size) == 0 && file_size < 255)) { if(!image_download_cancel) show_notification("QuickMedia", "Failed to download image: " + url, Urgency::CRITICAL); - continue; + return true; } bool rename_immediately = true; @@ -2412,10 +2451,12 @@ namespace QuickMedia { if(rename(image_filepath_tmp.data.c_str(), image_filepath.data.c_str()) != 0) { perror(image_filepath_tmp.data.c_str()); show_notification("QuickMedia", "Failed to save image to file: " + image_filepath.data, Urgency::CRITICAL); - continue; + return true; } } - } + + return true; + }); }, std::move(num_manga_pages_promise)); sf::Event event; diff --git a/src/SearchBar.cpp b/src/SearchBar.cpp index b20c3b0..63515bd 100644 --- a/src/SearchBar.cpp +++ b/src/SearchBar.cpp @@ -103,6 +103,7 @@ namespace QuickMedia { clear(); updated_search = true; updated_autocomplete = true; + time_since_search_update.restart(); } if(event.type == sf::Event::TextEntered && event.text.unicode != 8 && event.text.unicode != 127) // 8 = backspace, 127 = del diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp new file mode 100644 index 0000000..a359698 --- /dev/null +++ b/src/plugins/MangaGeneric.cpp @@ -0,0 +1,567 @@ +#include "../../plugins/MangaGeneric.hpp" +#include "../../include/StringUtils.hpp" +#include +#include + +namespace QuickMedia { + struct HtmlSearchUserdata { + BodyItems *body_items; + const char *field1 = nullptr; + const char *field2 = nullptr; + const char *field2_contains = nullptr; + }; + + enum class MergeType { + THUMBNAIL, + UPLOAD_TIME + }; + + struct HtmlMergeUserdata { + MergeType type; + BodyItemContext body_item_image_context; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct HtmlListPageImagesUserdata { + std::vector *urls; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct HtmlPageCountUserdata { + int num_pages = 0; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct HtmlPageImageUserdata { + std::string *url = nullptr; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + if(strcmp(field_name, "text") == 0) + return quickmedia_html_node_get_text(node); + else + return quickmedia_html_node_get_attribute_value(node, field_name); + } + + static bool starts_with(const std::string &str, const char *sub) { + size_t sub_len = strlen(sub); + return str.size() >= sub_len && memcmp(str.c_str(), sub, sub_len) == 0; + } + + static int html_append_search(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlSearchUserdata *search_userdata) { + return quickmedia_html_find_nodes_xpath(html_search, html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlSearchUserdata *search_userdata = (HtmlSearchUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, search_userdata->field1); + const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2); + if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) { + auto item = BodyItem::create(strip(field1_value)); + item->url = strip(field2_value); + search_userdata->body_items->push_back(std::move(item)); + } + }, search_userdata); + } + + static int html_body_item_merge(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlMergeUserdata *merge_userdata) { + return quickmedia_html_find_nodes_xpath(html_search, html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlMergeUserdata *merge_userdata = (HtmlMergeUserdata*)userdata; + BodyItemContext &body_item_image_context = merge_userdata->body_item_image_context; + const char *field_value = html_attr_or_inner_text(node, merge_userdata->field_name); + if(body_item_image_context.index < body_item_image_context.body_items->size() + && field_value && (!merge_userdata->field_contains || strstr(field_value, merge_userdata->field_contains))) + { + if(merge_userdata->type == MergeType::THUMBNAIL) { + (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = strip(field_value); + } else if(merge_userdata->type == MergeType::UPLOAD_TIME) { + std::string uploaded_date = strip(field_value); + (*body_item_image_context.body_items)[body_item_image_context.index]->set_description("Uploaded: " + uploaded_date); + } + body_item_image_context.index++; + } + }, merge_userdata); + } + + static int html_get_page_url(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlPageImageUserdata *page_image_userdata) { + return quickmedia_html_find_nodes_xpath(html_search, html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlPageImageUserdata *page_image_userdata = (HtmlPageImageUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name); + if(page_image_userdata->url->empty() && field1_value && (!page_image_userdata->field_contains || strstr(field1_value, page_image_userdata->field_contains))) { + *page_image_userdata->url = strip(field1_value); + } + }, page_image_userdata); + } + + MangaGenericSearchPage::MangaGenericSearchPage(Program *program, const char *service_name, const char *website_url) : Page(program), service_name(service_name), website_url(website_url ? website_url : "") + { + if(!this->website_url.empty()) { + if(this->website_url.back() != '/') + this->website_url.push_back('/'); + } + } + + SearchResult MangaGenericSearchPage::search(const std::string &str, BodyItems &result_items) { + return plugin_result_to_search_result(get_page(str, 0, result_items)); + } + + PluginResult MangaGenericSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { + if(!search_query.search_prefix || !search_query.page_prefix || !text_query.html_query || !text_query.title_field || !text_query.url_field) { + assert(false); + return PluginResult::ERR; + } + + HtmlSearchUserdata search_userdata; + search_userdata.body_items = &result_items; + search_userdata.field1 = text_query.title_field; + search_userdata.field2 = text_query.url_field; + search_userdata.field2_contains = text_query.url_contains; + + std::string url = search_query.search_prefix; + url += url_param_encode(str); + url += search_query.page_prefix + std::to_string(search_query.page_start + page); + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return PluginResult::NET_ERR; + + if(website_data.empty()) + return PluginResult::OK; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = html_append_search(&html_search, text_query.html_query, &search_userdata); + if(result != 0) + goto cleanup; + + assert(!thumbnail_query.html_query || thumbnail_query.field_name); + if(thumbnail_query.html_query && thumbnail_query.field_name) { + HtmlMergeUserdata merge_userdata; + merge_userdata.type = MergeType::THUMBNAIL; + merge_userdata.body_item_image_context.body_items = &result_items; + merge_userdata.body_item_image_context.index = 0; + merge_userdata.field_name = thumbnail_query.field_name; + merge_userdata.field_contains = thumbnail_query.field_contains; + result = html_body_item_merge(&html_search, thumbnail_query.html_query, &merge_userdata); + } + + for(auto &body_item : result_items) { + if(starts_with(body_item->url, "//")) + body_item->url = "https://" + body_item->url.substr(2); + else if(starts_with(body_item->url, "/")) + body_item->url = website_url + body_item->url.substr(1); + + if(starts_with(body_item->thumbnail_url, "//")) + body_item->thumbnail_url = "https://" + body_item->thumbnail_url.substr(2); + else if(starts_with(body_item->thumbnail_url, "/")) + body_item->thumbnail_url = website_url + body_item->thumbnail_url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result == 0) { + return PluginResult::OK; + } else { + result_items.clear(); + return PluginResult::ERR; + } + } + + PluginResult MangaGenericSearchPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { + if(!list_chapters_query.html_query || !list_chapters_query.title_field || !list_chapters_query.url_field) { + assert(false); + return PluginResult::ERR; + } + + BodyItems chapters_items; + HtmlSearchUserdata search_userdata; + search_userdata.body_items = &chapters_items; + search_userdata.field1 = list_chapters_query.title_field; + search_userdata.field2 = list_chapters_query.url_field; + search_userdata.field2_contains = list_chapters_query.url_contains; + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return PluginResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = html_append_search(&html_search, list_chapters_query.html_query, &search_userdata); + if(result != 0) + goto cleanup; + + assert(!list_chapters_query.uploaded_time_html_query || list_chapters_query.uploaded_time_field_name); + if(list_chapters_query.uploaded_time_html_query && list_chapters_query.uploaded_time_field_name) { + HtmlMergeUserdata merge_userdata; + merge_userdata.type = MergeType::UPLOAD_TIME; + merge_userdata.body_item_image_context.body_items = &chapters_items; + merge_userdata.body_item_image_context.index = 0; + merge_userdata.field_name = list_chapters_query.uploaded_time_field_name; + merge_userdata.field_contains = list_chapters_query.uploaded_time_field_contains; + result = html_body_item_merge(&html_search, list_chapters_query.uploaded_time_html_query, &merge_userdata); + } + + for(auto &body_item : chapters_items) { + if(starts_with(body_item->url, "//")) + body_item->url = "https://" + body_item->url.substr(2); + else if(starts_with(body_item->url, "/")) + body_item->url = website_url + body_item->url.substr(1); + + if(starts_with(body_item->thumbnail_url, "//")) + body_item->thumbnail_url = "https://" + body_item->thumbnail_url.substr(2); + else if(starts_with(body_item->thumbnail_url, "/")) + body_item->thumbnail_url = website_url + body_item->thumbnail_url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result != 0) + return PluginResult::ERR; + + auto body = create_body(); + body->items = std::move(chapters_items); + result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url, manga_id_extractor, service_name, website_url, &list_page_query), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); + return PluginResult::OK; + } + + PluginResult MangaGenericChaptersPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { + result_tabs.push_back(Tab{nullptr, std::make_unique(program, content_title, title, url, service_name, website_url, list_page_query), nullptr}); + return PluginResult::OK; + } + + bool MangaGenericChaptersPage::extract_id_from_url(const std::string &url, std::string &manga_id) const { + size_t start_index = url.find(manga_id_extractor.prefix); + if(start_index == std::string::npos) + return false; + + if(!manga_id_extractor.end) { + manga_id = url.substr(start_index); + return true; + } + + start_index += strlen(manga_id_extractor.prefix); + size_t end_index = url.find(manga_id_extractor.end, start_index); + if(end_index == std::string::npos) { + manga_id = url.substr(start_index); + return true; + } + + manga_id = url.substr(start_index, end_index - start_index); + return true; + } + + ImageResult MangaGenericImagesPage::get_number_of_images(int &num_images) { + num_images = 0; + chapter_num_pages = -1; + switch(list_page_query->type) { + case ListPageQueryType::IMAGES: { + ImageResult result = get_page_image_urls(); + if(result != ImageResult::OK) return result; + num_images = chapter_image_urls.size(); + return ImageResult::OK; + } + case ListPageQueryType::PAGINATION: { + const ListPagePaginationQuery *list_page_pagination_query = &list_page_query->pagination_query; + if(!list_page_pagination_query->pages_html_query || !list_page_pagination_query->pages_field_name + || !list_page_pagination_query->image_html_query || !list_page_pagination_query->image_field_name + || !list_page_pagination_query->next_page_html_query || !list_page_pagination_query->next_page_field_name) + { + assert(false); + return ImageResult::ERR; + } + + if(chapter_num_pages != -1) { + num_images = chapter_num_pages; + return ImageResult::OK; + } + + current_image_url.clear(); + next_page_url.clear(); + + HtmlPageCountUserdata page_count_userdata; + page_count_userdata.num_pages = 0; + page_count_userdata.field_name = list_page_pagination_query->pages_field_name; + page_count_userdata.field_contains = list_page_pagination_query->pages_field_contains; + + HtmlPageImageUserdata page_image_userdata; + page_image_userdata.url = ¤t_image_url; + page_image_userdata.field_name = list_page_pagination_query->image_field_name; + page_image_userdata.field_contains = list_page_pagination_query->image_field_contains; + + HtmlPageImageUserdata next_page_userdata; + next_page_userdata.url = &next_page_url; + next_page_userdata.field_name = list_page_pagination_query->next_page_field_name; + next_page_userdata.field_contains = list_page_pagination_query->next_page_field_contains; + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return ImageResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, list_page_pagination_query->pages_html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlPageCountUserdata *page_count_userdata = (HtmlPageCountUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name); + if(field1_value && (!page_count_userdata->field_contains || strstr(field1_value, page_count_userdata->field_contains))) { + page_count_userdata->num_pages++; + } + }, &page_count_userdata); + + if(result == 0 && list_page_pagination_query->pages_post_handler) { + page_count_userdata.num_pages = list_page_pagination_query->pages_post_handler(page_count_userdata.num_pages); + } + + if(result != 0 || page_count_userdata.num_pages == 0) { + result = -1; + goto cleanup; + } + + result = html_get_page_url(&html_search, list_page_pagination_query->image_html_query, &page_image_userdata); + if(result != 0 || current_image_url.empty()) { + result = -1; + goto cleanup; + } + + result = html_get_page_url(&html_search, list_page_pagination_query->next_page_html_query, &next_page_userdata); + if(next_page_url.empty()) + result = -1; + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result != 0) { + current_image_url.clear(); + next_page_url.clear(); + return ImageResult::ERR; + } + + if(starts_with(current_image_url, "//")) + current_image_url = "https://" + current_image_url.substr(2); + else if(starts_with(current_image_url, "/")) + current_image_url = website_url + current_image_url.substr(1); + + num_images = page_count_userdata.num_pages; + chapter_num_pages = num_images; + return ImageResult::OK; + } + } + return ImageResult::OK; + } + + ImageResult MangaGenericImagesPage::for_each_page_in_chapter(PageCallback callback) { + switch(list_page_query->type) { + case ListPageQueryType::IMAGES: { + ImageResult result = get_page_image_urls(); + if(result != ImageResult::OK) return result; + for(const std::string &url : chapter_image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + case ListPageQueryType::PAGINATION: { + const ListPagePaginationQuery *list_page_pagination_query = &list_page_query->pagination_query; + if(!list_page_pagination_query->image_html_query || !list_page_pagination_query->image_field_name + || !list_page_pagination_query->next_page_html_query || !list_page_pagination_query->next_page_field_name) + { + assert(false); + return ImageResult::ERR; + } + + int num_images = 0; + ImageResult result = get_number_of_images(num_images); + if(result != ImageResult::OK) return result; + + if(!callback(current_image_url)) + return ImageResult::OK; + + for(int i = 0; i < num_images; ++i) { + std::string full_url = url + next_page_url; + current_image_url.clear(); + next_page_url.clear(); + + HtmlPageImageUserdata page_image_userdata; + page_image_userdata.url = ¤t_image_url; + page_image_userdata.field_name = list_page_pagination_query->image_field_name; + page_image_userdata.field_contains = list_page_pagination_query->image_field_contains; + + HtmlPageImageUserdata next_page_userdata; + next_page_userdata.url = &next_page_url; + next_page_userdata.field_name = list_page_pagination_query->next_page_field_name; + next_page_userdata.field_contains = list_page_pagination_query->next_page_field_contains; + + std::string image_src; + std::string website_data; + if(download_to_string_cache(full_url, website_data, {}, true) != DownloadResult::OK) + return ImageResult::ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + html_get_page_url(&html_search, list_page_pagination_query->image_html_query, &page_image_userdata); + html_get_page_url(&html_search, list_page_pagination_query->next_page_html_query, &next_page_userdata); + + cleanup: + quickmedia_html_search_deinit(&html_search); + + if(starts_with(current_image_url, "//")) + current_image_url = "https://" + current_image_url.substr(2); + else if(starts_with(current_image_url, "/")) + current_image_url = website_url + current_image_url.substr(1); + + if(!callback(current_image_url)) + break; + } + + return ImageResult::OK; + } + } + return ImageResult::OK; + } + + ImageResult MangaGenericImagesPage::get_page_image_urls() { + if(!prev_chapter_url.empty()) + return ImageResult::OK; + + assert(list_page_query->type == ListPageQueryType::IMAGES); + const ListPageImagesQuery *list_page_images_query = &list_page_query->images_query; + if(!list_page_images_query->html_query || !list_page_images_query->field_name) { + assert(false); + return ImageResult::ERR; + } + + HtmlListPageImagesUserdata list_page_images_userdata; + list_page_images_userdata.urls = &chapter_image_urls; + list_page_images_userdata.field_name = list_page_images_query->field_name; + list_page_images_userdata.field_contains = list_page_images_query->field_contains; + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return ImageResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, list_page_images_query->html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlListPageImagesUserdata *list_page_images_userdata = (HtmlListPageImagesUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name); + if(field1_value && (!list_page_images_userdata->field_contains || strstr(field1_value, list_page_images_userdata->field_contains))) { + list_page_images_userdata->urls->push_back(strip(field1_value)); + } + }, &list_page_images_userdata); + + if(result == 0 && !chapter_image_urls.empty() && list_page_images_query->post_handler) + list_page_images_query->post_handler(chapter_image_urls); + + for(std::string &url : chapter_image_urls) { + if(starts_with(url, "//")) + url = "https://" + url.substr(2); + else if(starts_with(url, "/")) + url = website_url + url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result != 0 || chapter_image_urls.empty()) { + chapter_image_urls.clear(); + return ImageResult::ERR; + } + + prev_chapter_url = url; + return ImageResult::OK; + } + + + MangaGenericSearchPage& MangaGenericSearchPage::search_handler(const char *search_prefix, const char *page_prefix, int page_start) { + search_query.search_prefix = search_prefix; + search_query.page_prefix = page_prefix; + search_query.page_start = page_start; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::text_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains) { + text_query.html_query = html_query; + text_query.title_field = title_field; + text_query.url_field = url_field; + text_query.url_contains = url_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::thumbnail_handler(const char *html_query, const char *field_name, const char *field_contains) { + thumbnail_query.html_query = html_query; + thumbnail_query.field_name = field_name; + thumbnail_query.field_contains = field_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_chapters_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains) { + list_chapters_query.html_query = html_query; + list_chapters_query.title_field = title_field; + list_chapters_query.url_field = url_field; + list_chapters_query.url_contains = url_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_chapters_uploaded_time_handler(const char *html_query, const char *field_name, const char *field_contains) { + list_chapters_query.uploaded_time_html_query = html_query; + list_chapters_query.uploaded_time_field_name = field_name; + list_chapters_query.uploaded_time_field_contains = field_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_page_images_handler(const char *html_query, const char *field_name, const char *field_contains, ListPageImagesQueryPost post_handler) { + list_page_query.type = ListPageQueryType::IMAGES; + list_page_query.images_query.html_query = html_query; + list_page_query.images_query.field_name = field_name; + list_page_query.images_query.field_contains = field_contains; + list_page_query.images_query.post_handler = post_handler; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_page_images_pagination_handler( + const char *pages_html_query, const char *pages_field_name, const char *pages_field_contains, ListPagePaginationPagesPost pages_post_handler, + const char *image_html_query, const char *image_field_name, const char *image_field_contains, + const char *next_page_html_query, const char *next_page_field_name, const char *next_page_field_contains) + { + assert(pages_post_handler); + list_page_query.type = ListPageQueryType::PAGINATION; + list_page_query.pagination_query.pages_html_query = pages_html_query; + list_page_query.pagination_query.pages_field_name = pages_field_name; + list_page_query.pagination_query.pages_field_contains = pages_field_contains; + list_page_query.pagination_query.pages_post_handler = pages_post_handler; + + list_page_query.pagination_query.image_html_query = image_html_query; + list_page_query.pagination_query.image_field_name = image_field_name; + list_page_query.pagination_query.image_field_contains = image_field_contains; + + list_page_query.pagination_query.next_page_html_query = next_page_html_query; + list_page_query.pagination_query.next_page_field_name = next_page_field_name; + list_page_query.pagination_query.next_page_field_contains = next_page_field_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::manga_id_handler(const char *prefix, const char *end) { + manga_id_extractor.prefix = prefix; + manga_id_extractor.end = end; + return *this; + } +} \ No newline at end of file diff --git a/src/plugins/Mangadex.cpp b/src/plugins/Mangadex.cpp index 44c9762..0d0c601 100644 --- a/src/plugins/Mangadex.cpp +++ b/src/plugins/Mangadex.cpp @@ -219,7 +219,28 @@ namespace QuickMedia { return true; } - ImageResult MangadexImagesPage::get_page_image_urls(std::vector &urls) { + ImageResult MangadexImagesPage::get_number_of_images(int &num_images) { + num_images = 0; + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + num_images = chapter_image_urls.size(); + return ImageResult::OK; + } + + ImageResult MangadexImagesPage::for_each_page_in_chapter(PageCallback callback) { + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + for(const std::string &url : chapter_image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + + ImageResult MangadexImagesPage::get_image_urls_for_chapter(const std::string &url) { + if(!chapter_image_urls.empty()) + return ImageResult::OK; + std::string cookie_filepath; if(!get_cookie_filepath(cookie_filepath)) return ImageResult::ERR; @@ -261,11 +282,11 @@ namespace QuickMedia { continue; std::string image_url = server + chapter_hash_str + "/" + image_name.asCString(); - urls.push_back(std::move(image_url)); + chapter_image_urls.push_back(std::move(image_url)); } } - if(urls.empty()) + if(chapter_image_urls.empty()) return ImageResult::ERR; return ImageResult::OK; diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp index b67acb2..e63ff6c 100644 --- a/src/plugins/Manganelo.cpp +++ b/src/plugins/Manganelo.cpp @@ -226,7 +226,28 @@ namespace QuickMedia { return PluginResult::OK; } - ImageResult ManganeloImagesPage::get_page_image_urls(std::vector &urls) { + ImageResult ManganeloImagesPage::get_number_of_images(int &num_images) { + num_images = 0; + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + num_images = chapter_image_urls.size(); + return ImageResult::OK; + } + + ImageResult ManganeloImagesPage::for_each_page_in_chapter(PageCallback callback) { + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + for(const std::string &url : chapter_image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + + ImageResult ManganeloImagesPage::get_image_urls_for_chapter(const std::string &url) { + if(!chapter_image_urls.empty()) + return ImageResult::OK; + std::string website_data; if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) return ImageResult::NET_ERR; @@ -244,16 +265,14 @@ namespace QuickMedia { std::string image_url = strip(src); urls->push_back(std::move(image_url)); } - }, &urls); + }, &chapter_image_urls); cleanup: quickmedia_html_search_deinit(&html_search); - if(result != 0) - return ImageResult::ERR; - - if(urls.empty()) + if(result != 0 || chapter_image_urls.empty()) { + chapter_image_urls.clear(); return ImageResult::ERR; - + } return ImageResult::OK; } } \ No newline at end of file diff --git a/src/plugins/Manganelos.cpp b/src/plugins/Manganelos.cpp deleted file mode 100644 index 04d9ca2..0000000 --- a/src/plugins/Manganelos.cpp +++ /dev/null @@ -1,154 +0,0 @@ -#include "../../plugins/Manganelos.hpp" -#include "../../include/Notification.hpp" -#include "../../include/StringUtils.hpp" -#include "../../include/NetUtils.hpp" -#include - -namespace QuickMedia { - static SearchResult search_page(const std::string &str, int page, BodyItems &result_items) { - std::string url = "http://manganelos.com/search?q="; - url += url_param_encode(str); - url += "&page=" + std::to_string(page); - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return SearchResult::NET_ERR; - - if(website_data.empty()) - return SearchResult::OK; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='media-left cover-manga']//a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *title = quickmedia_html_node_get_attribute_value(node, "title"); - if(href && title && strstr(href, "/manga/")) { - auto item = BodyItem::create(strip(title)); - item->url = strip(href); - item_data->push_back(std::move(item)); - } - }, &result_items); - - BodyItemContext body_item_image_context; - body_item_image_context.body_items = &result_items; - body_item_image_context.index = 0; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='media-left cover-manga']//img[class='media-object']", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && strstr(src, "/mangaimage/") && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = src; - item_data->index++; - } - }, &body_item_image_context); - - cleanup: - quickmedia_html_search_deinit(&html_search); - return SearchResult::OK; - } - - SearchResult ManganelosSearchPage::search(const std::string &str, BodyItems &result_items) { - return search_page(str, 1, result_items); - } - - PluginResult ManganelosSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { - return search_result_to_plugin_result(search_page(str, 1 + page, result_items)); - } - - PluginResult ManganelosSearchPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - BodyItems chapters_items; - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return PluginResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//section[id='examples']//div[class='chapter-list']//a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(href && text) { - auto item = BodyItem::create(strip(text)); - item->url = strip(href); - item_data->push_back(std::move(item)); - } - }, &chapters_items); - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0) - return PluginResult::ERR; - - auto body = create_body(); - body->items = std::move(chapters_items); - result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); - return PluginResult::OK; - } - - PluginResult ManganelosChaptersPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - result_tabs.push_back(Tab{nullptr, std::make_unique(program, content_title, title, url), nullptr}); - return PluginResult::OK; - } - - bool ManganelosChaptersPage::extract_id_from_url(const std::string &url, std::string &manga_id) const { - size_t start_index = url.find("/manga/"); - if(start_index == std::string::npos) - return false; - - start_index += 7; - size_t end_index = url.find("?", start_index); - if(end_index == std::string::npos) { - manga_id = url.substr(start_index); - return true; - } - - manga_id = url.substr(start_index, end_index - start_index); - return true; - } - - ImageResult ManganelosImagesPage::get_page_image_urls(std::vector &urls) { - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return ImageResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//p[id='arraydata']", - [](QuickMediaHtmlNode *node, void *userdata) { - std::vector *chapter_image_urls = (std::vector*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(text) { - string_split(text, ',', [chapter_image_urls](const char *str, size_t size) { - std::string url(str, size); - url = strip(url); - chapter_image_urls->push_back(std::move(url)); - return true; - }); - } - }, &urls); - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0) - return ImageResult::ERR; - - if(urls.empty()) - return ImageResult::ERR; - - return ImageResult::OK; - } -} \ No newline at end of file diff --git a/src/plugins/Mangatown.cpp b/src/plugins/Mangatown.cpp deleted file mode 100644 index 4db1ebc..0000000 --- a/src/plugins/Mangatown.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include "../../plugins/Mangatown.hpp" -#include "../../include/Notification.hpp" -#include "../../include/StringUtils.hpp" -#include "../../include/NetUtils.hpp" -#include - -static const std::string mangatown_url = "https://www.mangatown.com"; - -namespace QuickMedia { - static bool is_number_with_zero_fill(const char *str) { - while(*str == '0') { ++str; } - return atoi(str) != 0; - } - - static SearchResult search_page(const std::string &str, int page, BodyItems &result_items) { - std::string url = "https://www.mangatown.com/search?name="; - url += url_param_encode(str); - url += "&page=" + std::to_string(page); - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return SearchResult::NET_ERR; - - if(website_data.empty()) - return SearchResult::OK; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//p[class='title']/a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *title = quickmedia_html_node_get_attribute_value(node, "title"); - if(href && title && strncmp(href, "/manga/", 7) == 0) { - auto item = BodyItem::create(strip(title)); - item->url = mangatown_url + strip(href); - item_data->push_back(std::move(item)); - } - }, &result_items); - - BodyItemContext body_item_image_context; - body_item_image_context.body_items = &result_items; - body_item_image_context.index = 0; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//a[class='manga_cover']/img", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = src; - item_data->index++; - } - }, &body_item_image_context); - - cleanup: - quickmedia_html_search_deinit(&html_search); - return SearchResult::OK; - } - - SearchResult MangatownSearchPage::search(const std::string &str, BodyItems &result_items) { - return search_page(str, 1, result_items); - } - - PluginResult MangatownSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { - return search_result_to_plugin_result(search_page(str, 1 + page, result_items)); - } - - PluginResult MangatownSearchPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - BodyItems chapters_items; - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return PluginResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='chapter_list']//a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(href && text && strncmp(href, "/manga/", 7) == 0) { - auto item = BodyItem::create(strip(text)); - item->url = mangatown_url + strip(href); - item_data->push_back(std::move(item)); - } - }, &chapters_items); - - BodyItemContext body_item_context; - body_item_context.body_items = &chapters_items; - body_item_context.index = 0; - - quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='chapter_list']//span[class='time']", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItemContext*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(text && item_data->index < item_data->body_items->size()) { - std::string uploaded_date = strip(text); - (*item_data->body_items)[item_data->index]->set_description("Uploaded: " + uploaded_date); - item_data->index++; - } - }, &body_item_context); - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0) - return PluginResult::ERR; - - auto body = create_body(); - body->items = std::move(chapters_items); - result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); - return PluginResult::OK; - } - - PluginResult MangatownChaptersPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - result_tabs.push_back(Tab{nullptr, std::make_unique(program, content_title, title, url), nullptr}); - return PluginResult::OK; - } - - bool MangatownChaptersPage::extract_id_from_url(const std::string &url, std::string &manga_id) const { - size_t start_index = url.find("/manga/"); - if(start_index == std::string::npos) - return false; - - start_index += 7; - size_t end_index = url.find("/", start_index); - if(end_index == std::string::npos) { - manga_id = url.substr(start_index); - return true; - } - - manga_id = url.substr(start_index, end_index - start_index); - return true; - } - - // First page = 0 - static std::string first_image_url_to_page_image_url(const std::string &image_url, int page) { - size_t slash_index = image_url.rfind('/'); - if(slash_index == std::string::npos) - return ""; - - size_t dot_index = image_url.rfind('.'); - if(dot_index == std::string::npos || dot_index <= slash_index) - return ""; - - std::string filename_without_ext = image_url.substr(slash_index + 1, dot_index - (slash_index + 1)); - if(!filename_without_ext.empty() && filename_without_ext.back() == '1') - ++page; - - std::string page_str = std::to_string(page); - if(page_str.size() > filename_without_ext.size()) - return ""; - - filename_without_ext.replace(filename_without_ext.size() - page_str.size(), page_str.size(), page_str); - return image_url.substr(0, slash_index) + '/' + filename_without_ext + image_url.substr(dot_index); - } - - ImageResult MangatownImagesPage::get_page_image_urls(std::vector &urls) { - std::string image_src; - int num_pages = 0; - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return ImageResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='viewer']//img", - [](QuickMediaHtmlNode *node, void *userdata) { - std::string *image_src = (std::string*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && strstr(src, "/store/manga/")) { - if(strncmp(src, "//", 2) == 0) - *image_src = strip(src + 2); - else - *image_src = strip(src); - } - }, &image_src); - - if(result != 0 || image_src.empty()) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='page_select']//option", - [](QuickMediaHtmlNode *node, void *userdata) { - int *last_num_pages = (int*)userdata; - const char *value = quickmedia_html_node_get_attribute_value(node, "value"); - const char *text = quickmedia_html_node_get_text(node); - if(value && strncmp(value, "/manga/", 7) == 0) { - if(is_number_with_zero_fill(text)) { - (*last_num_pages)++; - } - } - }, &num_pages); - - num_pages /= 2; - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0 || image_src.empty() || num_pages == 0) - return ImageResult::ERR; - - for(int i = 0; i < num_pages; ++i) { - urls.push_back(first_image_url_to_page_image_url(image_src, i)); - } - - return ImageResult::OK; - } -} \ No newline at end of file -- cgit v1.2.3