From ba4e62d55156f9b94b569b56b6382bbcf94b7d86 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 16 Apr 2021 09:37:53 +0200 Subject: Convert mangatown and manganelos into a generic manga plugin Revert for_each_page.. processing of manga instead of getting all pages. Mangatown requires you to navigate page by page, cant predict what a specific pages image url will be. --- plugins/MangaGeneric.hpp | 166 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 plugins/MangaGeneric.hpp (limited to 'plugins/MangaGeneric.hpp') diff --git a/plugins/MangaGeneric.hpp b/plugins/MangaGeneric.hpp new file mode 100644 index 0000000..7d63622 --- /dev/null +++ b/plugins/MangaGeneric.hpp @@ -0,0 +1,166 @@ +#pragma once + +#include "Manga.hpp" +#include + +namespace QuickMedia { + struct SearchQuery { + const char *search_prefix = nullptr; + const char *page_prefix = nullptr; + int page_start = 0; + }; + + struct TextQuery { + const char *html_query = nullptr; + const char *title_field = nullptr; + const char *url_field = nullptr; + const char *url_contains = nullptr; + }; + + struct ThumbnailQuery { + const char *html_query = nullptr; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct ListChaptersQuery { + const char *html_query = nullptr; + const char *title_field = nullptr; + const char *url_field = nullptr; + const char *url_contains = nullptr; + + const char *uploaded_time_html_query = nullptr; + const char *uploaded_time_field_name = nullptr; + const char *uploaded_time_field_contains = nullptr; + }; + + using ListPageImagesQueryPost = std::function &image_urls)>; + struct ListPageImagesQuery { + const char *html_query = nullptr; + const char *field_name = nullptr; + const char *field_contains = nullptr; + ListPageImagesQueryPost post_handler = nullptr; + }; + + // Return the actual number of pages + using ListPagePaginationPagesPost = std::function; + struct ListPagePaginationQuery { + const char *pages_html_query = nullptr; + const char *pages_field_name = nullptr; + const char *pages_field_contains = nullptr; + ListPagePaginationPagesPost pages_post_handler = nullptr; + + const char *image_html_query = nullptr; + const char *image_field_name = nullptr; + const char *image_field_contains = nullptr; + + const char *next_page_html_query = nullptr; + const char *next_page_field_name = nullptr; + const char *next_page_field_contains = nullptr; + }; + + enum class ListPageQueryType { + IMAGES, + PAGINATION + }; + + struct ListPageQuery { + ListPageQueryType type = ListPageQueryType::IMAGES; + ListPageImagesQuery images_query; + ListPagePaginationQuery pagination_query; + }; + + struct MangaIdExtractor { + const char *prefix = nullptr; + const char *end = nullptr; + }; + + class MangaGenericSearchPage : public Page { + public: + MangaGenericSearchPage(Program *program, const char *service_name, const char *website_url); + const char* get_title() const override { return "All"; } + bool search_is_filter() override { return false; } + SearchResult search(const std::string &str, BodyItems &result_items) override; + PluginResult get_page(const std::string &str, int page, BodyItems &result_items) override; + PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; + sf::Vector2i get_thumbnail_max_size() override { return sf::Vector2i(101, 141); }; + + // Add a %s where the query or page number should be inserted into |search_prefix| and |page_prefix|, for example: + // search_prefix: example.com/search?q=%s + // page_prefix: &page=%s + // |page_start| is the first page, so the result page is |page_start| + |page| where page is the current page we are navigating on. + // This is required. + MangaGenericSearchPage& search_handler(const char *search_prefix, const char *page_prefix, int page_start); + // If |url_contains| is null, then any matching query is added. If |title_field| is "text", then the inner text is used. + // This is required. + MangaGenericSearchPage& text_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains); + // If |field_contains| is null, then any matching query is added. If |field_name| is "text", then the inner text is used. + // This is optional. + MangaGenericSearchPage& thumbnail_handler(const char *html_query, const char *field_name, const char *field_contains); + + // If |url_contains| is null, then any matching query is added. If |title_field| is "text", then the inner text is used. + // This is required. + MangaGenericSearchPage& list_chapters_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains); + // If |field_contains| is null, then any matching query is added. If |field_name| is "text", then the inner text is used. + // This is optional. + MangaGenericSearchPage& list_chapters_uploaded_time_handler(const char *html_query, const char *field_name, const char *field_contains); + + // If |field_contains| is null, then any matching query is added. If |field_name| is "text", then the inner text is used. + // This or |list_page_images_pagination_handler| is required. + MangaGenericSearchPage& list_page_images_handler(const char *html_query, const char *field_name, const char *field_contains, ListPageImagesQueryPost post_handler = nullptr); + + // If |pages_field_contains| or |image_field_contains| is null, then any matching query is added. If |pages_field_name| or |image_field_name| is "text", then the inner text is used. + // This or |list_page_images_handler| is required. + MangaGenericSearchPage& list_page_images_pagination_handler( + const char *pages_html_query, const char *pages_field_name, const char *pages_field_contains, ListPagePaginationPagesPost pages_post_handler, + const char *image_html_query, const char *image_field_name, const char *image_field_contains, + const char *next_page_html_query, const char *next_page_field_name, const char *next_page_field_contains); + + // For example: mangasite.com/manga/204353&f=23 + // /manga/ here would be the |prefix| and & would be |end|. |end| is optional. + // The goal is to extract 204353 from the manga chapter page url. + MangaGenericSearchPage& manga_id_handler(const char *prefix, const char *end); + private: + const char *service_name; + std::string website_url; + SearchQuery search_query; + TextQuery text_query; + ThumbnailQuery thumbnail_query; + ListChaptersQuery list_chapters_query; + ListPageQuery list_page_query; + MangaIdExtractor manga_id_extractor; + }; + + class MangaGenericChaptersPage : public MangaChaptersPage { + public: + MangaGenericChaptersPage(Program *program, std::string manga_name, std::string manga_url, const MangaIdExtractor &manga_id_extractor, const char *service_name, const std::string &website_url, const ListPageQuery *list_page_query) : + MangaChaptersPage(program, std::move(manga_name), std::move(manga_url)), manga_id_extractor(manga_id_extractor), service_name(service_name), website_url(website_url), list_page_query(list_page_query) {} + PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; + protected: + bool extract_id_from_url(const std::string &url, std::string &manga_id) const override; + const char* get_service_name() const override { return service_name; } + private: + MangaIdExtractor manga_id_extractor; + const char *service_name; + std::string website_url; + const ListPageQuery *list_page_query; + }; + + class MangaGenericImagesPage : public MangaImagesPage { + public: + MangaGenericImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url, const char *service_name, const std::string &website_url, const ListPageQuery *list_page_query) : + MangaImagesPage(program, std::move(manga_name), std::move(chapter_name), std::move(url)), service_name(service_name), website_url(website_url), list_page_query(list_page_query) {} + ImageResult get_number_of_images(int &num_images) override; + ImageResult for_each_page_in_chapter(PageCallback callback) override; + const char* get_service_name() const override { return service_name; } + private: + ImageResult get_page_image_urls(); + private: + const char *service_name; + std::string website_url; + const ListPageQuery *list_page_query; + std::string prev_chapter_url; + std::string current_image_url; + std::string next_page_url; + }; +} \ No newline at end of file -- cgit v1.2.3