From ba4e62d55156f9b94b569b56b6382bbcf94b7d86 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 16 Apr 2021 09:37:53 +0200 Subject: Convert mangatown and manganelos into a generic manga plugin Revert for_each_page.. processing of manga instead of getting all pages. Mangatown requires you to navigate page by page, cant predict what a specific pages image url will be. --- TODO | 44 +--- plugins/Manga.hpp | 14 +- plugins/MangaGeneric.hpp | 166 +++++++++++++ plugins/Mangadex.hpp | 5 +- plugins/Manganelo.hpp | 5 +- plugins/Manganelos.hpp | 33 --- plugins/Mangatown.hpp | 33 --- src/QuickMedia.cpp | 71 ++++-- src/SearchBar.cpp | 1 + src/plugins/MangaGeneric.cpp | 567 +++++++++++++++++++++++++++++++++++++++++++ src/plugins/Mangadex.cpp | 27 ++- src/plugins/Manganelo.cpp | 33 ++- src/plugins/Manganelos.cpp | 154 ------------ src/plugins/Mangatown.cpp | 217 ----------------- 14 files changed, 868 insertions(+), 502 deletions(-) create mode 100644 plugins/MangaGeneric.hpp delete mode 100644 plugins/Manganelos.hpp delete mode 100644 plugins/Mangatown.hpp create mode 100644 src/plugins/MangaGeneric.cpp delete mode 100644 src/plugins/Manganelos.cpp delete mode 100644 src/plugins/Mangatown.cpp diff --git a/TODO b/TODO index 76cdc4a..b29e434 100644 --- a/TODO +++ b/TODO @@ -2,7 +2,6 @@ Give user the option to start where they left off or from the start or from the Add grid-view when thumbnails are visible. Add scrollbar. Somehow deal with youtube banning ip when searching too often. -Optimize shadow rendering for items (Right now they fill too much space that is behind items). It should also be a blurry shadow. When continuing to read manga from a different page from the first and there is no cache for the chapter, then start downloading from the current page instead of page 1. Show progress of manga in the history tab (current chapter out of total chapters). Animate page navigation. @@ -19,10 +18,7 @@ Use fallback cjk font for regular sf::Text as well (search, tabs, chapter name w Fix some japanese fonts not rendering (half width alphanumeric?). Extract thumbnail from images that are being downloaded, while its downloading and show that while the full image is downloading (upscaled, or with blurhash). Add setting to disable sending typing events to the server (matrix). -Support emoji (mainly for matrix), by readding Text code from dchat. Also do the same but for inline images, text editing and url colors and clicking (also clicking on inline images). -Also take code from dchat to support gifs (inline in text). -Use pixel buffer object for asynchronous texture transfer to gpu? is this necessary? -Add option to edit input in vim (using temporary file). +Take code from dchat to support gifs (inline in text). Scrolling in images still messes up the |current| page sometimes, need a way to fix this. Add ctrl+i keybind when viewing an image on 4chan to reverse image search it (using google, yandex and saucenao). Show filename at the bottom when viewing an image/video on 4chan. @@ -34,27 +30,23 @@ Sanitize check: do not allow pasting more than 2gb of text. Only add related videos to recommendations if its the first time we watch the video. This is to prevent rewatching a video multiple times from messing up recommendations. Implement mentions in matrix with an autofill list, like on element. Also do the same with / commands. Add option to disable autosearch and search when pressing enter instead or something? this would be needed for mobile phones where typing is slow. -Sleep when idle, to reduce cpu usage from 1-2% to 0%, important for mobile devices. Also render view to a rendertexture and render that instead of redrawing every time every time. +Render view to a rendertexture and render that instead of redrawing every time every time. Provide a way to specify when notifications should be received (using matrix api) and also read the notification config from matrix. Also provide a way to disable notifications globally. Use quickmedia to show image in matrix rooms, instead of mpv. Add command to ban users. Support peertube (works with mpv, but need to implement search and related videos). -Scroll to bottom when receiving a new message even if the selected message is not the last one. It should instead scroll if the last message is visible on the screen. -Also add a tab for common directories and recently accessed files/directories (the directories would be the directory of used files). +Add a tab for common directories and recently accessed files/directories (the directories would be the directory of used files). Provide a way to go to the first unread message in matrix and also show a marker in the body (maybe a red line?) where the first unread message is. Cleanup keybindings. Some require ctrl, some dont. Add room topic beside room name in matrix (in messages tab). -Add /me to matrix, emoji, reactions... Set the icon of the window to be the icon of the plugin. Nice for KDE, GNOME, etc with titlebars. If --no-audio is used then music should be played with a lightweight music player instead. MPV is heavy even for music (60mb RAM). Maybe use sfml audio functions? -Optimize startup time. Update 4chan thread in real time, just like 4chan-x. Save the original event message, so when replying for example we can use the original message as the replying to message, rather than our converted "body" text. Remove tidy dependency and use my own html-parser. Add option to sort by other than timestamp for nyaa.si. Add url preview for matrix (using matrix api, fallback to client url preview (using our own url preview project) if disabled by the homeserver). IMPORTANT: Cleanup old messages in matrix (from matrix plugin), and instead either save them to disk or refetch them from server when going up to read old messages. (High memory usage, high disk space) -Use memberName() instead of key() when iterating json object. key() creates a copy, memberName() doesn't. Do not try to reload/redownload thumbnail that fails to download after its cleared when its no longer visible on screen and then becomes visible. Show google recaptcha on youtube when search/play fails, which can happen when using tor. Show notifications when we receive a message in a matrix room even if we are not mentioned. This happens when we have set to receive notifications for all messages. @@ -63,7 +55,7 @@ Show 4chan warnings as warnings instead of ban when posting a message (show the Add tabs. Using tabs with tabbed is not as good of a solution as it would use much more memory (opengl context cost) and with our own tabs, we can clear thumbnails and other cache when a tab is in the background. Changing tab either with ctrl+tab or mouse click. ctrl+enter to open a new tab. Ctrl+q or ctrl+w to close a tab. Remove related videos that have already been watched (except the first related video, which is the "watch next" video, usually the next part of a serie). Add F5 to refresh page. -Support m.sticker, m.direct, and other matrix events. +Support m.sticker and other matrix events. Allow choosing which translation/scanlation to use on mangadex. Right now it uses the latest one, which is most likely to be the best. Add file upload to 4chan. Retry download if it fails, at least 3 times (observed to be needed for mangadex images). @@ -76,18 +68,14 @@ Fix inconsistent behavior when editing a message that is replied to in matrix. R to fix this we could perhaps replace the newly created body items for replies when loading old messages and one of the old messages is also one of the embedded messages (by event id). Add button to skip to next video. MPV has this feature when setting "next" video (can be done over IPC). Use a custom allocator that replaces malloc/realloc/free/new/delete to release memory properly, using munmap in free/delete. The C allocator doesn't do that! memory usage remains high after one large allocation. The C allocator only marks it as free. -Ignore timestamp ordering for messages in matrix? element seems to do that (or only for new messages???), and also we need the latest message to be last I guess to set read markers properly? Merge |Page::search| and |Page::get_page|. get_page with page 0 should be the same as search. Disable posting in 4chan thread if closed (thread json contains "closed" field for OP). -Remove calls to get the original message of an edit in edits and replies in matrix if possible. These calls take additional time, and with a slow homeserver or high ping this could make messages to be delayed by an annoying amount of time. Read image exif into to apply image rotation. This is common in images taken on phones. If not done, the width and height will also be mixed and thumbnail fallback size will be incorrectly calculated (for example in matrix). Handle M_LIMIT_EXCEEDED in matrix Maybe dont clear cache for body items when filtering. -Change scroll in body when previous items change size (such as when thumbnail has finished loading). Pressing enter on a pinned message should go to the message in the messages tab. Display file list for nyaa. Remove reply formatting for NOTICE in matrix as well. -Scroll body when adding new items and the selected item fits after the scroll (needed for matrix where we want to see new messages when the last item is not selected). Or show the last item when its not visible in matrix (at the bottom, just like when replying/editing). Implement our own encryption for matrix. This is also needed to make forwarded message work. Pantalaimon ignores them! Modify matrix sync to download and parse json but not handle it, and then add a function to handle the json. This would allow us to remove all the mutex code if we would call that new method from the main thread (similar to chromium multithreading). Fetch replies/pinned message using multiple threads. @@ -95,22 +83,19 @@ Show in room tags list when there is a message in any of the rooms in the tag. Support webp. Then switch to the youtube thumbnails from the response json instead of hqdefault, to remove the black bars. Show images while they download by showing them as scanlines starting from the top. Needed for slow websites such as 4chan. Use curl parallel download instead of downloading with multiple threads. This can be done with multiple -O parameters. -Handle matrix groups? (which also contains join, invite, leave...). Add functionality to ignore users in matrix. This is done with an ignore request and we wont get messages and invites from that user anymore. Also add option to ignore in the invites page. Add keybind to go to invites page from any page. Show marker beside pinned messages tab name if there are new pinned messages. -Make /logout work everywhere, not only in room message input. +Make /logout work everywhere, not only in room message input (or add a logout button to settings tab?). Add a notifications tab to show messages that mention us in all rooms (and then press enter to go to that message in that room), also add a unread/mentioned rooms list tab to only show rooms with unread messages or mentions. Disable message input in matrix when muted. Preview rooms? -Handle matrix token being invalidated while running. -Update upload limit if its updated on the server (can it be updated while the server is running?). +Handle matrix token being invalidated while running and not running. +Update upload limit if its updated on the server. Editing a reply removes reply formatting (both in body and formatted_body). Element also does this when you edit a reply twice. This breaks element mobile that is unable to display replied-to messages without correct formatting (doesn't fetch the replied-to message). This also removes the mentioned name which breaks mention for reply. Implement m.room.tombstone. Show a marker when a room uses encryption. -Remove replied-to message text in room preview. That shows ignored users text and we want to see the reply message instead anyways. -Update room name/avatar with new data in /sync. Scroll tabs if there are more than 3 tab items and show arrow on left/right side when there are more items to see. Make a shader for Text for changing color instead of updating the text geometry. Or loop vertices and set their color to the new color without updating the text geometry. Automatically retry sending messages that fails to send (after timeout). These failed to send messages should be stored on disk and retried when going back to the room or restarting QuickMedia. @@ -121,42 +106,31 @@ then add a gap between old messages from before sync and after sync so we can fe Fetching of previous messages should also be saved in the /sync file and messages fetched with get_message_by_id, which would cache embedded items and pinned messages; also cache users. If manga page fails to download then show "failed to download image" as text and bind F5 to refresh (retry download). Use " in the room description instead of "Message deleted". Limit size of Entry and scroll content instead. -Have an option to remove membership events from room unread messages, so that only text/media messages update room unread description in the room list. This could be implemented by doing /sync and filtering to only show m.room.message event types or something similar and limiting it to the last message, -then comparing that to the read marker. -Comparing latest message for unread message should be done by comparing event id ascii-wise instead of checking if the latest message is equal to the read marker event id. This is to fix unread messages in the case of the latest message in a room being deleted. -Support replying to messages with media, by typing /upload in the reply text. Instead of doing a GET for the first N bytes to check if a video is streamable, start streaming the video and if the first bytes doesn't contain MOOV then wait until the whole video has downloaded before playing it. -Create thumbnail when uploading an image in matrix. If a message reply is edited and its a reply to us, then we want the text to be red even if its edited. Implement matrix spoiler, see: https://github.com/matrix-org/matrix-doc/blob/master/proposals/2010-spoilers.md. -Make the messages that mention us red using the matrix notification api that we already use. Mark those messages as mentions us. Replace sfml font glyph loading completely with FreeType. There is a very old bug in sfml that causes text to get corrupt sometimes. Im guessing this happens when adding new characters to the font atlas and that coordinates for the glyphs become incorrect? Add arguments to pipe plugin to pass input and output fifo for sending commands to QuickMedia and receiving events. -Update thumbnails in file-manager if an image is replaced, by including the modify date of the image in the thumbnail cache as well. Create a workaround for dwm terminal swallow patch stealing mpv when moving QuickMedia to another monitor sometimes. Maybe check for structure notify events on mpv and reparent and select input on the mpv window again? Add option to decline and mute user in invites. This is to combat invite spam, where muted users cant invite you. Allow hiding videos so they dont show up in recommendations and related videos. Add an option to select video resolution, if we want to use less power and less bandwidth for example. Use mpv option --gpu-context=x11egl on pinephone to force xwayland on wayland, to be able to embed the mpv window inside the quickmedia. Replies to the local user shouldn't remove the red text. Maybe fix this by checking if the reply to message user is the local user or when the replied to message has loaded then make the reply red if its a reply to us. Also for existing messages check if the message is a notification message and then make the message red. -Sort reactions by timestamp. Check what happens with xsrf_token if comments are not fetched for a long time. Does it time out? if so do we need to refetch the video page to get the new token?. Add support for comments in live youtube videos, api is at: https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8. Make video visible when reading comments (youtube). Convert nyaa.si/spotify/soundcloud date from ISO date string to local time. -When ui is scaled then the predicated thumbnail size will be wrong since its scaled in Body but not in the plugins where they are requested. +When ui is scaled then the predicted thumbnail size will be wrong since its scaled in Body but not in the plugins where they are requested. Check if get_page handlers in pages need to check if next batch is valid. If the server returns empty next batch we shouldn't fetch the first page... Cloudflare kicks in when downloading manga on manganelo.. figure out a way to bypass it. This doesn't seem to happen when using python requests as is done in AutoMedia. Replace cppcodec with another library for base64 url encoding/decoding. Its way too large for what it does. \ No newline at end of file diff --git a/plugins/Manga.hpp b/plugins/Manga.hpp index 60e739b..5dfa800 100644 --- a/plugins/Manga.hpp +++ b/plugins/Manga.hpp @@ -14,27 +14,35 @@ namespace QuickMedia { class MangaImagesPage : public Page { public: - MangaImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url) : Page(program), manga_name(std::move(manga_name)), chapter_name(std::move(chapter_name)), url(std::move(url)) {} + MangaImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url) : Page(program), manga_name(std::move(manga_name)), chapter_name(std::move(chapter_name)), url(std::move(url)), chapter_num_pages(-1) {} virtual ~MangaImagesPage() = default; const char* get_title() const override { return chapter_name.c_str(); } PageTypez get_type() const override { return PageTypez::MANGA_IMAGES; } - virtual ImageResult get_page_image_urls(std::vector &urls) = 0; + virtual ImageResult get_number_of_images(int &num_images) = 0; + virtual ImageResult for_each_page_in_chapter(PageCallback callback) = 0; virtual void change_chapter(std::string new_chapter_name, std::string new_url) { chapter_name = std::move(new_chapter_name); - url = std::move(new_url); + if(url != new_url) { + url = std::move(new_url); + chapter_image_urls.clear(); + chapter_num_pages = -1; + } } const std::string& get_chapter_name() const { return chapter_name; } const std::string& get_url() const { return url; } + // TODO: Remove and use plugin name instead virtual const char* get_service_name() const = 0; const std::string manga_name; protected: std::string chapter_name; std::string url; + std::vector chapter_image_urls; + int chapter_num_pages; }; class MangaChaptersPage : public TrackablePage { diff --git a/plugins/MangaGeneric.hpp b/plugins/MangaGeneric.hpp new file mode 100644 index 0000000..7d63622 --- /dev/null +++ b/plugins/MangaGeneric.hpp @@ -0,0 +1,166 @@ +#pragma once + +#include "Manga.hpp" +#include + +namespace QuickMedia { + struct SearchQuery { + const char *search_prefix = nullptr; + const char *page_prefix = nullptr; + int page_start = 0; + }; + + struct TextQuery { + const char *html_query = nullptr; + const char *title_field = nullptr; + const char *url_field = nullptr; + const char *url_contains = nullptr; + }; + + struct ThumbnailQuery { + const char *html_query = nullptr; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct ListChaptersQuery { + const char *html_query = nullptr; + const char *title_field = nullptr; + const char *url_field = nullptr; + const char *url_contains = nullptr; + + const char *uploaded_time_html_query = nullptr; + const char *uploaded_time_field_name = nullptr; + const char *uploaded_time_field_contains = nullptr; + }; + + using ListPageImagesQueryPost = std::function &image_urls)>; + struct ListPageImagesQuery { + const char *html_query = nullptr; + const char *field_name = nullptr; + const char *field_contains = nullptr; + ListPageImagesQueryPost post_handler = nullptr; + }; + + // Return the actual number of pages + using ListPagePaginationPagesPost = std::function; + struct ListPagePaginationQuery { + const char *pages_html_query = nullptr; + const char *pages_field_name = nullptr; + const char *pages_field_contains = nullptr; + ListPagePaginationPagesPost pages_post_handler = nullptr; + + const char *image_html_query = nullptr; + const char *image_field_name = nullptr; + const char *image_field_contains = nullptr; + + const char *next_page_html_query = nullptr; + const char *next_page_field_name = nullptr; + const char *next_page_field_contains = nullptr; + }; + + enum class ListPageQueryType { + IMAGES, + PAGINATION + }; + + struct ListPageQuery { + ListPageQueryType type = ListPageQueryType::IMAGES; + ListPageImagesQuery images_query; + ListPagePaginationQuery pagination_query; + }; + + struct MangaIdExtractor { + const char *prefix = nullptr; + const char *end = nullptr; + }; + + class MangaGenericSearchPage : public Page { + public: + MangaGenericSearchPage(Program *program, const char *service_name, const char *website_url); + const char* get_title() const override { return "All"; } + bool search_is_filter() override { return false; } + SearchResult search(const std::string &str, BodyItems &result_items) override; + PluginResult get_page(const std::string &str, int page, BodyItems &result_items) override; + PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; + sf::Vector2i get_thumbnail_max_size() override { return sf::Vector2i(101, 141); }; + + // Add a %s where the query or page number should be inserted into |search_prefix| and |page_prefix|, for example: + // search_prefix: example.com/search?q=%s + // page_prefix: &page=%s + // |page_start| is the first page, so the result page is |page_start| + |page| where page is the current page we are navigating on. + // This is required. + MangaGenericSearchPage& search_handler(const char *search_prefix, const char *page_prefix, int page_start); + // If |url_contains| is null, then any matching query is added. If |title_field| is "text", then the inner text is used. + // This is required. + MangaGenericSearchPage& text_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains); + // If |field_contains| is null, then any matching query is added. If |field_name| is "text", then the inner text is used. + // This is optional. + MangaGenericSearchPage& thumbnail_handler(const char *html_query, const char *field_name, const char *field_contains); + + // If |url_contains| is null, then any matching query is added. If |title_field| is "text", then the inner text is used. + // This is required. + MangaGenericSearchPage& list_chapters_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains); + // If |field_contains| is null, then any matching query is added. If |field_name| is "text", then the inner text is used. + // This is optional. + MangaGenericSearchPage& list_chapters_uploaded_time_handler(const char *html_query, const char *field_name, const char *field_contains); + + // If |field_contains| is null, then any matching query is added. If |field_name| is "text", then the inner text is used. + // This or |list_page_images_pagination_handler| is required. + MangaGenericSearchPage& list_page_images_handler(const char *html_query, const char *field_name, const char *field_contains, ListPageImagesQueryPost post_handler = nullptr); + + // If |pages_field_contains| or |image_field_contains| is null, then any matching query is added. If |pages_field_name| or |image_field_name| is "text", then the inner text is used. + // This or |list_page_images_handler| is required. + MangaGenericSearchPage& list_page_images_pagination_handler( + const char *pages_html_query, const char *pages_field_name, const char *pages_field_contains, ListPagePaginationPagesPost pages_post_handler, + const char *image_html_query, const char *image_field_name, const char *image_field_contains, + const char *next_page_html_query, const char *next_page_field_name, const char *next_page_field_contains); + + // For example: mangasite.com/manga/204353&f=23 + // /manga/ here would be the |prefix| and & would be |end|. |end| is optional. + // The goal is to extract 204353 from the manga chapter page url. + MangaGenericSearchPage& manga_id_handler(const char *prefix, const char *end); + private: + const char *service_name; + std::string website_url; + SearchQuery search_query; + TextQuery text_query; + ThumbnailQuery thumbnail_query; + ListChaptersQuery list_chapters_query; + ListPageQuery list_page_query; + MangaIdExtractor manga_id_extractor; + }; + + class MangaGenericChaptersPage : public MangaChaptersPage { + public: + MangaGenericChaptersPage(Program *program, std::string manga_name, std::string manga_url, const MangaIdExtractor &manga_id_extractor, const char *service_name, const std::string &website_url, const ListPageQuery *list_page_query) : + MangaChaptersPage(program, std::move(manga_name), std::move(manga_url)), manga_id_extractor(manga_id_extractor), service_name(service_name), website_url(website_url), list_page_query(list_page_query) {} + PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; + protected: + bool extract_id_from_url(const std::string &url, std::string &manga_id) const override; + const char* get_service_name() const override { return service_name; } + private: + MangaIdExtractor manga_id_extractor; + const char *service_name; + std::string website_url; + const ListPageQuery *list_page_query; + }; + + class MangaGenericImagesPage : public MangaImagesPage { + public: + MangaGenericImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url, const char *service_name, const std::string &website_url, const ListPageQuery *list_page_query) : + MangaImagesPage(program, std::move(manga_name), std::move(chapter_name), std::move(url)), service_name(service_name), website_url(website_url), list_page_query(list_page_query) {} + ImageResult get_number_of_images(int &num_images) override; + ImageResult for_each_page_in_chapter(PageCallback callback) override; + const char* get_service_name() const override { return service_name; } + private: + ImageResult get_page_image_urls(); + private: + const char *service_name; + std::string website_url; + const ListPageQuery *list_page_query; + std::string prev_chapter_url; + std::string current_image_url; + std::string next_page_url; + }; +} \ No newline at end of file diff --git a/plugins/Mangadex.hpp b/plugins/Mangadex.hpp index e4a64d8..7fd6ca0 100644 --- a/plugins/Mangadex.hpp +++ b/plugins/Mangadex.hpp @@ -31,9 +31,12 @@ namespace QuickMedia { class MangadexImagesPage : public MangaImagesPage { public: MangadexImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url) : MangaImagesPage(program, std::move(manga_name), std::move(chapter_name), std::move(url)) {} - ImageResult get_page_image_urls(std::vector &urls) override; + ImageResult get_number_of_images(int &num_images) override; + ImageResult for_each_page_in_chapter(PageCallback callback) override; const char* get_service_name() const override { return "mangadex"; } private: + // Cached + ImageResult get_image_urls_for_chapter(const std::string &url); bool save_mangadex_cookies(const std::string &url, const std::string &cookie_filepath); }; } \ No newline at end of file diff --git a/plugins/Manganelo.hpp b/plugins/Manganelo.hpp index 4a7bfc3..530a1f2 100644 --- a/plugins/Manganelo.hpp +++ b/plugins/Manganelo.hpp @@ -37,7 +37,10 @@ namespace QuickMedia { class ManganeloImagesPage : public MangaImagesPage { public: ManganeloImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url) : MangaImagesPage(program, std::move(manga_name), std::move(chapter_name), std::move(url)) {} - ImageResult get_page_image_urls(std::vector &urls) override; + ImageResult get_number_of_images(int &num_images) override; + ImageResult for_each_page_in_chapter(PageCallback callback) override; const char* get_service_name() const override { return "manganelo"; } + private: + ImageResult get_image_urls_for_chapter(const std::string &url); }; } \ No newline at end of file diff --git a/plugins/Manganelos.hpp b/plugins/Manganelos.hpp deleted file mode 100644 index 4df0c3f..0000000 --- a/plugins/Manganelos.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "Manga.hpp" -#include - -namespace QuickMedia { - class ManganelosSearchPage : public Page { - public: - ManganelosSearchPage(Program *program) : Page(program) {} - const char* get_title() const override { return "All"; } - bool search_is_filter() override { return false; } - SearchResult search(const std::string &str, BodyItems &result_items) override; - PluginResult get_page(const std::string &str, int page, BodyItems &result_items) override; - PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; - sf::Vector2i get_thumbnail_max_size() override { return sf::Vector2i(101, 141); }; - }; - - class ManganelosChaptersPage : public MangaChaptersPage { - public: - ManganelosChaptersPage(Program *program, std::string manga_name, std::string manga_url) : MangaChaptersPage(program, std::move(manga_name), std::move(manga_url)) {} - PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; - protected: - bool extract_id_from_url(const std::string &url, std::string &manga_id) const override; - const char* get_service_name() const override { return "manganelos"; } - }; - - class ManganelosImagesPage : public MangaImagesPage { - public: - ManganelosImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url) : MangaImagesPage(program, std::move(manga_name), std::move(chapter_name), std::move(url)) {} - ImageResult get_page_image_urls(std::vector &urls) override; - const char* get_service_name() const override { return "manganelos"; } - }; -} \ No newline at end of file diff --git a/plugins/Mangatown.hpp b/plugins/Mangatown.hpp deleted file mode 100644 index 0b6c1c4..0000000 --- a/plugins/Mangatown.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "Manga.hpp" -#include - -namespace QuickMedia { - class MangatownSearchPage : public Page { - public: - MangatownSearchPage(Program *program) : Page(program) {} - const char* get_title() const override { return "All"; } - bool search_is_filter() override { return false; } - SearchResult search(const std::string &str, BodyItems &result_items) override; - PluginResult get_page(const std::string &str, int page, BodyItems &result_items) override; - PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; - sf::Vector2i get_thumbnail_max_size() override { return sf::Vector2i(101, 141); }; - }; - - class MangatownChaptersPage : public MangaChaptersPage { - public: - MangatownChaptersPage(Program *program, std::string manga_name, std::string manga_url) : MangaChaptersPage(program, std::move(manga_name), std::move(manga_url)) {} - PluginResult submit(const std::string &title, const std::string &url, std::vector &result_tabs) override; - protected: - bool extract_id_from_url(const std::string &url, std::string &manga_id) const override; - const char* get_service_name() const override { return "mangatown"; } - }; - - class MangatownImagesPage : public MangaImagesPage { - public: - MangatownImagesPage(Program *program, std::string manga_name, std::string chapter_name, std::string url) : MangaImagesPage(program, std::move(manga_name), std::move(chapter_name), std::move(url)) {} - ImageResult get_page_image_urls(std::vector &urls) override; - const char* get_service_name() const override { return "mangatown"; } - }; -} \ No newline at end of file diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index 5bdc67e..26760ec 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -1,8 +1,7 @@ #include "../include/QuickMedia.hpp" #include "../plugins/Manganelo.hpp" -#include "../plugins/Manganelos.hpp" -#include "../plugins/Mangatown.hpp" #include "../plugins/Mangadex.hpp" +#include "../plugins/MangaGeneric.hpp" #include "../plugins/Youtube.hpp" #include "../plugins/Pornhub.hpp" #include "../plugins/Spankbang.hpp" @@ -675,9 +674,45 @@ namespace QuickMedia { show_room_side_panel = false; else show_room_side_panel = true; + main_thread_id = std::this_thread::get_id(); } + static void add_manganelos_handlers(MangaGenericSearchPage *manga_generic_search_page) { + manga_generic_search_page->search_handler("http://manganelos.com/search?q=", "&page=", 1) + .text_handler("//div[class='media-left cover-manga']//a", "title", "href", "/manga/") + .thumbnail_handler("//div[class='media-left cover-manga']//img[class='media-object']", "src", "/mangaimage/") + .list_chapters_handler("//section[id='examples']//div[class='chapter-list']//a", "text", "href", nullptr) + .list_page_images_handler("//p[id='arraydata']", "text", nullptr, [](std::vector &urls) { + if(urls.size() != 1) + return; + + std::string urls_combined = urls.front(); + urls.clear(); + + string_split(urls_combined, ',', [&urls](const char *str, size_t size) { + std::string url(str, size); + url = strip(url); + urls.push_back(std::move(url)); + return true; + }); + }) + .manga_id_handler("/manga/", "?"); + } + + static void add_mangatown_handlers(MangaGenericSearchPage *manga_generic_search_page) { + manga_generic_search_page->search_handler("https://www.mangatown.com/search?name=", "&page=", 1) + .text_handler("//p[class='title']/a", "title", "href", "/manga/") + .thumbnail_handler("//a[class='manga_cover']/img", "src", nullptr) + .list_chapters_handler("//ul[class='chapter_list']//a", "text", "href", "/manga/") + .list_chapters_uploaded_time_handler("//ul[class='chapter_list']//span[class='time']", "text", nullptr) + .list_page_images_pagination_handler( + "//div[class='page_select']//option", "value", "/manga/", [](int num_pages){ return std::max(0, (num_pages /= 2) - 1); }, + "//img[id='image']", "src", nullptr, + "//a[class='next_page']", "href", nullptr) + .manga_id_handler("/manga/", "/"); + } + void Program::load_plugin_by_name(std::vector &tabs, const char *start_dir) { if(!plugin_name || plugin_name[0] == '\0') return; @@ -727,7 +762,9 @@ namespace QuickMedia { tabs.push_back(Tab{std::move(history_body), std::move(history_page), std::move(search_bar)}); } else if(strcmp(plugin_name, "manganelos") == 0) { auto search_body = create_body(); - tabs.push_back(Tab{std::move(search_body), std::make_unique(this), create_search_bar("Search...", 400)}); + auto search_page = std::make_unique(this, plugin_name, nullptr); + add_manganelos_handlers(search_page.get()); + tabs.push_back(Tab{std::move(search_body), std::move(search_page), create_search_bar("Search...", 400)}); auto history_body = create_body(); auto search_bar = create_search_bar("Search...", SEARCH_DELAY_FILTER); @@ -735,7 +772,9 @@ namespace QuickMedia { tabs.push_back(Tab{std::move(history_body), std::move(history_page), std::move(search_bar)}); } else if(strcmp(plugin_name, "mangatown") == 0) { auto search_body = create_body(); - tabs.push_back(Tab{std::move(search_body), std::make_unique(this), create_search_bar("Search...", 400)}); + auto search_page = std::make_unique(this, plugin_name, "https://www.mangatown.com"); + add_mangatown_handlers(search_page.get()); + tabs.push_back(Tab{std::move(search_body), std::move(search_page), create_search_bar("Search...", 400)}); auto history_body = create_body(); auto search_bar = create_search_bar("Search...", SEARCH_DELAY_FILTER); @@ -2321,24 +2360,24 @@ namespace QuickMedia { Path content_cache_dir_ = content_cache_dir; image_download_future = AsyncTask>([images_page, content_cache_dir_, this](std::promise num_manga_pages_promise) { - std::vector page_image_urls; - if(images_page->get_page_image_urls(page_image_urls) != ImageResult::OK) { + int num_pages = 0; + if(images_page->get_number_of_images(num_pages) != ImageResult::OK) { num_manga_pages_promise.set_value(0); if(!image_download_cancel) show_notification("QuickMedia", "Failed to fetch page images", Urgency::CRITICAL); return; } else { - num_manga_pages_promise.set_value(page_image_urls.size()); - image_upscale_status.resize(page_image_urls.size(), 0); + num_manga_pages_promise.set_value(num_pages); + image_upscale_status.resize(num_pages, 0); } - if(page_image_urls.empty()) + if(num_pages == 0) return; // TODO: Download images in parallel int page = 1; - for(const std::string &url : page_image_urls) { + images_page->for_each_page_in_chapter([this, images_page, &page, content_cache_dir_](const std::string &url) { if(image_download_cancel) - return; + return false; int image_index = page - 1; @@ -2355,7 +2394,7 @@ namespace QuickMedia { } if(get_file_type(image_filepath) != FileType::FILE_NOT_FOUND && upscaled_ok) - continue; + return true; std::vector extra_args; const bool is_manganelo = (strcmp(images_page->get_service_name(), "manganelo") == 0); @@ -2376,7 +2415,7 @@ namespace QuickMedia { size_t file_size = 0; if(download_to_file(url, image_filepath_tmp.data, extra_args, true) != DownloadResult::OK || (is_manganelo && file_get_size(image_filepath_tmp, &file_size) == 0 && file_size < 255)) { if(!image_download_cancel) show_notification("QuickMedia", "Failed to download image: " + url, Urgency::CRITICAL); - continue; + return true; } bool rename_immediately = true; @@ -2412,10 +2451,12 @@ namespace QuickMedia { if(rename(image_filepath_tmp.data.c_str(), image_filepath.data.c_str()) != 0) { perror(image_filepath_tmp.data.c_str()); show_notification("QuickMedia", "Failed to save image to file: " + image_filepath.data, Urgency::CRITICAL); - continue; + return true; } } - } + + return true; + }); }, std::move(num_manga_pages_promise)); sf::Event event; diff --git a/src/SearchBar.cpp b/src/SearchBar.cpp index b20c3b0..63515bd 100644 --- a/src/SearchBar.cpp +++ b/src/SearchBar.cpp @@ -103,6 +103,7 @@ namespace QuickMedia { clear(); updated_search = true; updated_autocomplete = true; + time_since_search_update.restart(); } if(event.type == sf::Event::TextEntered && event.text.unicode != 8 && event.text.unicode != 127) // 8 = backspace, 127 = del diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp new file mode 100644 index 0000000..a359698 --- /dev/null +++ b/src/plugins/MangaGeneric.cpp @@ -0,0 +1,567 @@ +#include "../../plugins/MangaGeneric.hpp" +#include "../../include/StringUtils.hpp" +#include +#include + +namespace QuickMedia { + struct HtmlSearchUserdata { + BodyItems *body_items; + const char *field1 = nullptr; + const char *field2 = nullptr; + const char *field2_contains = nullptr; + }; + + enum class MergeType { + THUMBNAIL, + UPLOAD_TIME + }; + + struct HtmlMergeUserdata { + MergeType type; + BodyItemContext body_item_image_context; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct HtmlListPageImagesUserdata { + std::vector *urls; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct HtmlPageCountUserdata { + int num_pages = 0; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + struct HtmlPageImageUserdata { + std::string *url = nullptr; + const char *field_name = nullptr; + const char *field_contains = nullptr; + }; + + static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + if(strcmp(field_name, "text") == 0) + return quickmedia_html_node_get_text(node); + else + return quickmedia_html_node_get_attribute_value(node, field_name); + } + + static bool starts_with(const std::string &str, const char *sub) { + size_t sub_len = strlen(sub); + return str.size() >= sub_len && memcmp(str.c_str(), sub, sub_len) == 0; + } + + static int html_append_search(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlSearchUserdata *search_userdata) { + return quickmedia_html_find_nodes_xpath(html_search, html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlSearchUserdata *search_userdata = (HtmlSearchUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, search_userdata->field1); + const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2); + if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) { + auto item = BodyItem::create(strip(field1_value)); + item->url = strip(field2_value); + search_userdata->body_items->push_back(std::move(item)); + } + }, search_userdata); + } + + static int html_body_item_merge(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlMergeUserdata *merge_userdata) { + return quickmedia_html_find_nodes_xpath(html_search, html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlMergeUserdata *merge_userdata = (HtmlMergeUserdata*)userdata; + BodyItemContext &body_item_image_context = merge_userdata->body_item_image_context; + const char *field_value = html_attr_or_inner_text(node, merge_userdata->field_name); + if(body_item_image_context.index < body_item_image_context.body_items->size() + && field_value && (!merge_userdata->field_contains || strstr(field_value, merge_userdata->field_contains))) + { + if(merge_userdata->type == MergeType::THUMBNAIL) { + (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = strip(field_value); + } else if(merge_userdata->type == MergeType::UPLOAD_TIME) { + std::string uploaded_date = strip(field_value); + (*body_item_image_context.body_items)[body_item_image_context.index]->set_description("Uploaded: " + uploaded_date); + } + body_item_image_context.index++; + } + }, merge_userdata); + } + + static int html_get_page_url(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlPageImageUserdata *page_image_userdata) { + return quickmedia_html_find_nodes_xpath(html_search, html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlPageImageUserdata *page_image_userdata = (HtmlPageImageUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name); + if(page_image_userdata->url->empty() && field1_value && (!page_image_userdata->field_contains || strstr(field1_value, page_image_userdata->field_contains))) { + *page_image_userdata->url = strip(field1_value); + } + }, page_image_userdata); + } + + MangaGenericSearchPage::MangaGenericSearchPage(Program *program, const char *service_name, const char *website_url) : Page(program), service_name(service_name), website_url(website_url ? website_url : "") + { + if(!this->website_url.empty()) { + if(this->website_url.back() != '/') + this->website_url.push_back('/'); + } + } + + SearchResult MangaGenericSearchPage::search(const std::string &str, BodyItems &result_items) { + return plugin_result_to_search_result(get_page(str, 0, result_items)); + } + + PluginResult MangaGenericSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { + if(!search_query.search_prefix || !search_query.page_prefix || !text_query.html_query || !text_query.title_field || !text_query.url_field) { + assert(false); + return PluginResult::ERR; + } + + HtmlSearchUserdata search_userdata; + search_userdata.body_items = &result_items; + search_userdata.field1 = text_query.title_field; + search_userdata.field2 = text_query.url_field; + search_userdata.field2_contains = text_query.url_contains; + + std::string url = search_query.search_prefix; + url += url_param_encode(str); + url += search_query.page_prefix + std::to_string(search_query.page_start + page); + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return PluginResult::NET_ERR; + + if(website_data.empty()) + return PluginResult::OK; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = html_append_search(&html_search, text_query.html_query, &search_userdata); + if(result != 0) + goto cleanup; + + assert(!thumbnail_query.html_query || thumbnail_query.field_name); + if(thumbnail_query.html_query && thumbnail_query.field_name) { + HtmlMergeUserdata merge_userdata; + merge_userdata.type = MergeType::THUMBNAIL; + merge_userdata.body_item_image_context.body_items = &result_items; + merge_userdata.body_item_image_context.index = 0; + merge_userdata.field_name = thumbnail_query.field_name; + merge_userdata.field_contains = thumbnail_query.field_contains; + result = html_body_item_merge(&html_search, thumbnail_query.html_query, &merge_userdata); + } + + for(auto &body_item : result_items) { + if(starts_with(body_item->url, "//")) + body_item->url = "https://" + body_item->url.substr(2); + else if(starts_with(body_item->url, "/")) + body_item->url = website_url + body_item->url.substr(1); + + if(starts_with(body_item->thumbnail_url, "//")) + body_item->thumbnail_url = "https://" + body_item->thumbnail_url.substr(2); + else if(starts_with(body_item->thumbnail_url, "/")) + body_item->thumbnail_url = website_url + body_item->thumbnail_url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result == 0) { + return PluginResult::OK; + } else { + result_items.clear(); + return PluginResult::ERR; + } + } + + PluginResult MangaGenericSearchPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { + if(!list_chapters_query.html_query || !list_chapters_query.title_field || !list_chapters_query.url_field) { + assert(false); + return PluginResult::ERR; + } + + BodyItems chapters_items; + HtmlSearchUserdata search_userdata; + search_userdata.body_items = &chapters_items; + search_userdata.field1 = list_chapters_query.title_field; + search_userdata.field2 = list_chapters_query.url_field; + search_userdata.field2_contains = list_chapters_query.url_contains; + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return PluginResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = html_append_search(&html_search, list_chapters_query.html_query, &search_userdata); + if(result != 0) + goto cleanup; + + assert(!list_chapters_query.uploaded_time_html_query || list_chapters_query.uploaded_time_field_name); + if(list_chapters_query.uploaded_time_html_query && list_chapters_query.uploaded_time_field_name) { + HtmlMergeUserdata merge_userdata; + merge_userdata.type = MergeType::UPLOAD_TIME; + merge_userdata.body_item_image_context.body_items = &chapters_items; + merge_userdata.body_item_image_context.index = 0; + merge_userdata.field_name = list_chapters_query.uploaded_time_field_name; + merge_userdata.field_contains = list_chapters_query.uploaded_time_field_contains; + result = html_body_item_merge(&html_search, list_chapters_query.uploaded_time_html_query, &merge_userdata); + } + + for(auto &body_item : chapters_items) { + if(starts_with(body_item->url, "//")) + body_item->url = "https://" + body_item->url.substr(2); + else if(starts_with(body_item->url, "/")) + body_item->url = website_url + body_item->url.substr(1); + + if(starts_with(body_item->thumbnail_url, "//")) + body_item->thumbnail_url = "https://" + body_item->thumbnail_url.substr(2); + else if(starts_with(body_item->thumbnail_url, "/")) + body_item->thumbnail_url = website_url + body_item->thumbnail_url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result != 0) + return PluginResult::ERR; + + auto body = create_body(); + body->items = std::move(chapters_items); + result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url, manga_id_extractor, service_name, website_url, &list_page_query), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); + return PluginResult::OK; + } + + PluginResult MangaGenericChaptersPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { + result_tabs.push_back(Tab{nullptr, std::make_unique(program, content_title, title, url, service_name, website_url, list_page_query), nullptr}); + return PluginResult::OK; + } + + bool MangaGenericChaptersPage::extract_id_from_url(const std::string &url, std::string &manga_id) const { + size_t start_index = url.find(manga_id_extractor.prefix); + if(start_index == std::string::npos) + return false; + + if(!manga_id_extractor.end) { + manga_id = url.substr(start_index); + return true; + } + + start_index += strlen(manga_id_extractor.prefix); + size_t end_index = url.find(manga_id_extractor.end, start_index); + if(end_index == std::string::npos) { + manga_id = url.substr(start_index); + return true; + } + + manga_id = url.substr(start_index, end_index - start_index); + return true; + } + + ImageResult MangaGenericImagesPage::get_number_of_images(int &num_images) { + num_images = 0; + chapter_num_pages = -1; + switch(list_page_query->type) { + case ListPageQueryType::IMAGES: { + ImageResult result = get_page_image_urls(); + if(result != ImageResult::OK) return result; + num_images = chapter_image_urls.size(); + return ImageResult::OK; + } + case ListPageQueryType::PAGINATION: { + const ListPagePaginationQuery *list_page_pagination_query = &list_page_query->pagination_query; + if(!list_page_pagination_query->pages_html_query || !list_page_pagination_query->pages_field_name + || !list_page_pagination_query->image_html_query || !list_page_pagination_query->image_field_name + || !list_page_pagination_query->next_page_html_query || !list_page_pagination_query->next_page_field_name) + { + assert(false); + return ImageResult::ERR; + } + + if(chapter_num_pages != -1) { + num_images = chapter_num_pages; + return ImageResult::OK; + } + + current_image_url.clear(); + next_page_url.clear(); + + HtmlPageCountUserdata page_count_userdata; + page_count_userdata.num_pages = 0; + page_count_userdata.field_name = list_page_pagination_query->pages_field_name; + page_count_userdata.field_contains = list_page_pagination_query->pages_field_contains; + + HtmlPageImageUserdata page_image_userdata; + page_image_userdata.url = ¤t_image_url; + page_image_userdata.field_name = list_page_pagination_query->image_field_name; + page_image_userdata.field_contains = list_page_pagination_query->image_field_contains; + + HtmlPageImageUserdata next_page_userdata; + next_page_userdata.url = &next_page_url; + next_page_userdata.field_name = list_page_pagination_query->next_page_field_name; + next_page_userdata.field_contains = list_page_pagination_query->next_page_field_contains; + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return ImageResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, list_page_pagination_query->pages_html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlPageCountUserdata *page_count_userdata = (HtmlPageCountUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name); + if(field1_value && (!page_count_userdata->field_contains || strstr(field1_value, page_count_userdata->field_contains))) { + page_count_userdata->num_pages++; + } + }, &page_count_userdata); + + if(result == 0 && list_page_pagination_query->pages_post_handler) { + page_count_userdata.num_pages = list_page_pagination_query->pages_post_handler(page_count_userdata.num_pages); + } + + if(result != 0 || page_count_userdata.num_pages == 0) { + result = -1; + goto cleanup; + } + + result = html_get_page_url(&html_search, list_page_pagination_query->image_html_query, &page_image_userdata); + if(result != 0 || current_image_url.empty()) { + result = -1; + goto cleanup; + } + + result = html_get_page_url(&html_search, list_page_pagination_query->next_page_html_query, &next_page_userdata); + if(next_page_url.empty()) + result = -1; + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result != 0) { + current_image_url.clear(); + next_page_url.clear(); + return ImageResult::ERR; + } + + if(starts_with(current_image_url, "//")) + current_image_url = "https://" + current_image_url.substr(2); + else if(starts_with(current_image_url, "/")) + current_image_url = website_url + current_image_url.substr(1); + + num_images = page_count_userdata.num_pages; + chapter_num_pages = num_images; + return ImageResult::OK; + } + } + return ImageResult::OK; + } + + ImageResult MangaGenericImagesPage::for_each_page_in_chapter(PageCallback callback) { + switch(list_page_query->type) { + case ListPageQueryType::IMAGES: { + ImageResult result = get_page_image_urls(); + if(result != ImageResult::OK) return result; + for(const std::string &url : chapter_image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + case ListPageQueryType::PAGINATION: { + const ListPagePaginationQuery *list_page_pagination_query = &list_page_query->pagination_query; + if(!list_page_pagination_query->image_html_query || !list_page_pagination_query->image_field_name + || !list_page_pagination_query->next_page_html_query || !list_page_pagination_query->next_page_field_name) + { + assert(false); + return ImageResult::ERR; + } + + int num_images = 0; + ImageResult result = get_number_of_images(num_images); + if(result != ImageResult::OK) return result; + + if(!callback(current_image_url)) + return ImageResult::OK; + + for(int i = 0; i < num_images; ++i) { + std::string full_url = url + next_page_url; + current_image_url.clear(); + next_page_url.clear(); + + HtmlPageImageUserdata page_image_userdata; + page_image_userdata.url = ¤t_image_url; + page_image_userdata.field_name = list_page_pagination_query->image_field_name; + page_image_userdata.field_contains = list_page_pagination_query->image_field_contains; + + HtmlPageImageUserdata next_page_userdata; + next_page_userdata.url = &next_page_url; + next_page_userdata.field_name = list_page_pagination_query->next_page_field_name; + next_page_userdata.field_contains = list_page_pagination_query->next_page_field_contains; + + std::string image_src; + std::string website_data; + if(download_to_string_cache(full_url, website_data, {}, true) != DownloadResult::OK) + return ImageResult::ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + html_get_page_url(&html_search, list_page_pagination_query->image_html_query, &page_image_userdata); + html_get_page_url(&html_search, list_page_pagination_query->next_page_html_query, &next_page_userdata); + + cleanup: + quickmedia_html_search_deinit(&html_search); + + if(starts_with(current_image_url, "//")) + current_image_url = "https://" + current_image_url.substr(2); + else if(starts_with(current_image_url, "/")) + current_image_url = website_url + current_image_url.substr(1); + + if(!callback(current_image_url)) + break; + } + + return ImageResult::OK; + } + } + return ImageResult::OK; + } + + ImageResult MangaGenericImagesPage::get_page_image_urls() { + if(!prev_chapter_url.empty()) + return ImageResult::OK; + + assert(list_page_query->type == ListPageQueryType::IMAGES); + const ListPageImagesQuery *list_page_images_query = &list_page_query->images_query; + if(!list_page_images_query->html_query || !list_page_images_query->field_name) { + assert(false); + return ImageResult::ERR; + } + + HtmlListPageImagesUserdata list_page_images_userdata; + list_page_images_userdata.urls = &chapter_image_urls; + list_page_images_userdata.field_name = list_page_images_query->field_name; + list_page_images_userdata.field_contains = list_page_images_query->field_contains; + + std::string website_data; + if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) + return ImageResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, list_page_images_query->html_query, + [](QuickMediaHtmlNode *node, void *userdata) { + HtmlListPageImagesUserdata *list_page_images_userdata = (HtmlListPageImagesUserdata*)userdata; + const char *field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name); + if(field1_value && (!list_page_images_userdata->field_contains || strstr(field1_value, list_page_images_userdata->field_contains))) { + list_page_images_userdata->urls->push_back(strip(field1_value)); + } + }, &list_page_images_userdata); + + if(result == 0 && !chapter_image_urls.empty() && list_page_images_query->post_handler) + list_page_images_query->post_handler(chapter_image_urls); + + for(std::string &url : chapter_image_urls) { + if(starts_with(url, "//")) + url = "https://" + url.substr(2); + else if(starts_with(url, "/")) + url = website_url + url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result != 0 || chapter_image_urls.empty()) { + chapter_image_urls.clear(); + return ImageResult::ERR; + } + + prev_chapter_url = url; + return ImageResult::OK; + } + + + MangaGenericSearchPage& MangaGenericSearchPage::search_handler(const char *search_prefix, const char *page_prefix, int page_start) { + search_query.search_prefix = search_prefix; + search_query.page_prefix = page_prefix; + search_query.page_start = page_start; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::text_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains) { + text_query.html_query = html_query; + text_query.title_field = title_field; + text_query.url_field = url_field; + text_query.url_contains = url_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::thumbnail_handler(const char *html_query, const char *field_name, const char *field_contains) { + thumbnail_query.html_query = html_query; + thumbnail_query.field_name = field_name; + thumbnail_query.field_contains = field_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_chapters_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains) { + list_chapters_query.html_query = html_query; + list_chapters_query.title_field = title_field; + list_chapters_query.url_field = url_field; + list_chapters_query.url_contains = url_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_chapters_uploaded_time_handler(const char *html_query, const char *field_name, const char *field_contains) { + list_chapters_query.uploaded_time_html_query = html_query; + list_chapters_query.uploaded_time_field_name = field_name; + list_chapters_query.uploaded_time_field_contains = field_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_page_images_handler(const char *html_query, const char *field_name, const char *field_contains, ListPageImagesQueryPost post_handler) { + list_page_query.type = ListPageQueryType::IMAGES; + list_page_query.images_query.html_query = html_query; + list_page_query.images_query.field_name = field_name; + list_page_query.images_query.field_contains = field_contains; + list_page_query.images_query.post_handler = post_handler; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::list_page_images_pagination_handler( + const char *pages_html_query, const char *pages_field_name, const char *pages_field_contains, ListPagePaginationPagesPost pages_post_handler, + const char *image_html_query, const char *image_field_name, const char *image_field_contains, + const char *next_page_html_query, const char *next_page_field_name, const char *next_page_field_contains) + { + assert(pages_post_handler); + list_page_query.type = ListPageQueryType::PAGINATION; + list_page_query.pagination_query.pages_html_query = pages_html_query; + list_page_query.pagination_query.pages_field_name = pages_field_name; + list_page_query.pagination_query.pages_field_contains = pages_field_contains; + list_page_query.pagination_query.pages_post_handler = pages_post_handler; + + list_page_query.pagination_query.image_html_query = image_html_query; + list_page_query.pagination_query.image_field_name = image_field_name; + list_page_query.pagination_query.image_field_contains = image_field_contains; + + list_page_query.pagination_query.next_page_html_query = next_page_html_query; + list_page_query.pagination_query.next_page_field_name = next_page_field_name; + list_page_query.pagination_query.next_page_field_contains = next_page_field_contains; + return *this; + } + + MangaGenericSearchPage& MangaGenericSearchPage::manga_id_handler(const char *prefix, const char *end) { + manga_id_extractor.prefix = prefix; + manga_id_extractor.end = end; + return *this; + } +} \ No newline at end of file diff --git a/src/plugins/Mangadex.cpp b/src/plugins/Mangadex.cpp index 44c9762..0d0c601 100644 --- a/src/plugins/Mangadex.cpp +++ b/src/plugins/Mangadex.cpp @@ -219,7 +219,28 @@ namespace QuickMedia { return true; } - ImageResult MangadexImagesPage::get_page_image_urls(std::vector &urls) { + ImageResult MangadexImagesPage::get_number_of_images(int &num_images) { + num_images = 0; + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + num_images = chapter_image_urls.size(); + return ImageResult::OK; + } + + ImageResult MangadexImagesPage::for_each_page_in_chapter(PageCallback callback) { + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + for(const std::string &url : chapter_image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + + ImageResult MangadexImagesPage::get_image_urls_for_chapter(const std::string &url) { + if(!chapter_image_urls.empty()) + return ImageResult::OK; + std::string cookie_filepath; if(!get_cookie_filepath(cookie_filepath)) return ImageResult::ERR; @@ -261,11 +282,11 @@ namespace QuickMedia { continue; std::string image_url = server + chapter_hash_str + "/" + image_name.asCString(); - urls.push_back(std::move(image_url)); + chapter_image_urls.push_back(std::move(image_url)); } } - if(urls.empty()) + if(chapter_image_urls.empty()) return ImageResult::ERR; return ImageResult::OK; diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp index b67acb2..e63ff6c 100644 --- a/src/plugins/Manganelo.cpp +++ b/src/plugins/Manganelo.cpp @@ -226,7 +226,28 @@ namespace QuickMedia { return PluginResult::OK; } - ImageResult ManganeloImagesPage::get_page_image_urls(std::vector &urls) { + ImageResult ManganeloImagesPage::get_number_of_images(int &num_images) { + num_images = 0; + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + num_images = chapter_image_urls.size(); + return ImageResult::OK; + } + + ImageResult ManganeloImagesPage::for_each_page_in_chapter(PageCallback callback) { + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) return image_result; + for(const std::string &url : chapter_image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + + ImageResult ManganeloImagesPage::get_image_urls_for_chapter(const std::string &url) { + if(!chapter_image_urls.empty()) + return ImageResult::OK; + std::string website_data; if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) return ImageResult::NET_ERR; @@ -244,16 +265,14 @@ namespace QuickMedia { std::string image_url = strip(src); urls->push_back(std::move(image_url)); } - }, &urls); + }, &chapter_image_urls); cleanup: quickmedia_html_search_deinit(&html_search); - if(result != 0) - return ImageResult::ERR; - - if(urls.empty()) + if(result != 0 || chapter_image_urls.empty()) { + chapter_image_urls.clear(); return ImageResult::ERR; - + } return ImageResult::OK; } } \ No newline at end of file diff --git a/src/plugins/Manganelos.cpp b/src/plugins/Manganelos.cpp deleted file mode 100644 index 04d9ca2..0000000 --- a/src/plugins/Manganelos.cpp +++ /dev/null @@ -1,154 +0,0 @@ -#include "../../plugins/Manganelos.hpp" -#include "../../include/Notification.hpp" -#include "../../include/StringUtils.hpp" -#include "../../include/NetUtils.hpp" -#include - -namespace QuickMedia { - static SearchResult search_page(const std::string &str, int page, BodyItems &result_items) { - std::string url = "http://manganelos.com/search?q="; - url += url_param_encode(str); - url += "&page=" + std::to_string(page); - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return SearchResult::NET_ERR; - - if(website_data.empty()) - return SearchResult::OK; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='media-left cover-manga']//a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *title = quickmedia_html_node_get_attribute_value(node, "title"); - if(href && title && strstr(href, "/manga/")) { - auto item = BodyItem::create(strip(title)); - item->url = strip(href); - item_data->push_back(std::move(item)); - } - }, &result_items); - - BodyItemContext body_item_image_context; - body_item_image_context.body_items = &result_items; - body_item_image_context.index = 0; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='media-left cover-manga']//img[class='media-object']", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && strstr(src, "/mangaimage/") && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = src; - item_data->index++; - } - }, &body_item_image_context); - - cleanup: - quickmedia_html_search_deinit(&html_search); - return SearchResult::OK; - } - - SearchResult ManganelosSearchPage::search(const std::string &str, BodyItems &result_items) { - return search_page(str, 1, result_items); - } - - PluginResult ManganelosSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { - return search_result_to_plugin_result(search_page(str, 1 + page, result_items)); - } - - PluginResult ManganelosSearchPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - BodyItems chapters_items; - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return PluginResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//section[id='examples']//div[class='chapter-list']//a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(href && text) { - auto item = BodyItem::create(strip(text)); - item->url = strip(href); - item_data->push_back(std::move(item)); - } - }, &chapters_items); - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0) - return PluginResult::ERR; - - auto body = create_body(); - body->items = std::move(chapters_items); - result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); - return PluginResult::OK; - } - - PluginResult ManganelosChaptersPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - result_tabs.push_back(Tab{nullptr, std::make_unique(program, content_title, title, url), nullptr}); - return PluginResult::OK; - } - - bool ManganelosChaptersPage::extract_id_from_url(const std::string &url, std::string &manga_id) const { - size_t start_index = url.find("/manga/"); - if(start_index == std::string::npos) - return false; - - start_index += 7; - size_t end_index = url.find("?", start_index); - if(end_index == std::string::npos) { - manga_id = url.substr(start_index); - return true; - } - - manga_id = url.substr(start_index, end_index - start_index); - return true; - } - - ImageResult ManganelosImagesPage::get_page_image_urls(std::vector &urls) { - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return ImageResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//p[id='arraydata']", - [](QuickMediaHtmlNode *node, void *userdata) { - std::vector *chapter_image_urls = (std::vector*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(text) { - string_split(text, ',', [chapter_image_urls](const char *str, size_t size) { - std::string url(str, size); - url = strip(url); - chapter_image_urls->push_back(std::move(url)); - return true; - }); - } - }, &urls); - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0) - return ImageResult::ERR; - - if(urls.empty()) - return ImageResult::ERR; - - return ImageResult::OK; - } -} \ No newline at end of file diff --git a/src/plugins/Mangatown.cpp b/src/plugins/Mangatown.cpp deleted file mode 100644 index 4db1ebc..0000000 --- a/src/plugins/Mangatown.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include "../../plugins/Mangatown.hpp" -#include "../../include/Notification.hpp" -#include "../../include/StringUtils.hpp" -#include "../../include/NetUtils.hpp" -#include - -static const std::string mangatown_url = "https://www.mangatown.com"; - -namespace QuickMedia { - static bool is_number_with_zero_fill(const char *str) { - while(*str == '0') { ++str; } - return atoi(str) != 0; - } - - static SearchResult search_page(const std::string &str, int page, BodyItems &result_items) { - std::string url = "https://www.mangatown.com/search?name="; - url += url_param_encode(str); - url += "&page=" + std::to_string(page); - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return SearchResult::NET_ERR; - - if(website_data.empty()) - return SearchResult::OK; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//p[class='title']/a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *title = quickmedia_html_node_get_attribute_value(node, "title"); - if(href && title && strncmp(href, "/manga/", 7) == 0) { - auto item = BodyItem::create(strip(title)); - item->url = mangatown_url + strip(href); - item_data->push_back(std::move(item)); - } - }, &result_items); - - BodyItemContext body_item_image_context; - body_item_image_context.body_items = &result_items; - body_item_image_context.index = 0; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//a[class='manga_cover']/img", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = src; - item_data->index++; - } - }, &body_item_image_context); - - cleanup: - quickmedia_html_search_deinit(&html_search); - return SearchResult::OK; - } - - SearchResult MangatownSearchPage::search(const std::string &str, BodyItems &result_items) { - return search_page(str, 1, result_items); - } - - PluginResult MangatownSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { - return search_result_to_plugin_result(search_page(str, 1 + page, result_items)); - } - - PluginResult MangatownSearchPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - BodyItems chapters_items; - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return PluginResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='chapter_list']//a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(href && text && strncmp(href, "/manga/", 7) == 0) { - auto item = BodyItem::create(strip(text)); - item->url = mangatown_url + strip(href); - item_data->push_back(std::move(item)); - } - }, &chapters_items); - - BodyItemContext body_item_context; - body_item_context.body_items = &chapters_items; - body_item_context.index = 0; - - quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='chapter_list']//span[class='time']", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *item_data = (BodyItemContext*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(text && item_data->index < item_data->body_items->size()) { - std::string uploaded_date = strip(text); - (*item_data->body_items)[item_data->index]->set_description("Uploaded: " + uploaded_date); - item_data->index++; - } - }, &body_item_context); - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0) - return PluginResult::ERR; - - auto body = create_body(); - body->items = std::move(chapters_items); - result_tabs.push_back(Tab{std::move(body), std::make_unique(program, title, url), create_search_bar("Search...", SEARCH_DELAY_FILTER)}); - return PluginResult::OK; - } - - PluginResult MangatownChaptersPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { - result_tabs.push_back(Tab{nullptr, std::make_unique(program, content_title, title, url), nullptr}); - return PluginResult::OK; - } - - bool MangatownChaptersPage::extract_id_from_url(const std::string &url, std::string &manga_id) const { - size_t start_index = url.find("/manga/"); - if(start_index == std::string::npos) - return false; - - start_index += 7; - size_t end_index = url.find("/", start_index); - if(end_index == std::string::npos) { - manga_id = url.substr(start_index); - return true; - } - - manga_id = url.substr(start_index, end_index - start_index); - return true; - } - - // First page = 0 - static std::string first_image_url_to_page_image_url(const std::string &image_url, int page) { - size_t slash_index = image_url.rfind('/'); - if(slash_index == std::string::npos) - return ""; - - size_t dot_index = image_url.rfind('.'); - if(dot_index == std::string::npos || dot_index <= slash_index) - return ""; - - std::string filename_without_ext = image_url.substr(slash_index + 1, dot_index - (slash_index + 1)); - if(!filename_without_ext.empty() && filename_without_ext.back() == '1') - ++page; - - std::string page_str = std::to_string(page); - if(page_str.size() > filename_without_ext.size()) - return ""; - - filename_without_ext.replace(filename_without_ext.size() - page_str.size(), page_str.size(), page_str); - return image_url.substr(0, slash_index) + '/' + filename_without_ext + image_url.substr(dot_index); - } - - ImageResult MangatownImagesPage::get_page_image_urls(std::vector &urls) { - std::string image_src; - int num_pages = 0; - - std::string website_data; - if(download_to_string(url, website_data, {}, true) != DownloadResult::OK) - return ImageResult::NET_ERR; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='viewer']//img", - [](QuickMediaHtmlNode *node, void *userdata) { - std::string *image_src = (std::string*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && strstr(src, "/store/manga/")) { - if(strncmp(src, "//", 2) == 0) - *image_src = strip(src + 2); - else - *image_src = strip(src); - } - }, &image_src); - - if(result != 0 || image_src.empty()) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='page_select']//option", - [](QuickMediaHtmlNode *node, void *userdata) { - int *last_num_pages = (int*)userdata; - const char *value = quickmedia_html_node_get_attribute_value(node, "value"); - const char *text = quickmedia_html_node_get_text(node); - if(value && strncmp(value, "/manga/", 7) == 0) { - if(is_number_with_zero_fill(text)) { - (*last_num_pages)++; - } - } - }, &num_pages); - - num_pages /= 2; - - cleanup: - quickmedia_html_search_deinit(&html_search); - if(result != 0 || image_src.empty() || num_pages == 0) - return ImageResult::ERR; - - for(int i = 0; i < num_pages; ++i) { - urls.push_back(first_image_url_to_page_image_url(image_src, i)); - } - - return ImageResult::OK; - } -} \ No newline at end of file -- cgit v1.2.3