From 9799803529c57930a0e7f12e45cbcf2b2e4419eb Mon Sep 17 00:00:00 2001 From: dec05eba Date: Thu, 28 May 2020 02:08:40 +0200 Subject: Add support for mangadex --- src/plugins/Mangadex.cpp | 377 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 src/plugins/Mangadex.cpp (limited to 'src/plugins/Mangadex.cpp') diff --git a/src/plugins/Mangadex.cpp b/src/plugins/Mangadex.cpp new file mode 100644 index 0000000..43368af --- /dev/null +++ b/src/plugins/Mangadex.cpp @@ -0,0 +1,377 @@ +#include "../../plugins/Mangadex.hpp" +#include "../../include/Storage.hpp" +#include +#include + +static const std::string mangadex_url = "https://mangadex.org"; +// TODO: Allow selecting other languages than english +static const char *language_code = "1"; // english +static const std::string useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"; + +namespace QuickMedia { + static std::string title_url_extract_manga_id(const std::string &url) { + size_t find_index = url.find("/title/"); + if(find_index == std::string::npos) + return ""; + + size_t id_start_index = find_index + 7; + size_t end_index = url.find("/", id_start_index); + if(end_index == std::string::npos) + return url.substr(id_start_index); + + return url.substr(id_start_index, end_index - id_start_index); + } + + static std::string chapter_url_extract_manga_id(const std::string &url) { + size_t find_index = url.find("/chapter/"); + if(find_index == std::string::npos) + return ""; + return url.substr(find_index + 9); + } + + struct BodyItemChapterContext { + BodyItems *body_items; + int prev_chapter_number; + bool *is_last_page; + }; + + SearchResult Mangadex::search_page(const std::string &url, BodyItems &result_items, int page, bool *is_last_page) { + *is_last_page = true; + CommandArg user_agent_arg = { "-H", useragent_str }; + + std::string chapter_url = url; + if(chapter_url[0] != '/') + chapter_url += "/"; + chapter_url += "chapters/" + std::to_string(page) + "/"; + std::string website_data; + if(download_to_string(chapter_url, website_data, {std::move(user_agent_arg)}, use_tor) != DownloadResult::OK) + return SearchResult::NET_ERR; + + std::string manga_id = title_url_extract_manga_id(chapter_url); + std::string query = "//div[data-manga-id='" + manga_id + "']"; + + BodyItemChapterContext body_item_chapter_context; + body_item_chapter_context.body_items = &result_items; + body_item_chapter_context.prev_chapter_number = -1; + body_item_chapter_context.is_last_page = is_last_page; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, query.c_str(), + [](QuickMediaHtmlNode *node, void *userdata) { + auto *item_data = (BodyItemChapterContext*)userdata; + const char *data_lang = quickmedia_html_node_get_attribute_value(node, "data-lang"); + if(strcmp(data_lang, language_code) != 0) + return; + + const char *chapter_id = quickmedia_html_node_get_attribute_value(node, "data-id"); + if(!chapter_id) + return; + + const char *chapter_number_str = quickmedia_html_node_get_attribute_value(node, "data-chapter"); + if(!chapter_number_str) + return; + + int chapter_number = atoi(chapter_number_str); + if(chapter_number == 0 || chapter_number == item_data->prev_chapter_number) + return; + + item_data->prev_chapter_number = chapter_number; + + const char *chapter_title = quickmedia_html_node_get_attribute_value(node, "data-title"); + std::string chapter_url = mangadex_url + "/chapter/" + chapter_id; + std::string chapter_name = std::string("Ch. ") + chapter_number_str; + if(chapter_title) + chapter_name += std::string(" - ") + chapter_title; + + auto item = std::make_unique(std::move(chapter_name)); + item->url = std::move(chapter_url); + item_data->body_items->push_back(std::move(item)); + *item_data->is_last_page = false; + }, &body_item_chapter_context); + + cleanup: + quickmedia_html_search_deinit(&html_search); + return result == 0 ? SearchResult::OK : SearchResult::ERR; + } + + // TODO: Make pagination asynchronous and make it go to the next page when navigating to the bottom in the list of chapters + // in the GUI. Currently all pages are fetched at once, synchronously. This can be very slow for certain manga like Naruto + // which has 21 pages of chapters... + SearchResult Mangadex::search(const std::string &url, BodyItems &result_items) { +#if 0 + int page = 1; + while(true) { + bool is_last_page; + SearchResult search_result = search_page(url, result_items, page, &is_last_page); + if(search_result != SearchResult::OK) + return search_result; + + ++page; + if(is_last_page) + break; + } + return SearchResult::OK; +#else + CommandArg user_agent_arg = { "-H", useragent_str }; + + std::string manga_id = title_url_extract_manga_id(url); + std::string request_url = "https://mangadex.org/api/?id=" + manga_id + "&type=manga"; + std::string server_response; + if(download_to_string(request_url, server_response, {std::move(user_agent_arg)}, use_tor) != DownloadResult::OK) + return SearchResult::NET_ERR; + + if(server_response.empty()) + return SearchResult::OK; + + Json::Value json_root; + Json::CharReaderBuilder json_builder; + std::unique_ptr json_reader(json_builder.newCharReader()); + std::string json_errors; + if(!json_reader->parse(&server_response[0], &server_response[server_response.size()], &json_root, &json_errors)) { + fprintf(stderr, "Mangadex search json error: %s\n", json_errors.c_str()); + return SearchResult::ERR; + } + + Json::Value &status_json = json_root["status"]; + if(!status_json.isString() || status_json.asString() != "OK") + return SearchResult::ERR; + + Json::Value &chapter_json = json_root["chapter"]; + if(!chapter_json.isObject()) + return SearchResult::ERR; + + std::vector> chapters(chapter_json.size()); + for(auto &member_name : chapter_json.getMemberNames()) { + Json::Value chapter = chapter_json[member_name]; + if(chapter.isObject()) + chapters.push_back(std::make_pair(member_name, std::move(chapter))); + } + + std::sort(chapters.begin(), chapters.end(), [](std::pair &a, std::pair &b) { + Json::Value &a_timestamp_json = a.second["timestamp"]; + Json::Value &b_timestamp_json = b.second["timestamp"]; + int64_t a_timestamp = 0; + int64_t b_timestamp = 0; + if(a_timestamp_json.isInt64()) + a_timestamp = a_timestamp_json.asInt64(); + if(b_timestamp_json.isInt64()) + b_timestamp = b_timestamp_json.asInt64(); + return a_timestamp > b_timestamp; + }); + + int prev_chapter_number = -1; + for(auto it = chapters.begin(); it != chapters.end(); ++it) { + const std::string &chapter_id = it->first; + Json::Value &chapter = it->second; + + Json::Value &lang_code = chapter["lang_code"]; + // TODO: Allow selecting other languages than english + if(!lang_code.isString() || lang_code.asString() != "gb") + continue; + + Json::Value &chapter_number_json = chapter["chapter"]; + if(!chapter_number_json.isString()) + continue; + + int chapter_number = atoi(chapter_number_json.asCString()); + if(chapter_number == 0 || chapter_number == prev_chapter_number) + continue; + prev_chapter_number = chapter_number; + + Json::Value &chapter_title_json = chapter["title"]; + std::string chapter_url = mangadex_url + "/chapter/" + chapter_id; + std::string chapter_name = std::string("Ch. ") + chapter_number_json.asCString(); + if(chapter_title_json.isString()) + chapter_name += std::string(" - ") + chapter_title_json.asCString(); + + auto item = std::make_unique(std::move(chapter_name)); + item->url = std::move(chapter_url); + result_items.push_back(std::move(item)); + } + return SearchResult::OK; +#endif + } + + static bool get_rememberme_token(std::string &rememberme_token) { + Path mangadex_credentials_path = get_storage_dir().join("credentials").join("mangadex.json"); + std::string mangadex_credentials; + if(file_get_content(mangadex_credentials_path, mangadex_credentials) != 0) { + fprintf(stderr, "Failed to get content of file: %s\n", mangadex_credentials_path.data.c_str()); + return false; + } + + Json::Value json_root; + Json::CharReaderBuilder json_builder; + std::unique_ptr json_reader(json_builder.newCharReader()); + std::string json_errors; + if(!json_reader->parse(&mangadex_credentials[0], &mangadex_credentials[mangadex_credentials.size()], &json_root, &json_errors)) { + fprintf(stderr, "Mangadex credentials json error: %s\n", json_errors.c_str()); + return false; + } + + if(json_root.isObject()) { + Json::Value &rememberme_token_json = json_root["rememberme_token"]; + if(rememberme_token_json.isString()) { + rememberme_token = rememberme_token_json.asString(); + return true; + } + } + return true; + } + + struct BodyItemImageContext { + BodyItems *body_items; + size_t index; + }; + + // TODO: Implement pagination (go to next page and get all results as well) + SuggestionResult Mangadex::update_search_suggestions(const std::string &text, BodyItems &result_items) { + std::string rememberme_token; + if(!get_rememberme_token(rememberme_token)) + return SuggestionResult::ERR; + + std::string url = "https://mangadex.org/search?title="; + url += url_param_encode(text); + CommandArg cookie_arg = { "-H", "cookie: mangadex_rememberme_token=" + rememberme_token }; + CommandArg user_agent_arg = { "-H", useragent_str }; + + std::string website_data; + if(download_to_string(url, website_data, {std::move(cookie_arg), std::move(user_agent_arg)}, use_tor) != DownloadResult::OK) + return SuggestionResult::NET_ERR; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, "//a", + [](QuickMediaHtmlNode *node, void *userdata) { + auto *item_data = (BodyItems*)userdata; + const char *href = quickmedia_html_node_get_attribute_value(node, "href"); + const char *title = quickmedia_html_node_get_attribute_value(node, "title"); + if(title && href && strncmp(href, "/title/", 7) == 0) { + auto item = std::make_unique(strip(title)); + item->url = mangadex_url + href; + item_data->push_back(std::move(item)); + } + }, &result_items); + + BodyItemImageContext body_item_image_context; + body_item_image_context.body_items = &result_items; + body_item_image_context.index = 0; + + result = quickmedia_html_find_nodes_xpath(&html_search, "//img", + [](QuickMediaHtmlNode *node, void *userdata) { + auto *item_data = (BodyItemImageContext*)userdata; + const char *src = quickmedia_html_node_get_attribute_value(node, "src"); + if(src && strncmp(src, "/images/manga/", 14) == 0) { + if(item_data->index < item_data->body_items->size()) { + (*item_data->body_items)[item_data->index]->thumbnail_url = mangadex_url + src; + item_data->index++; + } + } + }, &body_item_image_context); + + cleanup: + quickmedia_html_search_deinit(&html_search); + return result == 0 ? SuggestionResult::OK : SuggestionResult::ERR; + } + + ImageResult Mangadex::get_number_of_images(const std::string &url, int &num_images) { + std::lock_guard lock(image_urls_mutex); + num_images = 0; + ImageResult image_result = get_image_urls_for_chapter(url); + if(image_result != ImageResult::OK) + return image_result; + + num_images = last_chapter_image_urls.size(); + return ImageResult::OK; + } + + ImageResult Mangadex::get_image_urls_for_chapter(const std::string &url) { + if(url == last_chapter_url) + return ImageResult::OK; + + last_chapter_image_urls.clear(); + + CommandArg user_agent_arg = { "-H", useragent_str }; + std::string manga_id = chapter_url_extract_manga_id(url); + std::string request_url = mangadex_url + "/api/?id=" + manga_id + "&server=null&type=chapter"; + std::string server_response; + if(download_to_string(request_url, server_response, {std::move(user_agent_arg)}, use_tor) != DownloadResult::OK) + return ImageResult::NET_ERR; + + if(server_response.empty()) + return ImageResult::OK; + + Json::Value json_root; + Json::CharReaderBuilder json_builder; + std::unique_ptr json_reader(json_builder.newCharReader()); + std::string json_errors; + if(!json_reader->parse(&server_response[0], &server_response[server_response.size()], &json_root, &json_errors)) { + fprintf(stderr, "Mangadex image urls json error: %s\n", json_errors.c_str()); + return ImageResult::ERR; + } + + Json::Value &status_json = json_root["status"]; + if(!status_json.isString() || status_json.asString() != "OK") + return ImageResult::ERR; + + Json::Value &chapter_hash = json_root["hash"]; + if(!chapter_hash.isString()) + return ImageResult::ERR; + const char *chapter_hash_str = chapter_hash.asCString(); + + Json::Value &server_json = json_root["server"]; + std::string server; + if(server_json.isString()) + server = server_json.asString(); + else + server = mangadex_url + "/data/"; + + Json::Value &page_array = json_root["page_array"]; + if(page_array.isArray()) { + for(const Json::Value &image_name : page_array) { + if(!image_name.isString()) + continue; + + std::string image_url = server + chapter_hash_str + "/" + image_name.asCString(); + last_chapter_image_urls.push_back(std::move(image_url)); + } + } + + last_chapter_url = url; + if(last_chapter_image_urls.empty()) { + last_chapter_url.clear(); + return ImageResult::ERR; + } + return ImageResult::OK; + } + + ImageResult Mangadex::for_each_page_in_chapter(const std::string &chapter_url, PageCallback callback) { + std::vector image_urls; + { + std::lock_guard lock(image_urls_mutex); + ImageResult image_result = get_image_urls_for_chapter(chapter_url); + if(image_result != ImageResult::OK) + return image_result; + + image_urls = last_chapter_image_urls; + } + + for(const std::string &url : image_urls) { + if(!callback(url)) + break; + } + return ImageResult::OK; + } + + bool Mangadex::extract_id_from_url(const std::string &url, std::string &manga_id) { + manga_id = title_url_extract_manga_id(url); + return true; + } +} -- cgit v1.2.3