#include "../../plugins/Mangadex.hpp"
#include "../../include/Storage.hpp"
// NOTE(review): the targets of the next two includes were lost when this file was
// extracted (only "#include #include" survived). They are restored from the APIs used
// below (quickmedia_html_* and Json::*) - confirm against the build.
#include <quickmedia/HtmlSearch.h>
#include <json/reader.h>
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

static const std::string mangadex_url = "https://mangadex.org";
// TODO: Allow selecting other languages than english
static const char *language_code = "1"; // english
static const std::string useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";

namespace QuickMedia {
    // Returns the path segment that follows |marker| in |url| (up to the next '/' or the end of the string),
    // or an empty string if |marker| does not occur in |url|.
    static std::string extract_path_segment_after(const std::string &url, const std::string &marker) {
        const size_t marker_index = url.find(marker);
        if(marker_index == std::string::npos)
            return "";

        const size_t segment_start = marker_index + marker.size();
        const size_t segment_end = url.find('/', segment_start);
        if(segment_end == std::string::npos)
            return url.substr(segment_start);
        return url.substr(segment_start, segment_end - segment_start);
    }

    // Extracts the id from a ".../title/<id>[/...]" url. Returns an empty string if the url has no "/title/" part.
    static std::string title_url_extract_manga_id(const std::string &url) {
        return extract_path_segment_after(url, "/title/");
    }

    // Extracts the id from a ".../chapter/<id>[/...]" url. Returns an empty string if the url has no "/chapter/" part.
    // Anything after the id (for example a trailing "/<page>") is dropped so it can't leak into the api request.
    static std::string chapter_url_extract_manga_id(const std::string &url) {
        return extract_path_segment_after(url, "/chapter/");
    }

    // Parses |text| as json into |json_root|.
    // On failure "<error_context> json error: <details>" is written to stderr and false is returned.
    static bool parse_json(const std::string &text, Json::Value &json_root, const char *error_context) {
        Json::CharReaderBuilder json_builder;
        std::unique_ptr<Json::CharReader> json_reader(json_builder.newCharReader());
        std::string json_errors;
        const char *text_begin = text.data();
        if(!json_reader->parse(text_begin, text_begin + text.size(), &json_root, &json_errors)) {
            fprintf(stderr, "%s json error: %s\n", error_context, json_errors.c_str());
            return false;
        }
        return true;
    }

    // State shared with the html node callback in Mangadex::search_page
    struct BodyItemChapterContext {
        BodyItems *body_items;
        int prev_chapter_number;
        bool *is_last_page;
    };

    // Downloads chapter list page |page| of the manga at |url| and appends one item per chapter to |result_items|.
    // |is_last_page| is set to false if at least one chapter was found on the page, otherwise it's true.
    SearchResult Mangadex::search_page(const std::string &url, BodyItems &result_items, int page, bool *is_last_page) {
        *is_last_page = true;
        CommandArg user_agent_arg = { "-H", useragent_str };

        std::string chapter_url = url;
        // Only add a separator if the url doesn't already end with one
        if(chapter_url.empty() || chapter_url.back() != '/')
            chapter_url += "/";
        chapter_url += "chapters/" + std::to_string(page) + "/";

        std::string website_data;
        if(download_to_string(chapter_url, website_data, {std::move(user_agent_arg)}, use_tor) != DownloadResult::OK)
            return SearchResult::NET_ERR;

        const std::string manga_id = title_url_extract_manga_id(chapter_url);
        const std::string query = "//div[data-manga-id='" + manga_id + "']";

        BodyItemChapterContext body_item_chapter_context;
        body_item_chapter_context.body_items = &result_items;
        body_item_chapter_context.prev_chapter_number = -1;
        body_item_chapter_context.is_last_page = is_last_page;

        QuickMediaHtmlSearch html_search;
        int result = quickmedia_html_search_init(&html_search, website_data.c_str());
        if(result == 0) {
            result = quickmedia_html_find_nodes_xpath(&html_search, query.c_str(),
                [](QuickMediaHtmlNode *node, void *userdata) {
                    auto *item_data = static_cast<BodyItemChapterContext*>(userdata);

                    // The attribute getter can return null (see the checks below), so guard before strcmp
                    const char *data_lang = quickmedia_html_node_get_attribute_value(node, "data-lang");
                    if(!data_lang || strcmp(data_lang, language_code) != 0)
                        return;

                    const char *chapter_id = quickmedia_html_node_get_attribute_value(node, "data-id");
                    if(!chapter_id)
                        return;

                    const char *chapter_number_str = quickmedia_html_node_get_attribute_value(node, "data-chapter");
                    if(!chapter_number_str)
                        return;

                    // Skip entries without a usable chapter number and consecutive entries for the same chapter
                    const int chapter_number = atoi(chapter_number_str);
                    if(chapter_number == 0 || chapter_number == item_data->prev_chapter_number)
                        return;
                    item_data->prev_chapter_number = chapter_number;

                    const char *chapter_title = quickmedia_html_node_get_attribute_value(node, "data-title");
                    std::string chapter_url = mangadex_url + "/chapter/" + chapter_id;
                    std::string chapter_name = std::string("Ch. ") + chapter_number_str;
                    if(chapter_title)
                        chapter_name += std::string(" - ") + chapter_title;

                    auto item = std::make_unique<BodyItem>(std::move(chapter_name));
                    item->url = std::move(chapter_url);
                    item_data->body_items->push_back(std::move(item));
                    *item_data->is_last_page = false;
                }, &body_item_chapter_context);
        }

        // Deinit is called even when init failed, same as the previous goto-cleanup flow
        quickmedia_html_search_deinit(&html_search);
        return result == 0 ? SearchResult::OK : SearchResult::ERR;
    }

    // TODO: Make pagination asynchronous and make it go to the next page when navigating to the bottom in the list of chapters
    // in the GUI. Currently all pages are fetched at once, synchronously. This can be very slow for certain manga like Naruto
    // which has 21 pages of chapters...
    //
    // Fetches the chapter list of the manga at |url| from the mangadex api and appends one item per chapter
    // to |result_items|, newest chapter (by timestamp) first. Only chapters with lang_code "gb" are included.
    SearchResult Mangadex::search(const std::string &url, BodyItems &result_items) {
#if 0
        int page = 1;
        while(true) {
            bool is_last_page;
            SearchResult search_result = search_page(url, result_items, page, &is_last_page);
            if(search_result != SearchResult::OK)
                return search_result;

            ++page;
            if(is_last_page)
                break;
        }
        return SearchResult::OK;
#else
        CommandArg user_agent_arg = { "-H", useragent_str };

        const std::string manga_id = title_url_extract_manga_id(url);
        const std::string request_url = "https://mangadex.org/api/?id=" + manga_id + "&type=manga";

        std::string server_response;
        if(download_to_string(request_url, server_response, {std::move(user_agent_arg)}, use_tor) != DownloadResult::OK)
            return SearchResult::NET_ERR;

        if(server_response.empty())
            return SearchResult::OK;

        Json::Value json_root;
        if(!parse_json(server_response, json_root, "Mangadex search"))
            return SearchResult::ERR;

        const Json::Value &status_json = json_root["status"];
        if(!status_json.isString() || status_json.asString() != "OK")
            return SearchResult::ERR;

        Json::Value &chapter_json = json_root["chapter"];
        if(!chapter_json.isObject())
            return SearchResult::ERR;

        // (chapter id, chapter data) pairs
        using ChapterEntry = std::pair<std::string, Json::Value>;
        std::vector<ChapterEntry> chapters;
        // Reserve, don't pre-size: pre-sizing would leave chapter_json.size() empty entries in front of the real ones
        chapters.reserve(chapter_json.size());
        for(const std::string &member_name : chapter_json.getMemberNames()) {
            Json::Value &chapter = chapter_json[member_name];
            // The json tree isn't read again after this loop, so the chapter data can be moved out of it
            if(chapter.isObject())
                chapters.emplace_back(member_name, std::move(chapter));
        }

        // Chapters without an integer timestamp are treated as timestamp 0
        const auto timestamp_of = [](const ChapterEntry &entry) -> int64_t {
            const Json::Value &timestamp_json = entry.second["timestamp"];
            return timestamp_json.isInt64() ? timestamp_json.asInt64() : 0;
        };

        // Newest first
        std::sort(chapters.begin(), chapters.end(), [&timestamp_of](const ChapterEntry &a, const ChapterEntry &b) {
            return timestamp_of(a) > timestamp_of(b);
        });

        int prev_chapter_number = -1;
        for(const ChapterEntry &chapter_entry : chapters) {
            const std::string &chapter_id = chapter_entry.first;
            const Json::Value &chapter = chapter_entry.second;

            const Json::Value &lang_code = chapter["lang_code"];
            // TODO: Allow selecting other languages than english
            if(!lang_code.isString() || lang_code.asString() != "gb")
                continue;

            const Json::Value &chapter_number_json = chapter["chapter"];
            if(!chapter_number_json.isString())
                continue;

            // Skip entries without a usable chapter number and consecutive entries for the same chapter
            const int chapter_number = atoi(chapter_number_json.asCString());
            if(chapter_number == 0 || chapter_number == prev_chapter_number)
                continue;
            prev_chapter_number = chapter_number;

            const Json::Value &chapter_title_json = chapter["title"];
            std::string chapter_url = mangadex_url + "/chapter/" + chapter_id;
            std::string chapter_name = std::string("Ch. ") + chapter_number_json.asCString();
            if(chapter_title_json.isString())
                chapter_name += std::string(" - ") + chapter_title_json.asCString();

            auto item = std::make_unique<BodyItem>(std::move(chapter_name));
            item->url = std::move(chapter_url);
            result_items.push_back(std::move(item));
        }
        return SearchResult::OK;
#endif
    }

    // Reads "rememberme_token" from <storage dir>/credentials/mangadex.json into |rememberme_token|.
    // Returns false if the file can't be read or isn't valid json.
    // NOTE(review): true is returned (with |rememberme_token| untouched) when the json has no string
    // "rememberme_token" field. This is kept as-is; confirm whether that case should be an error.
    static bool get_rememberme_token(std::string &rememberme_token) {
        Path mangadex_credentials_path = get_storage_dir().join("credentials").join("mangadex.json");
        std::string mangadex_credentials;
        if(file_get_content(mangadex_credentials_path, mangadex_credentials) != 0) {
            fprintf(stderr, "Failed to get content of file: %s\n", mangadex_credentials_path.data.c_str());
            return false;
        }

        Json::Value json_root;
        if(!parse_json(mangadex_credentials, json_root, "Mangadex credentials"))
            return false;

        if(json_root.isObject()) {
            const Json::Value &rememberme_token_json = json_root["rememberme_token"];
            if(rememberme_token_json.isString())
                rememberme_token = rememberme_token_json.asString();
        }
        return true;
    }

    // State shared with the <img> node callback in Mangadex::update_search_suggestions
    struct BodyItemImageContext {
        BodyItems *body_items;
        size_t index;
    };

    // TODO: Implement pagination (go to next page and get all results as well)
    //
    // Searches mangadex for |text| and appends one item per "/title/..." link in the result page to |result_items|.
    // Thumbnails ("/images/manga/..." images) are then assigned to the items in |result_items| in document order,
    // starting from the first item.
    SuggestionResult Mangadex::update_search_suggestions(const std::string &text, BodyItems &result_items) {
        std::string rememberme_token;
        if(!get_rememberme_token(rememberme_token))
            return SuggestionResult::ERR;

        std::string url = "https://mangadex.org/search?title=";
        url += url_param_encode(text);
        CommandArg cookie_arg = { "-H", "cookie: mangadex_rememberme_token=" + rememberme_token };
        CommandArg user_agent_arg = { "-H", useragent_str };

        std::string website_data;
        if(download_to_string(url, website_data, {std::move(cookie_arg), std::move(user_agent_arg)}, use_tor) != DownloadResult::OK)
            return SuggestionResult::NET_ERR;

        QuickMediaHtmlSearch html_search;
        int result = quickmedia_html_search_init(&html_search, website_data.c_str());
        if(result == 0) {
            // NOTE(review): the result of this pass is overwritten by the image pass below (unchanged behavior)
            result = quickmedia_html_find_nodes_xpath(&html_search, "//a",
                [](QuickMediaHtmlNode *node, void *userdata) {
                    auto *item_data = static_cast<BodyItems*>(userdata);
                    const char *href = quickmedia_html_node_get_attribute_value(node, "href");
                    const char *title = quickmedia_html_node_get_attribute_value(node, "title");
                    if(title && href && strncmp(href, "/title/", 7) == 0) {
                        auto item = std::make_unique<BodyItem>(strip(title));
                        item->url = mangadex_url + href;
                        item_data->push_back(std::move(item));
                    }
                }, &result_items);

            BodyItemImageContext body_item_image_context;
            body_item_image_context.body_items = &result_items;
            body_item_image_context.index = 0;

            result = quickmedia_html_find_nodes_xpath(&html_search, "//img",
                [](QuickMediaHtmlNode *node, void *userdata) {
                    auto *item_data = static_cast<BodyItemImageContext*>(userdata);
                    const char *src = quickmedia_html_node_get_attribute_value(node, "src");
                    if(!src || strncmp(src, "/images/manga/", 14) != 0)
                        return;

                    // Images beyond the number of items are ignored
                    if(item_data->index < item_data->body_items->size()) {
                        (*item_data->body_items)[item_data->index]->thumbnail_url = mangadex_url + src;
                        item_data->index++;
                    }
                }, &body_item_image_context);
        }

        // Deinit is called even when init failed, same as the previous goto-cleanup flow
        quickmedia_html_search_deinit(&html_search);
        return result == 0 ? SuggestionResult::OK : SuggestionResult::ERR;
    }

    // Sets |num_images| to the number of images in the chapter at |url|. |num_images| is 0 on failure.
    // Takes image_urls_mutex while the chapter image list is fetched and read.
    ImageResult Mangadex::get_number_of_images(const std::string &url, int &num_images) {
        std::lock_guard<std::mutex> lock(image_urls_mutex);

        num_images = 0;
        ImageResult image_result = get_image_urls_for_chapter(url);
        if(image_result != ImageResult::OK)
            return image_result;

        num_images = last_chapter_image_urls.size();
        return ImageResult::OK;
    }

    // Fills last_chapter_image_urls with the image urls of the chapter at |url|, using the mangadex api.
    // If |url| is the chapter that was fetched successfully last time then the cached list is reused.
    // Both callers in this file hold image_urls_mutex while calling this.
    ImageResult Mangadex::get_image_urls_for_chapter(const std::string &url) {
        if(url == last_chapter_url)
            return ImageResult::OK;

        // Invalidate the cache key together with the cached urls. Otherwise a failed request for this chapter
        // would make a later request for the previously cached chapter return OK with an empty image list.
        last_chapter_url.clear();
        last_chapter_image_urls.clear();

        CommandArg user_agent_arg = { "-H", useragent_str };

        const std::string manga_id = chapter_url_extract_manga_id(url);
        const std::string request_url = mangadex_url + "/api/?id=" + manga_id + "&server=null&type=chapter";

        std::string server_response;
        if(download_to_string(request_url, server_response, {std::move(user_agent_arg)}, use_tor) != DownloadResult::OK)
            return ImageResult::NET_ERR;

        if(server_response.empty())
            return ImageResult::OK;

        Json::Value json_root;
        if(!parse_json(server_response, json_root, "Mangadex image urls"))
            return ImageResult::ERR;

        const Json::Value &status_json = json_root["status"];
        if(!status_json.isString() || status_json.asString() != "OK")
            return ImageResult::ERR;

        const Json::Value &chapter_hash = json_root["hash"];
        if(!chapter_hash.isString())
            return ImageResult::ERR;

        // Fall back to the mangadex data directory when the response doesn't name a server
        const Json::Value &server_json = json_root["server"];
        std::string server;
        if(server_json.isString())
            server = server_json.asString();
        else
            server = mangadex_url + "/data/";

        // Every image url is <server><hash>/<image name>
        const std::string image_url_prefix = server + chapter_hash.asCString() + "/";

        const Json::Value &page_array = json_root["page_array"];
        if(page_array.isArray()) {
            for(const Json::Value &image_name : page_array) {
                if(!image_name.isString())
                    continue;
                last_chapter_image_urls.push_back(image_url_prefix + image_name.asCString());
            }
        }

        // A chapter without images is an error and is not cached
        if(last_chapter_image_urls.empty())
            return ImageResult::ERR;

        last_chapter_url = url;
        return ImageResult::OK;
    }

    // Calls |callback| with each image url of the chapter at |chapter_url|, in order, until |callback| returns false.
    // The urls are copied while holding image_urls_mutex and the callbacks are invoked after the mutex is released.
    ImageResult Mangadex::for_each_page_in_chapter(const std::string &chapter_url, PageCallback callback) {
        std::vector<std::string> image_urls;
        {
            std::lock_guard<std::mutex> lock(image_urls_mutex);
            ImageResult image_result = get_image_urls_for_chapter(chapter_url);
            if(image_result != ImageResult::OK)
                return image_result;

            image_urls = last_chapter_image_urls;
        }

        for(const std::string &url : image_urls) {
            if(!callback(url))
                break;
        }
        return ImageResult::OK;
    }

    // Sets |manga_id| to the id in a ".../title/<id>" url, or to an empty string if the url has no "/title/" part.
    // NOTE(review): always returns true, even when no id was found; confirm whether callers expect false in that case.
    bool Mangadex::extract_id_from_url(const std::string &url, std::string &manga_id) {
        manga_id = title_url_extract_manga_id(url);
        return true;
    }
}