From f34a17de0d867a8788a63ae9a955052744815922 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Sat, 17 Sep 2022 13:52:20 +0200
Subject: Mangakatana: fix one page again...

---
 src/QuickMedia.cpp | 97 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 67 insertions(+), 30 deletions(-)

(limited to 'src/QuickMedia.cpp')

NOTE(review): this copy was recovered from an HTML rendering of the commit, and
text inside angle brackets was lost in extraction. "<std::string>" and
"</script>" are restored below (the element type follows from the std::string
values pushed into the vectors; the closing tag from the 9-character skip in
"start = end + 9"). The first find("") in extract_javascript_sections is left
as found: the first hunk header and the diffstat imply 61 added lines in that
hunk but only 56 survive here, so that call presumably merges a search for the
opening script tag with a later search for ">". Confirm against the upstream
commit before applying this patch.

diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index df98a5a..c9e52c1 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -834,31 +834,68 @@ namespace QuickMedia {
             .manga_id_handler("/manga/", "/");
     }
 
-    static std::vector<std::string> get_javascript_array_string(const std::string &html_source, const std::string &var_name) {
-        std::vector<std::string> arr;
-        size_t sources_start = html_source.find(var_name + "=[");
-        if(sources_start == std::string::npos)
-            return arr;
-
-        sources_start += var_name.size() + 1; // just before [
-        size_t json_end = find_end_of_json_array(html_source.c_str(), sources_start, html_source.size());
-        if(json_end == size_t(-1))
-            return arr;
-
-        sources_start += 1;
-        json_end -= 1;
-        std::string urls_str = html_source.substr(sources_start, json_end - sources_start);
-        string_replace_all(urls_str, "'", "");
-
-        string_split(urls_str, ',', [&arr](const char *str, size_t size) {
-            std::string url(str, size);
-            url = strip(url);
-            if(!url.empty())
-                arr.push_back(std::move(url));
-            return true;
-        });
+    static std::vector<std::string> extract_javascript_sections(const std::string &html_source) {
+        std::vector<std::string> sections;
+        size_t start = 0;
+
+        while(true) {
+            start = html_source.find("", start);
+            if(start == std::string::npos)
+                break;
+
+            start += 1;
+            size_t end = html_source.find("</script>", start);
+            if(end == std::string::npos)
+                break;
 
-        return arr;
+            sections.push_back(html_source.substr(start, end - start));
+            start = end + 9;
+        }
+
+        return sections;
+    }
+
+    static std::vector<std::string> get_javascript_string_arrays_unique(const std::string &js_source) {
+        std::vector<std::string> arrays;
+        size_t start = 0;
+
+        while(true) {
+            start = js_source.find("=['", start);
+            if(start == std::string::npos)
+                break;
+
+            start += 3;
+            size_t end = js_source.find("]", start);
+            if(end == std::string::npos)
+                break;
+
+            size_t sources_start = start - 2; // just before [
+            size_t json_end = find_end_of_json_array(js_source.c_str(), sources_start, js_source.size());
+            if(json_end == size_t(-1))
+                break;
+
+            sources_start += 1;
+            json_end -= 1;
+            std::string urls_str = js_source.substr(sources_start, json_end - sources_start);
+            string_replace_all(urls_str, "'", "");
+
+            string_split(urls_str, ',', [&arrays](const char *str, size_t size) {
+                std::string url(str, size);
+                url = strip(url);
+                if(!url.empty() && (arrays.empty() || arrays.back() != url))
+                    arrays.push_back(std::move(url));
+                return true;
+            });
+
+            start = end + 1;
+        }
+
+        return arrays;
     }
 
     static void add_mangakatana_handlers(MangaGenericSearchPage *manga_generic_search_page) {
@@ -879,12 +916,12 @@ namespace QuickMedia {
             .list_chapters_handler("//div[class='chapters']//div[class='chapter']//a[0]", "text", "href", "/manga/")
             .list_chapters_uploaded_time_handler("//div[class='chapters']//div[class='update_time']", "text", nullptr)
             .list_page_images_custom_handler([](const std::string &html_source) {
-                std::vector<std::string> urls_ytaw = get_javascript_array_string(html_source, "ytaw");
-                std::vector<std::string> urls_htnc = get_javascript_array_string(html_source, "htnc");
-                if(urls_ytaw.size() >= urls_htnc.size())
-                    return urls_ytaw;
-                else
-                    return urls_htnc;
+                std::vector<std::string> urls;
+                for(const std::string &js_section : extract_javascript_sections(html_source)) {
+                    std::vector<std::string> js_string_array = get_javascript_string_arrays_unique(js_section);
+                    urls.insert(urls.end(), std::move_iterator(js_string_array.begin()), std::move_iterator(js_string_array.end()));
+                }
+                return urls;
             })
             .manga_id_handler("/manga/", nullptr);
     }
-- 
cgit v1.2.3