From f34a17de0d867a8788a63ae9a955052744815922 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 17 Sep 2022 13:52:20 +0200 Subject: Mangakatana: fix one page again... --- TODO | 4 ++- src/QuickMedia.cpp | 97 +++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/TODO b/TODO index f91d993..b6cf6af 100644 --- a/TODO +++ b/TODO @@ -235,4 +235,6 @@ Fallback to playing videos/etc from origin homeserver in matrix if the file is l Fix 4chan posting! cloudflare broke shit. Then create external captcha solver program that solves captcha from captcha image. Add option to use invidious, and the invidious front page. Add proper vim modal mode. Maybe switch focused part with tab? then also need to show which part is focused. -Send clipboard content to clipboard manager when destroying the window, if we are the clipboard owner \ No newline at end of file +Send clipboard content to clipboard manager when destroying the window, if we are the clipboard owner. +Bypass compositor when fullscreening application. +Sort matrix events by timestamp. This is needed to make name change and other similar things work properly, otherwise @ mention wont work as it may show an older name if a previous event is fetched and contains name change. Also applies to join/leave events and other things. \ No newline at end of file diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index df98a5a..c9e52c1 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -834,31 +834,68 @@ namespace QuickMedia { .manga_id_handler("/manga/", "/"); } - static std::vector get_javascript_array_string(const std::string &html_source, const std::string &var_name) { - std::vector arr; - size_t sources_start = html_source.find(var_name + "=["); - if(sources_start == std::string::npos) - return arr; - - sources_start += var_name.size() + 1; // just before [ - size_t json_end = find_end_of_json_array(html_source.c_str(), sources_start, html_source.size()); - if(json_end == size_t(-1)) - return arr; - - sources_start += 1; - json_end -= 1; - std::string urls_str = html_source.substr(sources_start, json_end - sources_start); - string_replace_all(urls_str, "'", ""); - - string_split(urls_str, ',', [&arr](const char *str, size_t size) { - std::string url(str, size); - url = strip(url); - if(!url.empty()) - arr.push_back(std::move(url)); - return true; - }); + static std::vector extract_javascript_sections(const std::string &html_source) { + std::vector sections; + size_t start = 0; + + while(true) { + start = html_source.find("", start); + if(start == std::string::npos) + break; + + start += 1; + size_t end = html_source.find("", start); + if(end == std::string::npos) + break; - return arr; + sections.push_back(html_source.substr(start, end - start)); + start = end + 9; + } + + return sections; + } + + static std::vector get_javascript_string_arrays_unique(const std::string &js_source) { + std::vector arrays; + size_t start = 0; + + while(true) { + start = js_source.find("=['", start); + if(start == std::string::npos) + break; + + start += 3; + size_t end = js_source.find("]", start); + if(end == std::string::npos) + break; + + size_t sources_start = start - 2; // just before [ + size_t json_end = find_end_of_json_array(js_source.c_str(), sources_start, js_source.size()); + if(json_end == size_t(-1)) + break; + + sources_start += 1; + json_end -= 1; + std::string urls_str = js_source.substr(sources_start, json_end - sources_start); + string_replace_all(urls_str, "'", ""); + + string_split(urls_str, ',', [&arrays](const char *str, size_t size) { + std::string url(str, size); + url = strip(url); + if(!url.empty() && (arrays.empty() || arrays.back() != url)) + arrays.push_back(std::move(url)); + return true; + }); + + start = end + 1; + } + + return arrays; } static void add_mangakatana_handlers(MangaGenericSearchPage *manga_generic_search_page) { @@ -879,12 +916,12 @@ namespace QuickMedia { .list_chapters_handler("//div[class='chapters']//div[class='chapter']//a[0]", "text", "href", "/manga/") .list_chapters_uploaded_time_handler("//div[class='chapters']//div[class='update_time']", "text", nullptr) .list_page_images_custom_handler([](const std::string &html_source) { - std::vector urls_ytaw = get_javascript_array_string(html_source, "ytaw"); - std::vector urls_htnc = get_javascript_array_string(html_source, "htnc"); - if(urls_ytaw.size() >= urls_htnc.size()) - return urls_ytaw; - else - return urls_htnc; + std::vector urls; + for(const std::string &js_section : extract_javascript_sections(html_source)) { + std::vector js_string_array = get_javascript_string_arrays_unique(js_section); + urls.insert(urls.end(), std::move_iterator(js_string_array.begin()), std::move_iterator(js_string_array.end())); + } + return urls; }) .manga_id_handler("/manga/", nullptr); } -- cgit v1.2.3