Mangakatana: fix one page again...

author: dec05eba <dec05eba@protonmail.com> 2022-09-17 13:52:20 +0200
committer: dec05eba <dec05eba@protonmail.com> 2022-09-17 13:52:20 +0200
commit: f34a17de0d867a8788a63ae9a955052744815922 (patch)
tree: 15778bb20a99874d484cd7f3d0b9f8f22a8ac2dd
parent: 77b2d411d537ff73ccbfd1ed889256dd450f0fce (diff)
2 files changed, 70 insertions, 31 deletions
diff --git a/TODO b/TODO
index f91d993..b6cf6af 100644
--- a/TODO
+++ b/TODO
@@ -235,4 +235,6 @@ Fallback to playing videos/etc from origin homeserver in matrix if the file is l
 Fix 4chan posting! cloudflare broke shit. Then create external captcha solver program that solves captcha from captcha image.
 Add option to use invidious, and the invidious front page.
 Add proper vim modal mode. Maybe switch focused part with tab? then also need to show which part is focused.
-Send clipboard content to clipboard manager when destroying the window, if we are the clipboard owner
-\ No newline at end of file
+Send clipboard content to clipboard manager when destroying the window, if we are the clipboard owner.
+Bypass compositor when fullscreening application.
+Sort matrix events by timestamp. This is needed to make name change and other similar things work properly, otherwise @ mention won't work as it may show an older name if a previous event is fetched and contains a name change. Also applies to join/leave events and other things.
+\ No newline at end of file
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index df98a5a..c9e52c1 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -834,31 +834,68 @@ namespace QuickMedia {
             .manga_id_handler("/manga/", "/");
     }
 
-    static std::vector<std::string> get_javascript_array_string(const std::string &html_source, const std::string &var_name) {
-        std::vector<std::string> arr;
-        size_t sources_start = html_source.find(var_name + "=[");
-        if(sources_start == std::string::npos)
-            return arr;
-
-        sources_start += var_name.size() + 1; // just before [
-        size_t json_end = find_end_of_json_array(html_source.c_str(), sources_start, html_source.size());
-        if(json_end == size_t(-1))
-            return arr;
-
-        sources_start += 1;
-        json_end -= 1;
-        std::string urls_str = html_source.substr(sources_start, json_end - sources_start);
-        string_replace_all(urls_str, "'", "");
-
-        string_split(urls_str, ',', [&arr](const char *str, size_t size) {
-            std::string url(str, size);
-            url = strip(url);
-            if(!url.empty())
-                arr.push_back(std::move(url));
-            return true;
-        });
+    static std::vector<std::string> extract_javascript_sections(const std::string &html_source) { // Returns the text between each "<script...>" opening tag and the next "</script>", in order of appearance
+        std::vector<std::string> sections;
+        size_t start = 0;
+
+        while(true) {
+            start = html_source.find("<script", start);
+            if(start == std::string::npos)
+                break;
+
+            start += 7; // length of "<script"
+            start = html_source.find(">", start); // end of the opening tag; anything between "<script" and ">" is skipped
+            if(start == std::string::npos)
+                break;
+
+            start += 1; // first character after the opening tag
+            size_t end = html_source.find("</script>", start);
+            if(end == std::string::npos)
+                break; // no closing tag: stop and return what was collected so far
 
-        return arr;
+            sections.push_back(html_source.substr(start, end - start));
+            start = end + 9; // length of "</script>"; the next search resumes after the closing tag
+        }
+        
+        return sections;
+    }
+
+    static std::vector<std::string> get_javascript_string_arrays_unique(const std::string &js_source) { // Flattens the items of every array introduced by "=['" in js_source into one list, skipping an item equal to the one stored just before it
+        std::vector<std::string> arrays;
+        size_t start = 0;
+
+        while(true) {
+            start = js_source.find("=['", start);
+            if(start == std::string::npos)
+                break;
+
+            start += 3; // length of "=['"; start is now just past the opening quote
+            size_t end = js_source.find("]", start); // first "]" after the match; only used to advance the search below
+            if(end == std::string::npos)
+                break;
+
+            size_t sources_start = start - 2; // index of the [ in the matched "=['"
+            size_t json_end = find_end_of_json_array(js_source.c_str(), sources_start, js_source.size()); // presumably the index one past the matching ] -- TODO confirm against find_end_of_json_array
+            if(json_end == size_t(-1))
+                break; // size_t(-1) is treated as failure; note this stops the whole scan rather than skipping this match
+
+            sources_start += 1; // step past the [
+            json_end -= 1; // assumes json_end was one past the ], so the substr below excludes it -- TODO confirm
+            std::string urls_str = js_source.substr(sources_start, json_end - sources_start);
+            string_replace_all(urls_str, "'", ""); // drop every single quote before splitting
+
+            string_split(urls_str, ',', [&arrays](const char *str, size_t size) {
+                std::string url(str, size);
+                url = strip(url);
+                if(!url.empty() && (arrays.empty() || arrays.back() != url)) // only compares with the most recently stored item, so non-adjacent duplicates are kept
+                    arrays.push_back(std::move(url));
+                return true; // presumably tells string_split to continue with the next item -- TODO confirm
+            });
+
+            start = end + 1; // resume after the first "]" found above (not after json_end)
+        }
+
+        return arrays;
     }
 
     static void add_mangakatana_handlers(MangaGenericSearchPage *manga_generic_search_page) {
@@ -879,12 +916,12 @@ namespace QuickMedia {
             .list_chapters_handler("//div[class='chapters']//div[class='chapter']//a[0]", "text", "href", "/manga/")
             .list_chapters_uploaded_time_handler("//div[class='chapters']//div[class='update_time']", "text", nullptr)
             .list_page_images_custom_handler([](const std::string &html_source) {
-                std::vector<std::string> urls_ytaw = get_javascript_array_string(html_source, "ytaw");
-                std::vector<std::string> urls_htnc = get_javascript_array_string(html_source, "htnc");
-                if(urls_ytaw.size() >= urls_htnc.size())
-                    return urls_ytaw;
-                else
-                    return urls_htnc;
+                std::vector<std::string> urls;
+                for(const std::string &js_section : extract_javascript_sections(html_source)) {
+                    std::vector<std::string> js_string_array = get_javascript_string_arrays_unique(js_section);
+                    urls.insert(urls.end(), std::move_iterator(js_string_array.begin()), std::move_iterator(js_string_array.end()));
+                }
+                return urls;
             })
             .manga_id_handler("/manga/", nullptr);
     }
author	dec05eba <dec05eba@protonmail.com>	2022-09-17 13:52:20 +0200
committer	dec05eba <dec05eba@protonmail.com>	2022-09-17 13:52:20 +0200
commit	f34a17de0d867a8788a63ae9a955052744815922 (patch)
tree	15778bb20a99874d484cd7f3d0b9f8f22a8ac2dd
parent	77b2d411d537ff73ccbfd1ed889256dd450f0fce (diff)