Mangakatana: fix one page again...

author: dec05eba <dec05eba@protonmail.com> 2022-09-17 13:41:08 +0200
committer: dec05eba <dec05eba@protonmail.com> 2022-09-17 13:41:08 +0200
commit: e763555bacec4cf755d6a8233e20b6d541a8331f (patch)
tree: 89d806df03e5893058ac28a18f9dd8303c651e2a
parent: 8281c612b425709c33267623f4f29744b0c01896 (diff)
1 file changed, 54 insertions, 14 deletions
diff --git a/plugins/mangakatana.py b/plugins/mangakatana.py
index ef50db0..85fd8d0 100755
--- a/plugins/mangakatana.py
+++ b/plugins/mangakatana.py
@@ -73,17 +73,54 @@ def list_chapters(url, chapter_list_input):
     
     print(json.dumps(chapters))
 
-def get_javascript_array_string(html_source, var_name):
-    sources_start = html_source.find(var_name + "=[")
-    if sources_start == -1:
-        return []
-
-    sources_start += len(var_name) + 2
-    sources_end = html_source.find("]", sources_start)
-    if sources_end == -1:
-        return []
-
-    return html_source[sources_start:sources_end].replace("'", "").split(",")
+def extract_javascript_sections(html_source):
+    sections = []
+    start = 0
+
+    while True:
+        start = html_source.find("<script", start)
+        if start == -1:
+            return sections
+
+        start += 7
+        start = html_source.find(">", start)
+        if start == -1:
+            return sections
+
+        start += 1
+        end = html_source.find("</script>", start)
+        if end == -1:
+            return sections
+
+        sections.append(html_source[start:end])
+        start = end + 9
+
+def get_javascript_string_arrays(js_source):
+    arrays = []
+    start = 0
+
+    while True:
+        start = js_source.find("=['", start)
+        if start == -1:
+            return arrays
+
+        start += 3
+        end = js_source.find("]", start)
+        if end == -1:
+            return arrays
+
+        arr = js_source[start:end].replace("'", "").split(",")
+        arrays.extend(list(filter(None, arr)))
+        start = end + 1
+
+def uniq_str_arr(arr):
+    res = []
+    prev = ""
+    for d in arr:
+        if d != prev:
+            res.append(d)
+        prev = d
+    return res
 
 def download_chapter(url, download_dir):
     response = requests.get(url, timeout=30)
@@ -94,9 +131,12 @@ def download_chapter(url, download_dir):
         file.write(url)
 
     response_text = response.text
-    urls_ytaw = get_javascript_array_string(response_text, "ytaw")
-    urls_htnc = get_javascript_array_string(response_text, "htnc")
-    image_sources = urls_ytaw if len(urls_ytaw) >= len(urls_htnc) else urls_htnc
+    js_sections = extract_javascript_sections(response_text)
+    js_string_arrays = []
+    for js_section in js_sections:
+        js_string_arrays.extend(uniq_str_arr(get_javascript_string_arrays(js_section)))
+
+    image_sources = js_string_arrays
 
     img_number = 1
     for image_source in image_sources:
author	dec05eba <dec05eba@protonmail.com>	2022-09-17 13:41:08 +0200
committer	dec05eba <dec05eba@protonmail.com>	2022-09-17 13:41:08 +0200
commit	e763555bacec4cf755d6a8233e20b6d541a8331f (patch)
tree	89d806df03e5893058ac28a18f9dd8303c651e2a
parent	8281c612b425709c33267623f4f29744b0c01896 (diff)