aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2022-09-17 13:41:08 +0200
committerdec05eba <dec05eba@protonmail.com>2022-09-17 13:41:08 +0200
commite763555bacec4cf755d6a8233e20b6d541a8331f (patch)
tree89d806df03e5893058ac28a18f9dd8303c651e2a
parent8281c612b425709c33267623f4f29744b0c01896 (diff)
Mangakatana: fix one page again...
-rwxr-xr-xplugins/mangakatana.py68
1 files changed, 54 insertions, 14 deletions
diff --git a/plugins/mangakatana.py b/plugins/mangakatana.py
index ef50db0..85fd8d0 100755
--- a/plugins/mangakatana.py
+++ b/plugins/mangakatana.py
@@ -73,17 +73,54 @@ def list_chapters(url, chapter_list_input):
print(json.dumps(chapters))
-def get_javascript_array_string(html_source, var_name):
- sources_start = html_source.find(var_name + "=[")
- if sources_start == -1:
- return []
-
- sources_start += len(var_name) + 2
- sources_end = html_source.find("]", sources_start)
- if sources_end == -1:
- return []
-
- return html_source[sources_start:sources_end].replace("'", "").split(",")
+def extract_javascript_sections(html_source):
+ sections = []
+ start = 0
+
+ while True:
+ start = html_source.find("<script", start)
+ if start == -1:
+ return sections
+
+ start += 7
+ start = html_source.find(">", start)
+ if start == -1:
+ return sections
+
+ start += 1
+ end = html_source.find("</script>", start)
+ if end == -1:
+ return sections
+
+ sections.append(html_source[start:end])
+ start = end + 9
+
+def get_javascript_string_arrays(js_source):
+ arrays = []
+ start = 0
+
+ while True:
+ start = js_source.find("=['", start)
+ if start == -1:
+ return arrays
+
+ start += 3
+ end = js_source.find("]", start)
+ if end == -1:
+ return arrays
+
+ arr = js_source[start:end].replace("'", "").split(",")
+ arrays.extend(list(filter(None, arr)))
+ start = end + 1
+
+def uniq_str_arr(arr):
+ res = []
+ prev = ""
+ for d in arr:
+ if d != prev:
+ res.append(d)
+ prev = d
+ return res
def download_chapter(url, download_dir):
response = requests.get(url, timeout=30)
@@ -94,9 +131,12 @@ def download_chapter(url, download_dir):
file.write(url)
response_text = response.text
- urls_ytaw = get_javascript_array_string(response_text, "ytaw")
- urls_htnc = get_javascript_array_string(response_text, "htnc")
- image_sources = urls_ytaw if len(urls_ytaw) >= len(urls_htnc) else urls_htnc
+ js_sections = extract_javascript_sections(response_text)
+ js_string_arrays = []
+ for js_section in js_sections:
+ js_string_arrays.extend(uniq_str_arr(get_javascript_string_arrays(js_section)))
+
+ image_sources = js_string_arrays
img_number = 1
for image_source in image_sources: