From e763555bacec4cf755d6a8233e20b6d541a8331f Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 17 Sep 2022 13:41:08 +0200 Subject: Mangakatana: fix one page again... --- plugins/mangakatana.py | 68 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/plugins/mangakatana.py b/plugins/mangakatana.py index ef50db0..85fd8d0 100755 --- a/plugins/mangakatana.py +++ b/plugins/mangakatana.py @@ -73,17 +73,54 @@ def list_chapters(url, chapter_list_input): print(json.dumps(chapters)) -def get_javascript_array_string(html_source, var_name): - sources_start = html_source.find(var_name + "=[") - if sources_start == -1: - return [] - - sources_start += len(var_name) + 2 - sources_end = html_source.find("]", sources_start) - if sources_end == -1: - return [] - - return html_source[sources_start:sources_end].replace("'", "").split(",") +def extract_javascript_sections(html_source): + sections = [] + start = 0 + + while True: + start = html_source.find("", start) + if start == -1: + return sections + + start += 1 + end = html_source.find("", start) + if end == -1: + return sections + + sections.append(html_source[start:end]) + start = end + 9 + +def get_javascript_string_arrays(js_source): + arrays = [] + start = 0 + + while True: + start = js_source.find("=['", start) + if start == -1: + return arrays + + start += 3 + end = js_source.find("]", start) + if end == -1: + return arrays + + arr = js_source[start:end].replace("'", "").split(",") + arrays.extend(list(filter(None, arr))) + start = end + 1 + +def uniq_str_arr(arr): + res = [] + prev = "" + for d in arr: + if d != prev: + res.append(d) + prev = d + return res def download_chapter(url, download_dir): response = requests.get(url, timeout=30) @@ -94,9 +131,12 @@ def download_chapter(url, download_dir): file.write(url) response_text = response.text - urls_ytaw = get_javascript_array_string(response_text, "ytaw") - urls_htnc = get_javascript_array_string(response_text, "htnc") - image_sources = urls_ytaw if len(urls_ytaw) >= len(urls_htnc) else urls_htnc + js_sections = extract_javascript_sections(response_text) + js_string_arrays = [] + for js_section in js_sections: + js_string_arrays.extend(uniq_str_arr(get_javascript_string_arrays(js_section))) + + image_sources = js_string_arrays img_number = 1 for image_source in image_sources: -- cgit v1.2.3