From 2d1bdbf0cb274118f06dfa501fb77ecf0e5cb57a Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 26 Jul 2022 19:19:24 +0200 Subject: Redirect mangakakalot/broken manganelo links to migrated url --- plugins/manganelo.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/plugins/manganelo.py b/plugins/manganelo.py index 312d093..4427334 100755 --- a/plugins/manganelo.py +++ b/plugins/manganelo.py @@ -8,6 +8,10 @@ import json from lxml import etree +headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36" +} + def usage(): print("manganelo.py command") print("commands:") @@ -54,8 +58,51 @@ def download_file(url, save_path): file_size += len(chunk) return file_size +def redirect_check(response_text, url): + idx = response_text.find('window.location.assign("') + if idx == -1: + return (response_text, url) + + idx += 24 + end = response_text.find('"', idx) + if end == -1: + return (response_text, url) + + response = requests.get(response_text[idx:end], timeout=30, headers=headers) + response.raise_for_status() + return (response.text, response.url) + +def redirect_migrated_url(url, tree, is_chapter): + for element in tree.xpath('//div[@class="panel-not-found"]'): + if url[-1] == '/': + url = url[:-1] + + url = url.replace("manganelo", "mangakakalot") + chapter = None + if is_chapter: + idx = url.rfind('/') + if idx == -1: + print("Chapter not found in url") + return None + chapter = url[idx + 1:] + url = url[0:idx] + + response = requests.get(url, timeout=30, headers=headers) + response.raise_for_status() + + text, url = redirect_check(response.text, url) + if chapter: + url = url + "/" + chapter + response = requests.get(url, timeout=30, headers=headers) + response.raise_for_status() + text = response.text + + return etree.HTML(text) + + return tree + def list_chapters(url, chapter_list_input): - response = requests.get(url, timeout=30) + response = requests.get(url, timeout=30, headers=headers) response.raise_for_status() seen_titles = set() @@ -71,6 +118,8 @@ def list_chapters(url, chapter_list_input): seen_urls.add(chapter_url.replace("mangakakalot", "manganelo")) tree = etree.HTML(response.text) + tree = redirect_migrated_url(url, tree, False) + chapters = [] for element in tree.xpath('//ul[@class="row-content-chapter"]//a'): element_text = element.text.strip().replace("/", "_") @@ -89,6 +138,11 @@ def download_chapter(url, download_dir): file.write(url) tree = etree.HTML(response.text) + tree = redirect_migrated_url(url, tree, True) + if tree is None: + os.remove(in_progress_filepath) + exit(2) + img_number = 1 for image_source in tree.xpath('//div[@class="container-chapter-reader"]/img/@src'): ext = image_source[image_source.rfind("."):] @@ -127,6 +181,7 @@ if command == "list": elif command == "download": if len(sys.argv) < 4: usage_download() + url = sys.argv[2].replace("mangakakalot", "manganelo") download_dir = sys.argv[3] download_chapter(url, download_dir) -- cgit v1.2.3