From 48e757baffbf75bc8a1e4171ad94c27d7356cafa Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 7 May 2021 07:38:59 +0200 Subject: Migrate mangadex to new api, remove .in_progress files in tracked dir if they are old --- plugins/lhtranslation.py | 4 +- plugins/mangadex.py | 165 ++++++++++++++++++++++------------------------- plugins/mangakatana.py | 4 +- plugins/manganelo.py | 4 +- plugins/manganelos.py | 4 +- plugins/mangaplus.py | 4 +- plugins/mangatown.py | 4 +- plugins/mangawindow.py | 4 +- plugins/readm.py | 4 +- 9 files changed, 93 insertions(+), 104 deletions(-) (limited to 'plugins') diff --git a/plugins/lhtranslation.py b/plugins/lhtranslation.py index 153f4ed..082b865 100755 --- a/plugins/lhtranslation.py +++ b/plugins/lhtranslation.py @@ -50,7 +50,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -61,7 +61,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath("//div[@class='list-chapters']//a[@class='chapter']"): - title = element.find("b").text.strip() + title = element.find("b").text.strip().replace("/", "_") url = "https://lhtranslation.net/" + element.attrib.get("href").strip() if title.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/mangadex.py b/plugins/mangadex.py index 72b28f3..04bc800 100755 --- a/plugins/mangadex.py +++ b/plugins/mangadex.py @@ -21,13 +21,13 @@ def usage(): exit(1) def usage_list(): - print("mangadex.py list ") + print("mangadex.py list ") exit(1) def usage_download(): - print("mangadex.py download ") + print("mangadex.py download ") print("examples:") - print(" mangadex.py download \"https://mangadex.org/title/7139/one-punch-man\" /home/adam/Manga/MangaName") + print(" mangadex.py download \"4e4a1ed8-d4a0-4096-86db-ca5e3fc42c5d\" /home/adam/Manga/MangaName") print("") print("Note: The manga directory has to exist.") exit(1) @@ -35,8 +35,8 @@ def usage_download(): if len(sys.argv) < 2: usage() -def download_file(url, cookies, save_path): - with requests.get(url, headers=headers, cookies=cookies, stream=True) as response: +def download_file(url, save_path): + with requests.get(url, headers=headers, stream=True) as response: if not response.ok: return False with open(save_path, "wb") as file: @@ -45,11 +45,6 @@ def download_file(url, cookies, save_path): file.write(chunk) return True -def title_url_extract_manga_id(url): - result = re.search("mangadex.org/title/([0-9]+)", url) - if result and len(result.groups()) > 0: - return result.groups()[0] - def chapter_sort_func(chapter_data): return chapter_data[1].get("timestamp", 0) @@ -58,24 +53,16 @@ def chapter_title_extract_number(chapter_title): if result and len(result.groups()) > 0: return result.groups()[0] -def list_chapters(url, chapter_list_input): - manga_id = title_url_extract_manga_id(url) - if not manga_id: - print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title//..." 
% url) - exit(2) - - url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id; - response = requests.get(url, headers=headers) - if response.status_code != 200: - print("Failed to list chapters, server responded with status code %d" % response.status_code) - exit(2) +def sort_chapters(chapter): + return chapter["chapter"] +def list_chapters(url, chapter_list_input): seen_titles = set() seen_chapter_numbers = set() for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) chapter_number = chapter_title_extract_number(title) if chapter_number: seen_chapter_numbers.add(chapter_number) @@ -86,93 +73,95 @@ def list_chapters(url, chapter_list_input): if url and len(url) > 0: seen_urls.add(url) - lang = "gb" # english + lang = "en" - json_response = response.json() - status = json_response["status"] - if status != "OK": - print("Expected server response OK, got %s" % status) - exit(2) - - chapter_json = json_response["chapter"] - time_now = time.time() + i = 0 prev_chapter_number = "" - output_chapters = [] - for chapter_id, chapter in chapter_json.items(): - timestamp = chapter.get("timestamp", 0) - if timestamp > time_now: - continue - - lang_code = chapter.get("lang_code", "") - if lang_code != lang: - continue - - chapter_number_str = chapter.get("chapter", "0") - if chapter_number_str == prev_chapter_number: - continue - prev_chapter_number = chapter_number_str - - chapter_title = chapter.get("title") - chapter_url = "https://mangadex.org/chapter/" + chapter_id - chapter_name = "Ch. " + chapter_number_str - if chapter_title and len(chapter_title) > 0: - chapter_name += " - " + chapter_title - - if chapter_title.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls: + chapters = [] + while True: + url = "https://api.mangadex.org/chapter?manga=" + url + "&limit=100&offset=%d&order[publishAt]=desc" % (i * 100) + response = requests.get(url, headers=headers) + response.raise_for_status() + + if len(response.text) == 0: break - if chapter_number_str in seen_chapter_numbers: + json_response = response.json() + results = json_response["results"] + if len(results) == 0: break - output_chapters.append({ "name": chapter_name, "url": chapter_url }) - print(json.dumps(output_chapters)) + for result_item in results: + if result_item["result"] != "ok": + print("Got item with result != ok") + exit(1) -def chapter_url_extract_manga_id(url): - result = re.search("mangadex.org/chapter/([0-9]+)", url) - if result and len(result.groups()) > 0: - return result.groups()[0] + data = result_item["data"] + id = data["id"] + attributes = data["attributes"] + if attributes["translatedLanguage"] != lang: + continue -def download_chapter(url, download_dir): - request_url = url - manga_id = chapter_url_extract_manga_id(url) - if not manga_id: - print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/chapter/" % url) - exit(2) + chapter_number_str = attributes["chapter"] + if chapter_number_str == prev_chapter_number: + continue + prev_chapter_number = chapter_number_str - response = requests.get(request_url, headers=headers) - if response.status_code != 200: - print("Failed to list chapter images, server responded with status code %d" % response.status_code) - exit(2) + title = "Ch. 
" + chapter_number_str + title_item = attributes.get("title") + if title_item: + title += " - " + title_item + title = title.replace("/", "_") - cookies = response.cookies + if title.lower().replace(" ", "") in seen_titles or id in seen_urls: + break - url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % manga_id + if chapter_number_str in seen_chapter_numbers: + break - response = requests.get(url, headers=headers) - if response.status_code != 200: - print("Failed to list chapter images, server responded with status code %d" % response.status_code) - exit(2) + chapters.append({ "name": title, "url": id, "chapter": int(chapter_number_str) }) + i += 1 + + chapters = sorted(chapters, key=sort_chapters, reverse=True) + output_chapters = [] + for chapter in chapters: + output_chapters.append({ "name": chapter["name"], "url": chapter["url"] }) + print(json.dumps(output_chapters)) + +def get_base_url_for_chapter(chapter_id): + response = requests.get("https://api.mangadex.org/at-home/server/" + chapter_id, headers=headers) + response.raise_for_status() + return response.json()["baseUrl"] + +def download_chapter(url, download_dir): + base_url = get_base_url_for_chapter(url) + response = requests.get("https://api.mangadex.org/chapter/" + url, headers=headers) + response.raise_for_status() + + image_urls = [] + json_response = response.json() + if json_response["result"] != "ok": + print("Got item with result != ok") + exit(1) + + data = json_response["data"] + attributes = data["attributes"] + hash = attributes["hash"] + image_names = attributes["data"] + for image_name in image_names: + image_urls.append(base_url + "/data/" + hash + "/" + image_name) in_progress_filepath = os.path.join(download_dir, ".in_progress") with open(in_progress_filepath, "w") as file: - file.write(request_url) + file.write(url) img_number = 1 - json_response = response.json() - status = json_response["status"] - if status != "OK": - print("Expected server response OK, got %s" % status) - exit(2) - - chapter_hash = json_response["hash"] - server = json_response.get("server", "https://mangadex.org/data/") - for image_name in json_response["page_array"]: - image_url = "%s%s/%s" % (server, chapter_hash, image_name) + for image_url in image_urls: ext = image_url[image_url.rfind("."):] image_name = str(img_number) + ext image_path = os.path.join(download_dir, image_name) print("Downloading {} to {}".format(image_url, image_path)) - if not download_file(image_url, cookies, image_path): + if not download_file(image_url, image_path): print("Failed to download image: %s" % image_url) os.remove(in_progress_filepath) exit(2) diff --git a/plugins/mangakatana.py b/plugins/mangakatana.py index f2195a4..e90d916 100755 --- a/plugins/mangakatana.py +++ b/plugins/mangakatana.py @@ -54,7 +54,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -65,7 +65,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//div[@class="chapters"]//div[@class="chapter"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/manganelo.py 
b/plugins/manganelo.py index 1f0882c..57eb0cc 100755 --- a/plugins/manganelo.py +++ b/plugins/manganelo.py @@ -64,7 +64,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -75,7 +75,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//ul[@class="row-content-chapter"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/manganelos.py b/plugins/manganelos.py index 7e220d5..54c2625 100755 --- a/plugins/manganelos.py +++ b/plugins/manganelos.py @@ -56,7 +56,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -67,7 +67,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//section[@id="examples"]//div[@class="chapter-list"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/mangaplus.py b/plugins/mangaplus.py index 7104d80..0d87ddc 100755 --- a/plugins/mangaplus.py +++ b/plugins/mangaplus.py @@ -219,7 +219,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -235,7 +235,7 @@ def list_chapters(url, chapter_list_input): chapters = [] for chapter in reversed(all_chapters): - title = chapter.subtitle + title = chapter.subtitle.replace("/", "_") url = "https://mangaplus.shueisha.co.jp/viewer/{0}".format(chapter.id) if title.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/mangatown.py b/plugins/mangatown.py index d3a7e04..1a7eae9 100755 --- a/plugins/mangatown.py +++ b/plugins/mangatown.py @@ -56,7 +56,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -67,7 +67,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//ul[@class="chapter_list"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if "/manga/" not in url: continue diff --git a/plugins/mangawindow.py b/plugins/mangawindow.py index b5f5bf8..3a8c30f 100755 --- a/plugins/mangawindow.py +++ b/plugins/mangawindow.py @@ -51,7 +51,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = 
item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -66,7 +66,7 @@ def list_chapters(url, chapter_list_input): if title is None: print("Failed to get title for chapter") exit(2) - title = title.strip() + title = title.strip().replace("/", "_") url = "https://mangawindow.net" + element.attrib.get("href").strip() if title.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/readm.py b/plugins/readm.py index 3101587..74ae43c 100755 --- a/plugins/readm.py +++ b/plugins/readm.py @@ -56,7 +56,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -67,7 +67,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//div[@class="episodes-list"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() url = "https://readm.org" + url if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: -- cgit v1.2.3
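For reference, below is a minimal standalone sketch of the API flow this patch migrates plugins/mangadex.py to. The endpoints and JSON fields mirror what the new code reads (paginated /chapter queries with order[publishAt]=desc and the 2021-era "results" wrapper, translatedLanguage filtering, and /at-home/server for the image base URL). The HEADERS value, the function names and the __main__ demo are illustrative assumptions and not part of the plugin, which keeps its own headers, duplicate-chapter checks and .in_progress bookkeeping. One difference worth noting: the sketch rebuilds the request URL from manga_id on every pass, whereas the patch overwrites its url variable inside the while loop, so only the first page's URL appears to be well-formed.

    #!/usr/bin/env python3
    # Minimal sketch (not the plugin itself) of the MangaDex API flow the
    # patch above migrates to. Endpoints and JSON fields mirror what the new
    # plugins/mangadex.py reads; HEADERS and the names below are assumptions.

    import requests

    HEADERS = {"User-Agent": "mangadex-api-sketch"}  # assumed; the plugin defines its own headers

    def list_english_chapters(manga_id):
        # Page through /chapter for one manga, newest first, keeping only
        # English entries -- the same filter the plugin applies.
        chapters = []
        offset = 0
        while True:
            url = ("https://api.mangadex.org/chapter?manga=" + manga_id
                   + "&limit=100&offset=%d&order[publishAt]=desc" % offset)
            response = requests.get(url, headers=HEADERS)
            response.raise_for_status()
            results = response.json()["results"]  # 2021-era v5 response wrapper
            if not results:
                break
            for item in results:
                attributes = item["data"]["attributes"]
                if attributes["translatedLanguage"] != "en":
                    continue
                name = "Ch. " + attributes["chapter"]
                if attributes.get("title"):
                    name += " - " + attributes["title"]
                chapters.append({"name": name.replace("/", "_"),
                                 "id": item["data"]["id"]})
            offset += 100
        return chapters

    def chapter_image_urls(chapter_id):
        # Resolve an image server via /at-home/server, then build one URL per
        # page from the chapter hash and file list, as download_chapter does.
        base_url = requests.get("https://api.mangadex.org/at-home/server/" + chapter_id,
                                headers=HEADERS).json()["baseUrl"]
        attributes = requests.get("https://api.mangadex.org/chapter/" + chapter_id,
                                  headers=HEADERS).json()["data"]["attributes"]
        return [base_url + "/data/" + attributes["hash"] + "/" + page
                for page in attributes["data"]]

    if __name__ == "__main__":
        # Manga id taken from the usage example in the patch.
        for chapter in list_english_chapters("4e4a1ed8-d4a0-4096-86db-ca5e3fc42c5d"):
            print(chapter["name"], chapter["id"])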