#!/usr/bin/env python3
"""Mangadex scraper.

Commands:
    list <url>                  - print (as JSON on stdout) the chapters of a manga
                                  that are newer than the chapters given on stdin
    download <url> <dir>        - download all page images of one chapter into dir

Exit codes: 1 for usage errors, 2 for scrape/download failures.
"""

import json
import os
import re
import sys

import requests
from lxml import etree  # NOTE(review): unused in this file — kept to avoid breaking anything that patches/relies on it; confirm before removing

# Browser-like User-Agent: the site may reject the default python-requests UA.
headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}


def usage():
    """Print top-level usage and exit with status 1."""
    print("mangadex.py command")
    print("commands:")
    print(" download")
    print(" list")
    sys.exit(1)


def usage_list():
    """Print usage for the 'list' command and exit with status 1."""
    print("mangadex.py list <url>")
    sys.exit(1)


def usage_download():
    """Print usage for the 'download' command and exit with status 1."""
    print("mangadex.py download <url> <download_dir>")
    print("examples:")
    print(" mangadex.py download \"https://mangadex.org/title/7139/one-punch-man\" /home/adam/Manga/MangaName")
    print("")
    print("Note: The manga directory has to exist.")
    sys.exit(1)


if len(sys.argv) < 2:
    usage()


def download_file(url, save_path):
    """Stream `url` to `save_path` in 8 KiB chunks.

    Returns True on success, False on a non-2xx HTTP response.
    """
    with requests.get(url, headers=headers, stream=True) as response:
        if not response.ok:
            return False
        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
    return True


def title_url_extract_manga_id(url):
    """Return the numeric manga id from a mangadex.org/title/<id>/... url, or None."""
    result = re.search(r"mangadex\.org/title/([0-9]+)/", url)
    if result:
        return result.group(1)
    return None


def chapter_sort_func(chapter_data):
    """Sort key for (chapter_id, chapter_dict) pairs: the upload timestamp."""
    return chapter_data[1].get("timestamp", 0)


def list_chapters(url, chapter_list_input):
    """Print, as JSON, the chapters of the manga at `url` not yet seen.

    `chapter_list_input` is a list of {"title": ..., "url": ...} dicts of
    already-seen chapters; listing is newest-first and stops at the first
    previously-seen chapter. Exits with status 2 on any scrape failure.
    """
    manga_id = title_url_extract_manga_id(url)
    if not manga_id:
        print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title/<number>/..." % url)
        sys.exit(2)

    api_url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id
    response = requests.get(api_url, headers=headers)
    if response.status_code != 200:
        print("Failed to list chapters, server responded with status code %d" % response.status_code)
        sys.exit(2)

    # Normalized titles and raw urls of already-seen chapters; hitting either stops the listing.
    seen_titles = set()
    for item in chapter_list_input:
        title = item.get("title")
        if title:
            seen_titles.add(title.lower().replace(" ", ""))

    seen_urls = set()
    for item in chapter_list_input:
        item_url = item.get("url")
        if item_url:
            seen_urls.add(item_url)

    lang = "gb"  # english

    json_response = response.json()
    status = json_response["status"]
    if status != "OK":
        print("Expected server response OK, got %s" % status)
        sys.exit(2)

    # Newest chapters first.
    chapters = sorted(json_response["chapter"].items(), key=chapter_sort_func, reverse=True)

    prev_chapter_number = ""
    output_chapters = []
    for chapter_id, chapter in chapters:
        if chapter.get("lang_code", "") != lang:
            continue

        chapter_number_str = chapter.get("chapter", "0")
        # The same chapter may be uploaded by several groups; keep only the first (newest).
        if chapter_number_str == prev_chapter_number:
            continue
        prev_chapter_number = chapter_number_str

        chapter_title = chapter.get("title")
        chapter_url = "https://mangadex.org/chapter/" + chapter_id
        chapter_name = "Ch. " + chapter_number_str
        if chapter_title:
            chapter_name += " - " + chapter_title

        # Stop at the first previously-seen chapter. Guard the title lookup:
        # chapter_title may be None/empty, and the url check must run regardless.
        if (chapter_title and chapter_title.lower().replace(" ", "") in seen_titles) or chapter_url in seen_urls:
            break

        output_chapters.append({"name": chapter_name, "url": chapter_url})
    print(json.dumps(output_chapters))


def chapter_url_extract_manga_id(url):
    """Return the numeric chapter id from a mangadex.org/chapter/<id> url, or None."""
    result = re.search(r"mangadex\.org/chapter/([0-9]+)", url)
    if result:
        return result.group(1)
    return None


def download_chapter(url, download_dir):
    """Download every page image of the chapter at `url` into `download_dir`.

    Writes a ".in_progress" marker (containing the chapter url) while working
    and a ".finished" marker on success; the in-progress marker is removed on
    both success and failure. Exits with status 2 on any failure.
    Note: download_dir must already exist.
    """
    request_url = url
    manga_id = chapter_url_extract_manga_id(url)
    if not manga_id:
        print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/chapter/<number>" % url)
        sys.exit(2)

    api_url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % manga_id
    response = requests.get(api_url, headers=headers)
    if response.status_code != 200:
        print("Failed to list chapter images, server responded with status code %d" % response.status_code)
        sys.exit(2)

    in_progress_filepath = os.path.join(download_dir, ".in_progress")
    with open(in_progress_filepath, "w") as file:
        file.write(request_url)

    json_response = response.json()
    status = json_response["status"]
    if status != "OK":
        print("Expected server response OK, got %s" % status)
        sys.exit(2)

    chapter_hash = json_response["hash"]
    # Fall back to the main image server if the api does not name one.
    server = json_response.get("server", "https://mangadex.org/data/")

    img_number = 1
    for page_name in json_response["page_array"]:
        image_url = "%s%s/%s" % (server, chapter_hash, page_name)
        # Keep the remote file extension but number the pages sequentially.
        ext = image_url[image_url.rfind("."):]
        image_path = os.path.join(download_dir, str(img_number) + ext)
        print("Downloading {} to {}".format(image_url, image_path))
        if not download_file(image_url, image_path):
            print("Failed to download image: %s" % image_url)
            os.remove(in_progress_filepath)
            sys.exit(2)
        img_number += 1

    if img_number == 1:  # page_array was empty
        print("Failed to find images for chapter")
        os.remove(in_progress_filepath)
        sys.exit(2)

    with open(os.path.join(download_dir, ".finished"), "w") as file:
        file.write("1")
    os.remove(in_progress_filepath)


command = sys.argv[1]
if command == "list":
    if len(sys.argv) < 3:
        usage_list()
    url = sys.argv[2]
    # stdin carries the already-seen chapter list as JSON; empty input means none.
    chapter_list_input = sys.stdin.read()
    if len(chapter_list_input) == 0:
        chapter_list_input = []
    else:
        chapter_list_input = json.loads(chapter_list_input)
    list_chapters(url, chapter_list_input)
elif command == "download":
    if len(sys.argv) < 4:
        usage_download()
    url = sys.argv[2]
    download_dir = sys.argv[3]
    download_chapter(url, download_dir)
else:
    usage()