#!/usr/bin/env python3

"""Automedia plugin for mangadex.org.

Commands:
    list <url>                 Print (as JSON on stdout) chapters of the manga that are
                               newer than the already-seen chapters supplied on stdin.
    download <url> <dir>       Download every page of the chapter at <url> into <dir>.
"""

import os
import time
import sys
import requests
import json
import re

from lxml import etree

def usage():
    """Print top-level usage and exit with status 1."""
    print("mangadex.py command")
    print("commands:")
    print(" download")
    print(" list")
    exit(1)

def usage_list():
    """Print usage for the 'list' command and exit with status 1."""
    print("mangadex.py list <url>")
    exit(1)

def usage_download():
    """Print usage for the 'download' command and exit with status 1."""
    print("mangadex.py download <url> <download_dir>")
    print("examples:")
    print(" mangadex.py download \"https://mangadex.org/title/7139/one-punch-man\" /home/adam/Manga/MangaName")
    print("")
    print("Note: The manga directory has to exist.")
    exit(1)

def download_file(url, save_path):
    """Stream the file at |url| to |save_path| in 8 KiB chunks.

    Raises requests.HTTPError on a non-success status code.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    file.write(chunk)

def title_url_extract_manga_id(url):
    """Return the numeric manga id from a mangadex.org/title/<id>/... url, or None."""
    result = re.search(r"mangadex\.org/title/([0-9]+)/", url)
    if result and len(result.groups()) > 0:
        return result.groups()[0]

# TODO: Support pagination. Currently only the n latest chapters are listed,
# but going through all pages might be too slow for large manga like naruto
def list_chapters(url, chapter_list_input):
    """Print (as JSON on stdout) the english chapters of |url| newer than the
    ones in |chapter_list_input|.

    |chapter_list_input| is a list of {"title": ..., "url": ...} dicts of
    already-seen chapters; listing stops at the first chapter already seen,
    since everything after it in the page is older.
    Exits with status 2 on a malformed url or a non-200 server response.
    """
    manga_id = title_url_extract_manga_id(url)
    if not manga_id:
        print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title/<number>/..." % url)
        exit(2)

    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to list chapters, server responded with status code %d" % response.status_code)
        exit(2)

    # Normalized (lowercased, space-stripped) titles of already-seen chapters.
    seen_titles = set()
    for item in chapter_list_input:
        title = item.get("title")
        if title and len(title) > 0:
            seen_titles.add(title.lower().replace(" ", ""))

    seen_urls = set()
    for item in chapter_list_input:
        # Renamed from |url|: the original shadowed the function parameter.
        item_url = item.get("url")
        if item_url and len(item_url) > 0:
            seen_urls.add(item_url)

    lang = "1" # english

    tree = etree.HTML(response.text)
    chapters = []
    for element in tree.xpath("//div[@data-manga-id='%s']" % manga_id):
        chapter_lang = element.attrib.get("data-lang")
        if chapter_lang != lang:
            continue
        chapter_id = element.attrib.get("data-id")
        chapter_url = "https://mangadex.org/chapter/%s" % chapter_id
        chapter_title = element.attrib.get("data-title") # optional, may be None
        chapter_number = element.attrib.get("data-chapter")
        chapter_name = "Ch. %s" % chapter_number
        if chapter_title:
            chapter_name += " - %s" % chapter_title
        # Stop at the first already-seen chapter. Guard the title lookup:
        # chapter_title may be None (fixes AttributeError on untitled chapters).
        if (chapter_title and chapter_title.lower().replace(" ", "") in seen_titles) or chapter_url in seen_urls:
            break
        chapters.append({ "name": chapter_name, "url": chapter_url })
    print(json.dumps(chapters))

def chapter_url_extract_manga_id(url):
    """Return the numeric chapter id from a mangadex.org/chapter/<id> url, or None.

    NOTE: despite the name, the extracted id is a chapter id; the name is kept
    for backward compatibility.
    """
    result = re.search(r"mangadex\.org/chapter/([0-9]+)", url)
    if result and len(result.groups()) > 0:
        return result.groups()[0]

def download_chapter(url, download_dir):
    """Download every page image of the chapter at |url| into |download_dir|.

    Writes |download_dir|/.in_progress (containing the api url) while running
    and |download_dir|/.finished on success; pages are saved as 1.<ext>, 2.<ext>, ...
    Exits with status 2 on a malformed url, a non-200 response, or an api error.
    Note: |download_dir| must already exist.
    """
    # The id embedded in a /chapter/ url is a chapter id, not a manga id.
    chapter_id = chapter_url_extract_manga_id(url)
    if not chapter_id:
        print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/chapter/<number>" % url)
        exit(2)

    url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % chapter_id

    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to list chapter images, server responded with status code %d" % response.status_code)
        exit(2)

    # Marker file so an interrupted download can be detected and resumed/redone.
    in_progress_filepath = os.path.join(download_dir, ".in_progress")
    with open(in_progress_filepath, "w") as file:
        file.write(url)

    img_number = 1
    json_response = response.json()
    status = json_response["status"]
    if status != "OK":
        print("Expected server response OK, got %s" % status)
        exit(2)

    chapter_hash = json_response["hash"]
    for image_name in json_response["page_array"]:
        image_url = "https://mangadex.org/data/%s/%s" % (chapter_hash, image_name)
        # Keep the server-side file extension, but number the pages sequentially.
        ext = image_url[image_url.rfind("."):]
        image_name = str(img_number) + ext
        image_path = os.path.join(download_dir, image_name)
        print("Downloading {} to {}".format(image_url, image_path))
        download_file(image_url, image_path)
        img_number += 1

    with open(os.path.join(download_dir, ".finished"), "w") as file:
        file.write("1")

    os.remove(in_progress_filepath)

def main():
    """CLI entry point: dispatch to the 'list' or 'download' command."""
    if len(sys.argv) < 2:
        usage()

    command = sys.argv[1]
    if command == "list":
        if len(sys.argv) < 3:
            usage_list()

        url = sys.argv[2]
        chapter_list_input = sys.stdin.read()
        if len(chapter_list_input) == 0:
            chapter_list_input = []
        else:
            chapter_list_input = json.loads(chapter_list_input)
        list_chapters(url, chapter_list_input)
    elif command == "download":
        if len(sys.argv) < 4:
            usage_download()
        url = sys.argv[2]
        download_dir = sys.argv[3]
        download_chapter(url, download_dir)
    else:
        usage()

# Guarded entry point: importing this module no longer runs (and exits) the CLI.
if __name__ == "__main__":
    main()