#!/usr/bin/env python3
# Download manga from mangatown.com.
# "list" prints the chapters of a series as JSON, "download" fetches every
# page image of a single chapter into an existing directory.

import os
import sys
import json
import requests
from lxml import etree

# Some mirrors reject requests without a browser-like user agent.
headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

def usage():
    print("mangatown.py command")
    print("commands:")
    print("  download")
    print("  list")
    sys.exit(1)

def usage_list():
    print("mangatown.py list <url>")
    sys.exit(1)

def usage_download():
    print("mangatown.py download <url> <download_dir>")
    print("examples:")
    print("  mangatown.py download \"https://www.mangatown.com/manga/naruto/v63/c700.2/\" /home/adam/Manga/MangaName")
    print("")
    print("Note: The manga directory has to exist.")
    sys.exit(1)

def download_file(url, save_path):
    # Stream the file to disk in 8 KiB chunks; returns the number of bytes
    # written, or 0 on failure.
    file_size = 0
    with requests.get(url, headers=headers, stream=True) as response:
        if not response.ok:
            return 0
        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
                    file_size += len(chunk)
    return file_size

def list_chapters(url, chapter_list_input):
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to list chapters, server responded with status code %d" % response.status_code)
        sys.exit(2)

    # Index the chapters we have already seen (passed as JSON on stdin)
    # by normalized title and by URL.
    seen_titles = set()
    seen_urls = set()
    for item in chapter_list_input:
        title = item.get("title")
        if title:
            seen_titles.add(title.lower().replace(" ", ""))
        chapter_url = item.get("url")
        if chapter_url:
            seen_urls.add(chapter_url)

    tree = etree.HTML(response.text)
    chapters = []
    for element in tree.xpath('//ul[@class="chapter_list"]//a'):
        element_text = (element.text or "").strip()
        chapter_url = element.attrib.get("href", "").strip()
        if "/manga/" not in chapter_url:
            continue
        chapter_url = "https://www.mangatown.com" + chapter_url
        # The chapter list is ordered newest first, so stop at the first
        # chapter we have already seen; everything after it is older.
        if element_text.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls:
            break
        chapters.append({"name": element_text, "url": chapter_url})
    print(json.dumps(chapters))

def is_only_num(s):
    # True if s contains only ASCII digits (also True for the empty string).
    for c in s:
        if c < '0' or c > '9':
            return False
    return True

def download_chapter(url, download_dir):
    # Verify the chapter page exists before writing any tracking files.
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to list chapter images, server responded with status code %d" % response.status_code)
        sys.exit(2)

    # Mark the chapter as in progress so an interrupted download can be detected.
    in_progress_filepath = os.path.join(download_dir, ".in_progress")
    with open(in_progress_filepath, "w") as file:
        file.write(url)

    # Pages live at <chapter url>1.html, <chapter url>2.html, ... so walk
    # them in order until the server stops returning 200.
    img_number = 1
    num_downloaded = 0
    while True:
        full_url = url + str(img_number) + ".html"
        response = requests.get(full_url, headers=headers)
        if response.status_code != 200:
            break

        tree = etree.HTML(response.text)
        for image_source in tree.xpath('//div[@id="viewer"]//img/@src'):
            if "/store/manga/" not in image_source:
                continue
            # Image sources are protocol-relative ("//host/path"), so strip
            # the leading slashes and prepend https.
            if image_source.startswith("//"):
                image_source = image_source[2:]
            image_source = "https://" + image_source
            image_path = os.path.join(download_dir, str(img_number) + ".jpg")
            print("Downloading {} to {}".format(image_source, image_path))
            if not download_file(image_source, image_path):
                print("Failed to download image: %s" % image_source)
                os.remove(in_progress_filepath)
                sys.exit(2)
            num_downloaded += 1
        img_number += 1

    if num_downloaded == 0:
        print("Failed to find images for chapter")
        os.remove(in_progress_filepath)
        sys.exit(2)

    # Swap the in-progress marker for a finished marker.
    with open(os.path.join(download_dir, ".finished"), "w") as file:
        file.write("1")
    os.remove(in_progress_filepath)

if len(sys.argv) < 2:
    usage()

command = sys.argv[1]
if command == "list":
    if len(sys.argv) < 3:
        usage_list()
    url = sys.argv[2]
    # Chapters that were already downloaded are passed as a JSON array on
    # stdin; an empty stdin means nothing has been downloaded yet.
    chapter_list_input = sys.stdin.read()
    if len(chapter_list_input) == 0:
        chapter_list_input = []
    else:
        chapter_list_input = json.loads(chapter_list_input)
    list_chapters(url, chapter_list_input)
elif command == "download":
    if len(sys.argv) < 4:
        usage_download()
    url = sys.argv[2]
    download_dir = sys.argv[3]
    download_chapter(url, download_dir)
else:
    usage()
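# Example invocations (a sketch; the series URL and target paths below are
# illustrative, only the chapter URL comes from the usage text above):
#
#   # List the chapters of a series as JSON. Stdin carries the chapters that
#   # were already downloaded; an empty array (or empty stdin) means none.
#   echo '[]' | ./mangatown.py list "https://www.mangatown.com/manga/naruto/"
#
#   # Download one chapter into a directory that must already exist.
#   mkdir -p /home/adam/Manga/Naruto/c700.2
#   ./mangatown.py download "https://www.mangatown.com/manga/naruto/v63/c700.2/" /home/adam/Manga/Naruto/c700.2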