#!/usr/bin/env python3
# Fetches manga chapter lists and chapter images from manganelo.com, falling
# back to mangakakalot.com when a series has been migrated there.

import os
import sys
import requests
import json
from lxml import etree

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

def usage():
    print("manganelo.py command")
    print("commands:")
    print("  download")
    print("  list")
    sys.exit(1)

def usage_list():
    print("manganelo.py list <manga-url>")
    sys.exit(1)

def usage_download():
    print("manganelo.py download <chapter-url> <download-dir>")
    print("examples:")
    print("  manganelo.py download \"https://manganelo.com/chapter/read_naruto_manga_online_free3/chapter_700.5\" /home/user/Manga/MangaName")
    print("")
    print("Note: The manga directory has to exist.")
    sys.exit(1)

if len(sys.argv) < 2:
    usage()

def download_file(url, save_path):
    # Image requests need browser-like headers and a manganelo referer,
    # otherwise the image servers reject the download.
    file_size = 0
    headers = {
        "accept-language": "en-US,en;q=0.9",
        "accept": "image/webp,image/apng,image/*,*/*;q=0.8",
        "sec-fetch-site": "cross-site",
        "sec-fetch-mode": "no-cors",
        "sec-fetch-dest": "image",
        "referer": "https://manganelo.com/",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
    }
    with requests.get(url, stream=True, headers=headers, timeout=30) as response:
        if not response.ok:
            return 0
        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    file.write(chunk)
                    file_size += len(chunk)
    return file_size

def redirect_check(response_text, url):
    # Migrated pages redirect via a javascript window.location.assign(...)
    # call instead of an HTTP redirect, so extract the target url manually.
    prefix = 'window.location.assign("'
    idx = response_text.find(prefix)
    if idx == -1:
        return (response_text, url)
    idx += len(prefix)
    end = response_text.find('"', idx)
    if end == -1:
        return (response_text, url)
    response = requests.get(response_text[idx:end], timeout=30, headers=headers)
    response.raise_for_status()
    return (response.text, response.url)

def redirect_migrated_url(url, tree, is_chapter):
    # A "panel-not-found" page means the series moved to mangakakalot.
    # Re-fetch from there, following the javascript redirect. The redirect
    # only resolves for series pages, so for a chapter url strip the chapter
    # segment first, resolve the series url, then re-append it.
    if tree.xpath('//div[@class="panel-not-found"]'):
        if url[-1] == '/':
            url = url[:-1]
        url = url.replace("manganelo", "mangakakalot")
        chapter = None
        if is_chapter:
            idx = url.rfind('/')
            if idx == -1:
                print("Chapter not found in url")
                return None
            chapter = url[idx + 1:]
            url = url[0:idx]
        response = requests.get(url, timeout=30, headers=headers)
        response.raise_for_status()
        text, url = redirect_check(response.text, url)
        if chapter:
            url = url + "/" + chapter
            response = requests.get(url, timeout=30, headers=headers)
            response.raise_for_status()
            text = response.text
        return etree.HTML(text)
    return tree

def list_chapters(url, chapter_list_input):
    response = requests.get(url, timeout=30, headers=headers)
    response.raise_for_status()

    # Normalize the already-downloaded chapters (read from stdin) so they can
    # be matched against the titles and urls scraped from the page.
    seen_titles = set()
    for item in chapter_list_input:
        title = item.get("title")
        if title and len(title) > 0:
            seen_titles.add(title.lower().replace(" ", "").replace("/", "_"))

    seen_urls = set()
    for item in chapter_list_input:
        chapter_url = item.get("url")
        if chapter_url and len(chapter_url) > 0:
            seen_urls.add(chapter_url.replace("mangakakalot", "manganelo"))

    tree = etree.HTML(response.text)
    tree = redirect_migrated_url(url, tree, False)

    # Chapters are listed newest first, so stop at the first one already
    # seen. The two xpath queries cover the manganelo and mangakakalot page
    # layouts respectively.
    chapters = []
    for element in tree.xpath('//ul[@class="row-content-chapter"]//a'):
        element_text = element.text.strip().replace("/", "_")
        chapter_url = element.attrib.get("href").strip()
        if element_text.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls:
            break
        chapters.append({"name": element_text, "url": chapter_url})

    for element in tree.xpath('//div[@class="chapter-list"]//a'):
        element_text = element.text.strip().replace("/", "_")
        chapter_url = element.attrib.get("href").strip()
        if element_text.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls:
            break
        chapters.append({"name": element_text, "url": chapter_url})

    print(json.dumps(chapters))

def download_chapter_images(url, download_dir, use_backup_server):
    try:
        cookies = {}
        if use_backup_server:
            # The content_server cookie switches the site to its mirror
            # image server.
            cookies = {"content_server": "server2"}
        response = requests.get(url, timeout=30, cookies=cookies)
        response.raise_for_status()
    except requests.RequestException:
        # Treat any request failure as a miss so the caller can retry on
        # the backup server.
        return False

    tree = etree.HTML(response.text)
    tree = redirect_migrated_url(url, tree, True)
    if tree is None:
        return False

    img_number = 1
    for image_source in tree.xpath('//div[@class="container-chapter-reader"]/img/@src'):
        ext = image_source[image_source.rfind("."):]
        image_name = str(img_number) + ext
        image_path = os.path.join(download_dir, image_name)
        print("Downloading {} to {}".format(image_source, image_path))
        file_size = download_file(image_source, image_path)
        if file_size < 255:
            # Responses this small are error placeholders rather than real
            # pages; report failure so the backup server is tried.
            print("resource temporarily unavailable: %s" % image_source)
            return False
        img_number += 1

    if img_number == 1:
        print("Failed to find images for chapter")
        return False

    return True

def download_chapter(url, download_dir):
    # Marker files let a consumer of the download directory tell a partial
    # download (.in_progress) apart from a complete one (.finished).
    in_progress_filepath = os.path.join(download_dir, ".in_progress")
    with open(in_progress_filepath, "w") as file:
        file.write(url)

    if not download_chapter_images(url, download_dir, False):
        if not download_chapter_images(url, download_dir, True):
            os.remove(in_progress_filepath)
            sys.exit(2)

    with open(os.path.join(download_dir, ".finished"), "w") as file:
        file.write("1")

    os.remove(in_progress_filepath)

command = sys.argv[1]
if command == "list":
    if len(sys.argv) < 3:
        usage_list()

    url = sys.argv[2].replace("mangakakalot", "manganelo")
    chapter_list_input = sys.stdin.read()
    if len(chapter_list_input) == 0:
        chapter_list_input = []
    else:
        chapter_list_input = json.loads(chapter_list_input)
    list_chapters(url, chapter_list_input)
elif command == "download":
    if len(sys.argv) < 4:
        usage_download()

    url = sys.argv[2].replace("mangakakalot", "manganelo")
    download_dir = sys.argv[3]
    download_chapter(url, download_dir)
else:
    usage()