| author | dec05eba <dec05eba@protonmail.com> | 2020-05-28 02:47:58 +0200 |
|---|---|---|
| committer | dec05eba <dec05eba@protonmail.com> | 2020-07-06 07:12:34 +0200 |
| commit | 138f83a92770c236f703ba9b9172f321fa59b6ad (patch) | |
| tree | b6bdab8ae6eb395e6340c11105284c06037cb26a | |
| parent | 03534b3fcfd56f7725b5a558a1c6361a1d6db1da (diff) | |
List all chapters on mangadex, use correct download server
-rwxr-xr-x | automedia.py | 4 |
-rwxr-xr-x | plugins/mangadex.py | 66 |
2 files changed, 50 insertions, 20 deletions
diff --git a/automedia.py b/automedia.py
index 4158d44..5670988 100755
--- a/automedia.py
+++ b/automedia.py
@@ -582,6 +582,10 @@ def sync(rss_config_dir, html_config_dir, download_dir, sync_rate_sec):
             sync_rss(rss, tc)
             # Add last synced timestamp. This together with "updated" file is used to remove series
             # that haven't updated in a long time (either finished series or axed series)
+
+            # TODO: Fix. This will fail if the path is removed while it's running,
+            # same with other places.
+            # To fix, add a command to remove tracked items safely
             with open(os.path.join(rss_tracked_dir, rss.title, "synced"), "w") as file:
                 file.write(str(time.time()))
         #else:
diff --git a/plugins/mangadex.py b/plugins/mangadex.py
index 238a264..4890417 100755
--- a/plugins/mangadex.py
+++ b/plugins/mangadex.py
@@ -9,6 +9,10 @@ import re
 
 from lxml import etree
 
+headers = {
+    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
+}
+
 def usage():
     print("mangadex.py command")
     print("commands:")
@@ -32,7 +36,7 @@ if len(sys.argv) < 2:
     usage()
 
 def download_file(url, save_path):
-    with requests.get(url, stream=True) as response:
+    with requests.get(url, headers=headers, stream=True) as response:
         if not response.ok:
             return False
         with open(save_path, "wb") as file:
@@ -46,6 +50,9 @@ def title_url_extract_manga_id(url):
     if result and len(result.groups()) > 0:
         return result.groups()[0]
 
+def chapter_sort_func(chapter_data):
+    return chapter_data[1].get("timestamp", 0)
+
 # TODO: Support pagination. Currently only the n latest chapters are listed,
 # but going through all pages might be too slow for large manga like naruto
 def list_chapters(url, chapter_list_input):
@@ -54,7 +61,8 @@ def list_chapters(url, chapter_list_input):
         print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title/<number>/..." % url)
         exit(2)
 
-    response = requests.get(url)
+    url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id
+    response = requests.get(url, headers=headers)
     if response.status_code != 200:
         print("Failed to list chapters, server responded with status code %d" % response.status_code)
         exit(2)
@@ -71,29 +79,46 @@ def list_chapters(url, chapter_list_input):
         if url and len(url) > 0:
             seen_urls.add(url)
 
-    lang = "1" # english
+    lang = "gb" # english
+
+    json_response = response.json()
+    status = json_response["status"]
+    if status != "OK":
+        print("Expected server response OK, got %s" % status)
+        exit(2)
 
-    tree = etree.HTML(response.text)
+    chapter_json = json_response["chapter"]
     chapters = []
-    prev_chapter_number = None
-    for element in tree.xpath("//div[@data-manga-id='%s']" % manga_id):
-        chapter_lang = element.attrib.get("data-lang")
-        if chapter_lang != lang:
+    for key, value in chapter_json.items():
+        chapters.append((key, value))
+
+    chapters = sorted(chapters, key=chapter_sort_func, reverse=True)
+    prev_chapter_number = ""
+    output_chapters = []
+    for chapter_data in chapters:
+        chapter_id = chapter_data[0]
+        chapter = chapter_data[1]
+
+        lang_code = chapter.get("lang_code", "")
+        if lang_code != lang:
             continue
-        chapter_id = element.attrib.get("data-id")
-        chapter_url = "https://mangadex.org/chapter/%s" % chapter_id
-        chapter_title = element.attrib.get("data-title") # optional
-        chapter_number = element.attrib.get("data-chapter")
-        if prev_chapter_number and chapter_number == prev_chapter_number:
+
+        chapter_number_str = chapter.get("chapter", "0")
+        if chapter_number_str == prev_chapter_number:
             continue
-        prev_chapter_number = chapter_number
-        chapter_name = "Ch. %s" % chapter_number
+        prev_chapter_number = chapter_number_str
+
+        chapter_title = chapter.get("title")
+        chapter_url = "https://mangadex.org/chapter/" + chapter_id
+        chapter_name = "Ch. " + chapter_number_str
         if chapter_title:
-            chapter_name += " - %s" % chapter_title
+            chapter_name += " - " + chapter_title
+
         if chapter_title.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls:
             break
-        chapters.append({ "name": chapter_name, "url": chapter_url })
-    print(json.dumps(chapters))
+
+        output_chapters.append({ "name": chapter_name, "url": chapter_url })
+    print(json.dumps(output_chapters))
 
 def chapter_url_extract_manga_id(url):
     result = re.search("mangadex.org/chapter/([0-9]+)", url)
@@ -109,7 +134,7 @@ def download_chapter(url, download_dir):
 
     url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % manga_id
 
-    response = requests.get(url)
+    response = requests.get(url, headers=headers)
     if response.status_code != 200:
         print("Failed to list chapter images, server responded with status code %d" % response.status_code)
         exit(2)
@@ -126,8 +151,9 @@ def download_chapter(url, download_dir):
         exit(2)
 
     chapter_hash = json_response["hash"]
+    server = json_response.get("server", "https://mangadex.org/data/")
    for image_name in json_response["page_array"]:
-        image_url = "https://mangadex.org/data/%s/%s" % (chapter_hash, image_name)
+        image_url = "%s%s/%s" % (server, chapter_hash, image_name)
         ext = image_url[image_url.rfind("."):]
         image_name = str(img_number) + ext
         image_path = os.path.join(download_dir, image_name)
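Taken together, the plugin's listing and download flow after this commit amounts to the self-contained sketch below. It only restates the logic visible in the diff: the JSON field names (`status`, `chapter`, `lang_code`, `timestamp`, `hash`, `page_array`, `server`) come straight from the code above, but the full response shapes are assumed, and the old `mangadex.org/api` endpoints it targets have since been replaced, so treat this as an illustration of the commit rather than something guaranteed to run against the live site.

```python
#!/usr/bin/env python3
# Sketch of the flow this commit moves to: query the mangadex JSON api
# instead of scraping HTML. Field names are taken from the diff above;
# the full response shapes are assumed, not verified against the live api.
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

def list_english_chapters(manga_id):
    # /api/?id=<id>&type=manga is assumed to return
    # {"status": "OK", "chapter": {chapter_id: {...}, ...}}
    url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()
    if data.get("status") != "OK":
        raise RuntimeError("Expected server response OK, got %s" % data.get("status"))

    # Newest first, so deduplicating on the chapter number below keeps
    # the most recent upload of each chapter
    chapters = sorted(data["chapter"].items(),
                      key=lambda item: item[1].get("timestamp", 0),
                      reverse=True)
    result = []
    prev_number = ""
    for chapter_id, chapter in chapters:
        if chapter.get("lang_code", "") != "gb":  # "gb" is english on mangadex
            continue
        number = chapter.get("chapter", "0")
        if number == prev_number:  # same chapter released by another group
            continue
        prev_number = number
        result.append(("https://mangadex.org/chapter/" + chapter_id, chapter))
    return result

def chapter_image_urls(chapter_id):
    # /api/?id=<id>&server=null&type=chapter is assumed to return the
    # image server, the chapter hash and the page file names
    url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % chapter_id
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()
    # The fix in this commit: use the server the api hands back instead of
    # hardcoding the main data server, which not every chapter lives on
    server = data.get("server", "https://mangadex.org/data/")
    return ["%s%s/%s" % (server, data["hash"], page)
            for page in data["page_array"]]
```

Sorting newest-first by `timestamp` before skipping repeated chapter numbers is what makes the dedup keep the latest upload when several scanlation groups have released the same chapter, and the `server` fallback mirrors the `https://mangadex.org/data/` URL the old code hardcoded.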