diff options
author | dec05eba <dec05eba@protonmail.com> | 2021-09-17 13:50:16 +0200 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2021-09-17 13:50:16 +0200 |
commit | 171789623b231c28f33c9b34be279b2b79792089 (patch) | |
tree | 2b52196245dd75fa3b37ff33f5fb8021c31e4bb8 | |
parent | 29d09b45e98eaf4c4659517510f164c5615d7a76 (diff) |
Fix mangatown download
-rwxr-xr-x | plugins/mangatown.py | 64 |
1 file changed, 42 insertions, 22 deletions
diff --git a/plugins/mangatown.py b/plugins/mangatown.py index 15f5681..0740925 100755 --- a/plugins/mangatown.py +++ b/plugins/mangatown.py @@ -83,6 +83,9 @@ def is_only_num(s): return True def download_chapter(url, download_dir): + if not url[-1] == '/': + url += '/' + response = requests.get(url, headers=headers) response.raise_for_status() @@ -95,35 +98,52 @@ def download_chapter(url, download_dir): for element in tree.xpath('//div[@class="page_select"]//option'): value = element.attrib.get("value", "") if "/manga/" in value and is_only_num(element.text): - num_pages += 1 + num_pages = int(element.text) - num_pages /= 2 + if num_pages == 0: + print("Failed to find number of pages for chapter") + os.remove(in_progress_filepath) + exit(2) img_number = 1 - while True: - full_url = url + str(img_number) + ".html" - response = requests.get(full_url, headers=headers) - if not response.ok(): + page_url = url + while img_number <= num_pages: + image_sources = tree.xpath("//img[@id='image']/@src") + if not image_sources or len(image_sources) == 0: + print("Failed to find images %d for chapter" % img_number) + os.remove(in_progress_filepath) + exit(2) + + image_source = "https:" + image_sources[0] + ext = image_source[image_source.rfind("."):] + image_name = str(img_number) + ext + image_path = os.path.join(download_dir, image_name) + print("Downloading {} to {}".format(image_source, image_path)) + file_size = download_file(image_source, image_path) + if file_size < 255: + print("resource temporary unavailable: %s" % image_source) + os.remove(in_progress_filepath) + exit(2) + + img_number += 1 + if img_number - 1 == num_pages: break + next_page_urls = tree.xpath("//a[@class='next_page']/@href") + if not next_page_urls or len(next_page_urls) == 0: + break + + next_page_url = url + next_page_urls[0] + page_url = next_page_url + response = requests.get(page_url, headers=headers) + if not response.ok: + print("Failed to get next page for chapter") + 
os.remove(in_progress_filepath) + exit(2) tree = etree.HTML(response.text) - for image_source in tree.xpath('//div[@id="viewer"]//img/@src'): - if "/store/manga/" not in image_source: - continue - if len(image_source) >= 2: - image_source = image_source[2:] - image_source = "https://" + image_source - image_name = str(img_number) + ".jpg" - image_path = os.path.join(download_dir, image_name) - print("Downloading {} to {}".format(image_source, image_path)) - if not download_file(image_source, image_path): - print("Failed to download image: %s" % image_source) - os.remove(in_progress_filepath) - exit(2) - img_number += 1 - if img_number - 1 != num_pages: - print("Failed to find images for chapter. Expected %d images, got %d" % (img_number - 1, num_pages)) + if img_number == 1 or (img_number - 1 != num_pages): + print("Failed to find images for chapter") os.remove(in_progress_filepath) exit(2) |