From 171789623b231c28f33c9b34be279b2b79792089 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 17 Sep 2021 13:50:16 +0200 Subject: Fix mangatown download --- plugins/mangatown.py | 64 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 22 deletions(-) (limited to 'plugins') diff --git a/plugins/mangatown.py b/plugins/mangatown.py index 15f5681..0740925 100755 --- a/plugins/mangatown.py +++ b/plugins/mangatown.py @@ -83,6 +83,9 @@ def is_only_num(s): return True def download_chapter(url, download_dir): + if not url[-1] == '/': + url += '/' + response = requests.get(url, headers=headers) response.raise_for_status() @@ -95,35 +98,52 @@ def download_chapter(url, download_dir): for element in tree.xpath('//div[@class="page_select"]//option'): value = element.attrib.get("value", "") if "/manga/" in value and is_only_num(element.text): - num_pages += 1 + num_pages = int(element.text) - num_pages /= 2 + if num_pages == 0: + print("Failed to find number of pages for chapter") + os.remove(in_progress_filepath) + exit(2) img_number = 1 - while True: - full_url = url + str(img_number) + ".html" - response = requests.get(full_url, headers=headers) - if not response.ok(): + page_url = url + while img_number <= num_pages: + image_sources = tree.xpath("//img[@id='image']/@src") + if not image_sources or len(image_sources) == 0: + print("Failed to find images %d for chapter" % img_number) + os.remove(in_progress_filepath) + exit(2) + + image_source = "https:" + image_sources[0] + ext = image_source[image_source.rfind("."):] + image_name = str(img_number) + ext + image_path = os.path.join(download_dir, image_name) + print("Downloading {} to {}".format(image_source, image_path)) + file_size = download_file(image_source, image_path) + if file_size < 255: + print("resource temporary unavailable: %s" % image_source) + os.remove(in_progress_filepath) + exit(2) + + img_number += 1 + if img_number - 1 == num_pages: break + next_page_urls = tree.xpath("//a[@class='next_page']/@href") + if not next_page_urls or len(next_page_urls) == 0: + break + + next_page_url = url + next_page_urls[0] + page_url = next_page_url + response = requests.get(page_url, headers=headers) + if not response.ok: + print("Failed to get next page for chapter") + os.remove(in_progress_filepath) + exit(2) tree = etree.HTML(response.text) - for image_source in tree.xpath('//div[@id="viewer"]//img/@src'): - if "/store/manga/" not in image_source: - continue - if len(image_source) >= 2: - image_source = image_source[2:] - image_source = "https://" + image_source - image_name = str(img_number) + ".jpg" - image_path = os.path.join(download_dir, image_name) - print("Downloading {} to {}".format(image_source, image_path)) - if not download_file(image_source, image_path): - print("Failed to download image: %s" % image_source) - os.remove(in_progress_filepath) - exit(2) - img_number += 1 - if img_number - 1 != num_pages: - print("Failed to find images for chapter. Expected %d images, got %d" % (img_number - 1, num_pages)) + if img_number == 1 or (img_number - 1 != num_pages): + print("Failed to find images for chapter") os.remove(in_progress_filepath) exit(2) -- cgit v1.2.3