From 3586c8d003077ee32b541f00d7690ae179448963 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Wed, 1 Jan 2020 10:34:21 +0100
Subject: Fix all chapters redownloading if latest chapter changes name

Remove readms, as it's dead.
---
 automedia.py         |  60 ++++++++++++++++---------
 plugins/manganelo.py |  36 ++++++++++-----
 plugins/readms.py    | 123 ---------------------------------------------------
 3 files changed, 64 insertions(+), 155 deletions(-)
 delete mode 100755 plugins/readms.py

diff --git a/automedia.py b/automedia.py
index a8ae028..272f853 100755
--- a/automedia.py
+++ b/automedia.py
@@ -113,7 +113,7 @@ def get_tracked_rss(rss_tracked_dir, existing_tracked_rss):
     except FileNotFoundError:
         return []
 
-def rss_update_latest(rss_tracked_dir, rss, latest):
+def rss_update_latest(rss_tracked_dir, rss, latest, url):
     with open(os.path.join(rss_tracked_dir, rss.title, "latest"), "w") as file:
         file.write(latest)
 
@@ -122,11 +122,11 @@
         file.write(updated)
 
     rss.json_data["updated"] = updated
-    rss.json_data["downloaded"].append({ "title": latest, "time": updated })
+    rss.json_data["downloaded"].append({ "title": latest, "time": updated, "url": url })
     with open(os.path.join(rss_tracked_dir, rss.title, "data"), "w") as file:
         json.dump(rss.json_data, file, indent=4)
 
-def html_update_latest(html_tracked_dir, html, latest):
+def html_update_latest(html_tracked_dir, html, latest, url):
     with open(os.path.join(html_tracked_dir, html.title, "latest"), "w") as file:
         file.write(latest)
 
@@ -135,7 +135,7 @@
         file.write(updated)
 
     html.json_data["updated"] = updated
-    html.json_data["downloaded"].append({ "title": latest, "time": updated })
+    html.json_data["downloaded"].append({ "title": latest, "time": updated, "url": url })
     with open(os.path.join(html_tracked_dir, html.title, "data"), "w") as file:
         json.dump(html.json_data, file, indent=4)
 
@@ -270,11 +270,13 @@
         print("Name not provided and name in rss is empty")
         return False
 
+    start_after_url = None
     found_start_after = False
     for item in feed["items"]:
         title = item["title"].replace("/", "_").strip()
         if start_after and title == start_after:
             found_start_after = True
+            start_after_url = item["link"]
             break
 
     if start_after and not found_start_after:
@@ -310,7 +312,7 @@
         "downloaded": []
     }
     if start_after:
-        data["downloaded"].append({ "title": start_after, "time": updated })
+        data["downloaded"].append({ "title": start_after, "time": updated, "url": start_after_url })
 
     with open(os.path.join(rss_dir, "data"), "w") as file:
         json.dump(data, file, indent=4)
@@ -339,6 +341,7 @@
         print("Name not provided or empty")
         return False
 
+    start_after_url = None
    if start_after:
        items = plugin_list(plugin_path, url, None)
        if items:
@@ -347,6 +350,7 @@
                title = item["name"].replace("/", "_").strip()
                if start_after and title == start_after:
                    found_start_after = True
+                    start_after_url = item["url"]
                    break
 
            if not found_start_after:
@@ -386,7 +390,7 @@
         "downloaded": []
     }
     if start_after:
-        data["downloaded"].append({ "title": start_after, "time": updated })
+        data["downloaded"].append({ "title": start_after, "time": updated, "url": start_after_url })
 
     with open(os.path.join(html_dir, "data"), "w") as file:
         json.dump(data, file, indent=4)
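With the hunks above, every entry in a tracker's "data" file now records the url it was downloaded from alongside the title and time. For illustration, a "data" file might now look roughly like this; the values are hypothetical, and any fields beyond "updated" and "downloaded" are omitted since the patch does not show them:

    {
        "updated": "2020-01-01T10:34:21",
        "downloaded": [
            { "title": "Chapter 13", "time": "2020-01-01T10:34:21", "url": "https://manganelo.com/chapter/vy918232/chapter_13" }
        ]
    }

The stored url is what lets the sync code below recognize an already-downloaded chapter even after the site renames it.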
@@ -410,12 +414,19 @@ def sync_rss(tracked_rss):
         show_notification("RSS Sync failed", "Failed to parse rss for url {}, error: {}".format(tracked_rss.link, str(feed.bozo_exception)), urgency="critical")
         return None
 
+    seen_titles = set()
+    seen_urls = set()
+    for downloaded_item in tracked_rss.json_data["downloaded"]:
+        seen_titles.add(downloaded_item["title"].lower().replace(" ", ""))
+        seen_urls.add(downloaded_item.get("url", ""))
+
     items = []
     for item in feed["items"]:
         title = item["title"].replace("/", "_").strip()
+        link = item["link"]
         # TODO: Goto next page in rss if supported, if we cant find our item on the first page
         #if not get_downloaded_item_by_title(tracked_rss, title):
-        if tracked_rss.latest and title == tracked_rss.latest:
+        if title.lower().replace(" ", "") in seen_titles or link in seen_urls:
             break
         items.append(item)
 
@@ -424,9 +435,9 @@
     latest = None
     for item in reversed(items):
         title = item["title"].replace("/", "_").strip()
-        rss_update_latest(rss_tracked_dir, tracked_rss, title)
-
         link = item["link"]
+        rss_update_latest(rss_tracked_dir, tracked_rss, title, link)
+
         if not add_torrent(link):
             return latest
         latest = title
@@ -436,11 +447,18 @@
 
 def plugin_list(plugin_path, url, latest):
     if not latest:
-        latest = ""
-    process = subprocess.Popen([plugin_path, "list", url, latest], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdout, stderr = process.communicate()
+        latest = []
+
+    plugin_name = os.path.basename(plugin_path)
+    process = None
+    try:
+        process = subprocess.Popen([plugin_path, "list", url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+    except FileNotFoundError as e:
+        print("{}: Plugin failed: Failed to launch plugin list for plugin {}, error: {}".format(str(datetime.today().isoformat()), plugin_name, str(e)))
+        return None
+
+    stdout, stderr = process.communicate(json.dumps(latest).encode())
     if process.returncode != 0:
-        plugin_name = os.path.basename(plugin_path)
         print("{}: Plugin failed: Failed to launch plugin list for plugin {} and url {}, error: stdout: {}, stderr: {}".format(str(datetime.today().isoformat()), plugin_name, url, stdout.decode('utf-8'), stderr.decode('utf-8')))
         if not only_show_finished_notification:
             show_notification("Plugin failed", "Failed to launch plugin list for plugin {} and url {}, error: stdout: {}, stderr: {}".format(plugin_name, url, stdout.decode('utf-8'), stderr.decode('utf-8')), urgency="critical")
@@ -449,7 +467,6 @@
     try:
         return json.loads(stdout.decode('utf-8'))
     except json.decoder.JSONDecodeError as e:
-        plugin_name = os.path.basename(plugin_path)
         if not only_show_finished_notification:
             show_notification("Plugin failed", "Failed to json decode response of plugin {}, error: {}".format(plugin_name, str(e)), urgency="critical")
         return None
@@ -494,16 +511,19 @@ def resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id):
     except FileNotFoundError as e:
         pass
 
+def build_plugin_list_input(tracked_html):
+    result = []
+    for downloaded_item in tracked_html.json_data["downloaded"]:
+        result.append({ "title": downloaded_item["title"], "url": downloaded_item.get("url", "") })
+    return result
+
 # Return the title of the newest item
 def sync_html(tracked_html, download_dir, session_id):
     plugin_entry = os.path.join(script_dir, "plugins", tracked_html.plugin)
     resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id)
     html_tracked_dir = os.path.join(html_config_dir, "tracked")
 
-    # TODO: Instead of using item name to track which ones to download newer item than,
-    # use a number which should be the number of items that have already been downloaded.
-    # The reason being that some sites may rename items that we are tracking, for example
-    # when tracking chapter names and the chapter doesn't have a name yet.
+    # The program takes an index starting from 1, which is the chapter number
 
     # Program should print the names of each item (chapter for manga) after "latest", sorted by newest to oldest
     # along with the urls to them.
@@ -520,7 +540,7 @@
    # ]
    # ./program list url latest
    # Note: @latest argument here is optional
-    items = plugin_list(plugin_entry, tracked_html.link, tracked_html.latest)
+    items = plugin_list(plugin_entry, tracked_html.link, build_plugin_list_input(tracked_html))
    if not items:
        return None
 
@@ -539,7 +559,7 @@
         with open(os.path.join(item_dir, ".session_id"), "w") as file:
             file.write(session_id)
 
-        html_update_latest(html_tracked_dir, tracked_html, name)
+        html_update_latest(html_tracked_dir, tracked_html, name, url)
 
         if not plugin_download(plugin_entry, url, item_dir):
             return latest
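The plugin "list" protocol changes shape here: instead of receiving a single latest title as a command line argument, a plugin now receives the whole downloaded history on stdin as a JSON array of { "title", "url" } objects, and still prints a JSON array of { "name", "url" } objects, newest first, stopping at the first item it has already seen. A minimal sketch of a conforming plugin, where fetch_chapters is a hypothetical stand-in for a site scraper and not part of automedia:

    #!/usr/bin/env python3

    import sys
    import json

    def list_items(url, downloaded):
        # Match on both normalized title and url, the same way automedia and
        # manganelo.py do, so a renamed chapter is still recognized by its url
        seen_titles = set(item["title"].lower().replace(" ", "") for item in downloaded if item.get("title"))
        seen_urls = set(item["url"] for item in downloaded if item.get("url"))

        items = []
        for name, item_url in fetch_chapters(url):  # hypothetical scraper, yields newest to oldest
            if name.lower().replace(" ", "") in seen_titles or item_url in seen_urls:
                break
            items.append({ "name": name, "url": item_url })
        print(json.dumps(items))

    if __name__ == "__main__":
        if len(sys.argv) >= 3 and sys.argv[1] == "list":
            raw = sys.stdin.read()
            list_items(sys.argv[2], json.loads(raw) if raw else [])

Breaking at the first already-seen item keeps the output limited to new chapters, so the caller can treat the decoded array directly as the download queue.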
"tracked") - # TODO: Instead of using item name to track which ones to download newer item than, - # use a number which should be the number of items that have already been downloaded. - # The reason being that some sites may rename items that we are tracking, for example - # when tracking chapter names and the chapter doesn't have a name yet. + # The program takes and index starting from 1, which is the chapter number # Program should print the names of each item (chapter for manga) after "latest", sorted by newest to oldest # along with the urls to them. @@ -520,7 +540,7 @@ def sync_html(tracked_html, download_dir, session_id): # ] # ./program list url latest # Note: @latest argument here is optional - items = plugin_list(plugin_entry, tracked_html.link, tracked_html.latest) + items = plugin_list(plugin_entry, tracked_html.link, build_plugin_list_input(tracked_html)) if not items: return None @@ -539,7 +559,7 @@ def sync_html(tracked_html, download_dir, session_id): with open(os.path.join(item_dir, ".session_id"), "w") as file: file.write(session_id) - html_update_latest(html_tracked_dir, tracked_html, name) + html_update_latest(html_tracked_dir, tracked_html, name, url) if not plugin_download(plugin_entry, url, item_dir): return latest diff --git a/plugins/manganelo.py b/plugins/manganelo.py index 12813a1..9d8a644 100755 --- a/plugins/manganelo.py +++ b/plugins/manganelo.py @@ -17,16 +17,13 @@ def usage(): exit(1) def usage_list(): - print("manganelo.py list [latest]") - print("examples:") - print(" manganelo.py list \"https://mangakakalot.com/manga/assassins_pride\"") - print(" manganelo.py list \"https://mangakakalot.com/manga/assassins_pride\" \"Chapter 13\"") + print("manganelo.py list ") exit(1) def usage_download(): print("manganelo.py download ") print("examples:") - print(" manganelo.py download \"https://mangakakalot.com/chapter/vy918232/chapter_16\" /home/adam/Manga/MangaName") + print(" manganelo.py download \"https://manganelo.com/chapter/read_naruto_manga_online_free3/chapter_700.5\" /home/adam/Manga/MangaName") print("") print("Note: The manga directory has to exist.") exit(1) @@ -42,19 +39,32 @@ def download_file(url, save_path): return False return True -def list_chapters(url, latest): +def list_chapters(url, chapter_list_input): response = requests.get(url) if response.status_code != 200: print("Failed to list chapters, server responded with status code %d" % response.status_code) exit(2) + seen_titles = set() + for item in chapter_list_input: + title = item.get("title") + if title and len(title) > 0: + seen_titles.add(title.lower().replace(" ", "")) + + seen_urls = set() + for item in chapter_list_input: + url = item.get("url") + if url and len(url) > 0: + seen_urls.add(url.replace("mangakakalot", "manganelo")) + tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//ul[@class="row-content-chapter"]//a'): element_text = element.text.strip() - if latest and element_text == latest: + url = element.attrib.get("href").strip() + if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break - chapters.append({ "name": element_text, "url": element.attrib.get("href").strip() }) + chapters.append({ "name": element_text, "url": url }) print(json.dumps(chapters)) def download_chapter(url, download_dir): @@ -89,10 +99,12 @@ if command == "list": usage_list() url = sys.argv[2].replace("mangakakalot", "manganelo") - latest = "" - if len(sys.argv) >= 4: - latest = sys.argv[3] - list_chapters(url, latest) + chapter_list_input = 
diff --git a/plugins/readms.py b/plugins/readms.py
deleted file mode 100755
index a5343b8..0000000
--- a/plugins/readms.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import time
-import sys
-import requests
-import json
-import subprocess
-
-from lxml import etree
-
-def usage():
-    print("readms.py command")
-    print("commands:")
-    print(" download")
-    print(" list")
-    exit(1)
-
-def usage_list():
-    print("readms.py list <url> [latest]")
-    print("examples:")
-    print(" readms.py list \"https://readms.net/manga/a_trail_of_blood\"")
-    print(" readms.py list \"https://readms.net/manga/a_trail_of_blood\" \"48 - Blood oath\"")
-    exit(1)
-
-def usage_download():
-    print("readms.py download <url> <download_dir>")
-    print("examples:")
-    print(" readms.py download \"https://readms.net/manga/a_trail_of_blood\" /home/adam/Manga/MangaName")
-    print("")
-    print("Note: The manga directory has to exist.")
-    exit(1)
-
-if len(sys.argv) < 2:
-    usage()
-
-def download_file(url, save_path):
-    process = subprocess.Popen(["wget", "-q", "-o", "/dev/null", "-O", save_path, url], stderr=subprocess.PIPE)
-    _, stderr = process.communicate()
-    if process.returncode != 0:
-        print("Failed to download file: {}, error: {}".format(url, stderr.decode('utf-8')))
-        return False
-    return True
-
-def list_chapters(url, latest):
-    response = requests.get(url)
-    if response.status_code != 200:
-        print("Failed to list chapters, server responded with status code %d" % response.status_code)
-        exit(2)
-
-    tree = etree.HTML(response.text)
-    chapters = []
-    for element in tree.xpath('//table//tr//a'):
-        element_text = element.text.strip()
-        if latest and element_text == latest:
-            break
-        chapters.append({ "name": element_text, "url": "https://readms.net" + element.attrib.get("href").strip() })
-    print(json.dumps(chapters))
-
-def download_chapter(url, download_dir):
-    in_progress_filepath = os.path.join(download_dir, ".in_progress")
-    with open(in_progress_filepath, "w") as file:
-        file.write(url)
-
-    img_number = 1
-    while True:
-        response = requests.get(url)
-        if response.status_code != 200:
-            print("Failed to list chapters, server responded with status code %d" % response.status_code)
-            exit(2)
-
-        tree = etree.HTML(response.text)
-
-        image_sources = tree.xpath('//img[@id="manga-page"]/@src')
-        if len(image_sources) != 1:
-            break
-
-        image_source = "https:" + image_sources[0]
-        ext = image_source[image_source.rfind("."):]
-        image_name = str(img_number) + ext
-        image_path = os.path.join(download_dir, image_name)
-        print("Downloading {} to {}".format(image_source, image_path))
-        if not download_file(image_source, image_path):
-            exit(1)
-
-        next_pages = tree.xpath('//div[@class="page"]//a/@href')
-        if len(next_pages) != 1:
-            break
-
-        next_page = next_pages[0]
-        last_slash = next_page.rfind('/')
-        try:
-            if last_slash != -1 and int(next_page[last_slash+1:]) <= img_number:
-                break
-        except ValueError:
-            pass
-
-        url = "https://readms.net" + next_page
-        img_number += 1
-
-    with open(os.path.join(download_dir, ".finished"), "w") as file:
-        file.write("1")
-
-    os.remove(in_progress_filepath)
-
-command = sys.argv[1]
-if command == "list":
-    if len(sys.argv) < 3:
-        usage_list()
-
-    url = sys.argv[2]
-    latest = ""
-    if len(sys.argv) >= 4:
-        latest = sys.argv[3]
-    list_chapters(url, latest)
-elif command == "download":
-    if len(sys.argv) < 4:
-        usage_download()
-    url = sys.argv[2]
-    download_dir = sys.argv[3]
-    download_chapter(url, download_dir)
-else:
-    usage()
-- 
cgit v1.2.3-70-g09d2