path: root/automedia.py
author    dec05eba <dec05eba@protonmail.com>  2019-05-31 07:55:31 +0200
committer dec05eba <dec05eba@protonmail.com>  2020-07-06 07:12:33 +0200
commit    1f623da3b6b056a028c83bd1809b3429b94e1857 (patch)
tree      645f71a7f5f7abae5b9110bdd89ebbbb76079eb8 /automedia.py
Initial commit, support for rss torrent, manganelo and readms
Diffstat (limited to 'automedia.py')
-rwxr-xr-x  automedia.py  511
1 file changed, 511 insertions, 0 deletions
diff --git a/automedia.py b/automedia.py
new file mode 100755
index 0000000..4d6f140
--- /dev/null
+++ b/automedia.py
@@ -0,0 +1,511 @@
+#!/usr/bin/env python3
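+# AutoMedia: tracks rss torrent feeds and html pages (through site plugins such as manganelo and readms)
+# and automatically downloads new items, showing desktop notifications when downloads start and finish.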
+
+import feedparser
+import subprocess
+import argparse
+import os
+import sys
+import time
+import json
+import uuid
+# TODO: Remove this shit. It gives a warning and it's slow
+import tldextract
+import transmissionrpc
+
+from lxml import etree
+
+script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+class TrackedRss:
+ title = None
+ latest = None
+ link = None
+
+ def __init__(self, title, latest, link):
+ self.title = title
+ self.latest = latest
+ self.link = link
+
+class TrackedHtml:
+ title = None
+ latest = None
+ link = None
+ plugin = None
+
+ def __init__(self, title, latest, link, plugin):
+ self.title = title
+ self.latest = latest
+ self.link = link
+ self.plugin = plugin
+
+class TorrentProgress:
+ name = None
+ progress = None
+
+ def __init__(self, name, progress):
+ self.name = name
+ self.progress = progress
+
+class HtmlItemProgress:
+ name = None
+ finished = None
+
+ def __init__(self, name, finished):
+ self.name = name
+ self.finished = finished
+
+def get_file_content_or_none(path):
+ try:
+ with open(path, "r") as file:
+ return file.read()
+ except FileNotFoundError as e:
+ return None
+
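+# Each tracked rss/html entry is a directory under <config_dir>/tracked/<title>/ containing:
+#   link        - the rss feed url or html page url
+#   latest      - title of the latest downloaded item (or the --start-after item), may be missing
+#   plugin      - (html only) name of the plugin script used to list and download items
+#   in_progress - timestamp written while the entry is being added; such entries are skipped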
+def get_tracked_rss(rss_tracked_dir):
+ try:
+ tracked_rss = []
+ for title in os.listdir(rss_tracked_dir):
+ in_progress = get_file_content_or_none(os.path.join(rss_tracked_dir, title, "in_progress"))
+ if in_progress:
+ print("Skipping in-progress rss %s" % title)
+ continue
+ latest = get_file_content_or_none(os.path.join(rss_tracked_dir, title, "latest"))
+ link = get_file_content_or_none(os.path.join(rss_tracked_dir, title, "link"))
+ if not link:
+ print("Rss corrupt, link missing for rss %s" % title)
+ continue
+ tracked_rss.append(TrackedRss(title, latest, link))
+ return tracked_rss
+ except FileNotFoundError as e:
+ return []
+
+def rss_update_latest(rss_tracked_dir, rss, latest):
+ with open(os.path.join(rss_tracked_dir, rss.title, "latest"), "w") as file:
+ file.write(latest)
+
+def html_update_latest(html_tracked_dir, html, latest):
+ with open(os.path.join(html_tracked_dir, html.title, "latest"), "w") as file:
+ file.write(latest)
+
+def get_tracked_html(html_tracked_dir):
+ try:
+ tracked_html = []
+ for title in os.listdir(html_tracked_dir):
+ in_progress = get_file_content_or_none(os.path.join(html_tracked_dir, title, "in_progress"))
+ if in_progress:
+ print("Skipping in-progress html %s" % title)
+ continue
+ latest = get_file_content_or_none(os.path.join(html_tracked_dir, title, "latest"))
+ link = get_file_content_or_none(os.path.join(html_tracked_dir, title, "link"))
+ if not link:
+ print("html corrupt, link missing for html %s" % title)
+ continue
+ plugin = get_file_content_or_none(os.path.join(html_tracked_dir, title, "plugin"))
+            if not plugin:
+ print("html corrupt, plugin missing for html %s" % title)
+ continue
+ tracked_html.append(TrackedHtml(title, latest, link, plugin))
+ return tracked_html
+ except FileNotFoundError as e:
+ return []
+
+# @urgency should either be "low", "normal" or "critical"
+def show_notification(title, body, urgency="normal"):
+ process = subprocess.Popen(["notify-send", "-u", urgency, title, body])
+ #process.communicate()
+
+def fetch_page(url):
+ process = subprocess.Popen(["curl", "-s", "-L", "--output", "-", url], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = process.communicate()
+ if process.returncode != 0:
+        # TODO: Add the file to a list of failed files, so the user knows which files they need to download manually
+ show_notification("Download failed", "Failed to fetch page: {}, error: {}".format(url, stderr.decode('utf-8')), urgency="critical")
+ return None
+ return stdout.decode('utf-8')
+
+def is_torrent_daemon_running():
+ process = subprocess.Popen(["transmission-remote", "-si"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ process.communicate()
+ return process.returncode == 0
+
+def start_torrent_daemon(download_dir):
+ # TODO: Make seed ratio configurable
+ process = subprocess.Popen(["transmission-daemon", "--global-seedratio", "2.0", "--download-dir", download_dir])
+ process.communicate()
+ while not is_torrent_daemon_running():
+ time.sleep(0.1)
+ return process.returncode == 0
+
+def add_torrent(torrent_link):
+ process = subprocess.Popen(["transmission-remote", "--add", torrent_link], stderr=subprocess.PIPE)
+ _, stderr = process.communicate()
+ if process.returncode != 0:
+ show_notification("Download failed", "Failed to download torrent: {}, error: {}".format(torrent_link, stderr.decode('utf-8')), urgency="critical")
+ return process.returncode == 0
+
+def get_torrent_progress(tc):
+ torrent_progress = []
+ for torrent in tc.get_torrents():
+ torrent_progress.append(TorrentProgress(torrent.name, torrent.progress))
+ return torrent_progress
+
+def get_finished_torrents(torrents):
+ filtered_torrents = []
+ for torrent in torrents:
+ if abs(100.0 - torrent.progress) <= 0.001:
+ filtered_torrents.append(torrent)
+ return filtered_torrents
+
+def get_unfinished_torrents(torrents):
+ filtered_torrents = []
+ for torrent in torrents:
+ if abs(100.0 - torrent.progress) > 0.001:
+ filtered_torrents.append(torrent)
+ return filtered_torrents
+
+def get_matching_torrents_by_name(torrents1, torrents2):
+ matching_torrents = []
+ for torrent1 in torrents1:
+ for torrent2 in torrents2:
+ if torrent1.name == torrent2.name:
+ matching_torrents.append(torrent1.name)
+ return matching_torrents
+
+def get_html_items_progress(download_dir, tracked_html):
+ items = []
+ for html in tracked_html:
+ item_dir = os.path.join(download_dir, html.title)
+ try:
+ for item in os.listdir(item_dir):
+ finished = os.path.isfile(os.path.join(item_dir, item, "finished"))
+ items.append(HtmlItemProgress(html.title + "/" + item, finished))
+ except FileNotFoundError as e:
+ pass
+ return items
+
+def get_matching_html_items_by_name(html_items1, html_items2):
+ matching_items = []
+ for html_item1 in html_items1:
+ for html_item2 in html_items2:
+ if html_item1.name == html_item2.name:
+ matching_items.append(html_item1.name)
+ return matching_items
+
+def add_rss(url, config_dir, start_after):
+ feed = feedparser.parse(url)
+ rss_name = feed["channel"]["title"].strip().replace("/", "_")
+ rss_dir = os.path.join(config_dir, "tracked", rss_name)
+ os.makedirs(rss_dir)
+
+ found_start_after = False
+ for item in feed["items"]:
+ title = item["title"].strip()
+ if start_after and title == start_after:
+ found_start_after = True
+ break
+
+ if start_after and not found_start_after:
+ print("Failed to find %s in rss %s" % (start_after, url))
+ return False
+
+ # Create an "in_progress" file to prevent periodic sync from reading rss data
+ # before we have finished adding all the data.
+ # Timestamp is added to it to make it possible to automatically cleanup rss that is corrupted
+ # (for example if the computer crashes before the in_progress file is removed).
+ in_progress_filepath = os.path.join(rss_dir, "in_progress")
+ with open(in_progress_filepath, "w") as file:
+ file.write(str(time.time()))
+
+ with open(os.path.join(rss_dir, "link"), "w") as file:
+ file.write(url)
+
+ if start_after:
+ with open(os.path.join(rss_dir, "latest"), "w") as file:
+ file.write(start_after)
+
+ os.remove(in_progress_filepath)
+ return True
+
+def add_html(name, url, config_dir, start_after):
+ domain = tldextract.extract(url).domain
+ domain_plugin_file_exists = os.path.isfile(os.path.join(script_dir, "plugins", domain))
+ domain_plugin_file_py_exists = os.path.isfile(os.path.join(script_dir, "plugins", domain + ".py"))
+ if not domain_plugin_file_exists and not domain_plugin_file_py_exists:
+ print("Plugin doesn't exist: {}".format(domain))
+ exit(2)
+
+ name = name.replace("/", "_")
+ html_dir = os.path.join(config_dir, "tracked", name)
+ os.makedirs(html_dir)
+
+    # Create an "in_progress" file to prevent periodic sync from reading html data
+    # before we have finished adding all the data.
+    # Timestamp is added to it to make it possible to automatically cleanup html entries that are corrupted
+    # (for example if the computer crashes before the in_progress file is removed).
+ in_progress_filepath = os.path.join(html_dir, "in_progress")
+ with open(in_progress_filepath, "w") as file:
+ file.write(str(int(time.time())))
+
+ with open(os.path.join(html_dir, "link"), "w") as file:
+ file.write(url)
+
+ with open(os.path.join(html_dir, "plugin"), "w") as file:
+ if domain_plugin_file_exists:
+ file.write(domain)
+ elif domain_plugin_file_py_exists:
+ file.write(domain + ".py")
+
+ if start_after:
+ with open(os.path.join(html_dir, "latest"), "w") as file:
+ file.write(start_after)
+
+ os.remove(in_progress_filepath)
+ return True
+
+
+# Return the title of the newest item
+def sync_rss(tracked_rss):
+ feed = feedparser.parse(tracked_rss.link)
+ items = []
+ for item in feed["items"]:
+ title = item["title"].strip()
+ if tracked_rss.latest and title == tracked_rss.latest:
+ break
+ items.append(item)
+
+ # Add torrents from the oldest to the newest, and stop when failing to add torrent.
+ # If it fails, there will be an attempt to add them again after next sync cycle.
+ latest = None
+ for item in reversed(items):
+ link = item["link"]
+ if not add_torrent(link):
+ return latest
+ latest = item["title"].strip()
+ show_notification("Download started", "Started downloading torrent {}".format(latest))
+ return latest
+
+def plugin_list(plugin_path, url, latest):
+ if not latest:
+ latest = ""
+ process = subprocess.Popen([plugin_path, "list", url, latest], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = process.communicate()
+ if process.returncode != 0:
+ plugin_name = os.path.basename(plugin_path)
+ show_notification("Plugin failed", "Failed to launch plugin list for plugin {}, error: {}".format(plugin_name, stderr.decode('utf-8')), urgency="critical")
+ return None
+
+ try:
+ return json.loads(stdout.decode('utf-8'))
+ except json.decoder.JSONDecodeError as e:
+ plugin_name = os.path.basename(plugin_path)
+ show_notification("Plugin failed", "Failed to json decode response of plugin {}, error: {}".format(plugin_name, str(e)), urgency="critical")
+ return None
+
+def plugin_download(plugin_path, url, download_dir):
+ subprocess.Popen([plugin_path, "download", url, download_dir], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ return True
+
+def resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id):
+    # TODO: Instead of redownloading, add resuming. This could be done by recording the files that have already been downloaded in a file.
+    # Redownload items that we can detect have stopped. This can happen if the computer crashes or loses connection while downloading.
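+    # Every sync run has its own @session_id which is written to each item directory when a download starts.
+    # If an item directory still has an "in_progress" file but its session_id file is missing or belongs to
+    # a previous run, then the download was interrupted and is restarted here.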
+ title_dir = os.path.join(download_dir, tracked_html.title)
+ try:
+ for item in os.listdir(title_dir):
+ item_dir = os.path.join(title_dir, item)
+ if os.path.isfile(os.path.join(item_dir, "finished")):
+ continue
+
+ in_progress_path = os.path.join(item_dir, "in_progress")
+ url = get_file_content_or_none(in_progress_path)
+ # Item has finished downloading
+ if not url:
+ continue
+
+ invalid_session = False
+ try:
+ with open(os.path.join(item_dir, "session_id"), "r") as file:
+ item_session_id = file.read()
+ if item_session_id != session_id:
+ invalid_session = True
+ plugin_download(plugin_entry, url, item_dir)
+ show_notification("Resuming", "Resuming download for item {} with plugin {}".format(item, tracked_html.plugin))
+ except FileNotFoundError as e:
+ invalid_session = True
+ plugin_download(plugin_entry, url, item_dir)
+ show_notification("Resuming", "Resuming download for item {} with plugin {}".format(item, tracked_html.plugin))
+
+ if invalid_session:
+ with open(os.path.join(item_dir, "session_id"), "w") as file:
+ file.write(session_id)
+ except FileNotFoundError as e:
+ pass
+
+# Return the title of the newest item
+def sync_html(tracked_html, download_dir, session_id):
+ plugin_entry = os.path.join(script_dir, "plugins", tracked_html.plugin)
+ resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id)
+
+    # TODO: Instead of using the item name to track which items are newer than the ones already downloaded,
+    # use a number which should be the number of items that have already been downloaded.
+    # The reason is that some sites may rename items that we are tracking, for example
+    # when tracking chapter names and the chapter doesn't have a name yet.
+
+    # The plugin program should print the name of each item (chapter, for manga) that is newer than @latest,
+    # sorted from newest to oldest, along with the url for each item
+    # (i.e. only the items that appear before the one called @latest). The printed data should be in json format:
+ # {
+ # "items": [
+ # {
+ # "name": "Example name",
+ # "url": "https://example.com"
+ # },
+ # {
+ # "name": "Another item",
+ # "url": "https://another.url.com"
+ # }
+ # ]
+ # }
+ # ./program list url latest
+ # Note: @latest argument here is optional
+ items = plugin_list(plugin_entry, tracked_html.link, tracked_html.latest)
+ if not items:
+ return None
+
+ # Start downloading asynchronously using url.
+ # A file called "in_progress" should be added to the download directory when the download is in progress.
+ # The "in_progress" file should contain the url that was used to download the item.
+ # A file called "finished" should be added to the download directory when the download has finished.
+ # ./program download url download_dir
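+    # For example, with the manganelo plugin this might be invoked as:
+    #   plugins/manganelo.py list "<series url>" "<latest chapter name>"
+    #   plugins/manganelo.py download "<chapter url>" "<item download dir>"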
+ latest = None
+ if len(items["items"]) > 0:
+ latest = items["items"][0]["name"].replace("/", "_")
+
+ for item in reversed(items["items"]):
+ url = item["url"]
+ name = item["name"].replace("/", "_")
+ item_dir = os.path.join(download_dir, tracked_html.title, name)
+ os.makedirs(item_dir, exist_ok=True)
+
+ with open(os.path.join(item_dir, "session_id"), "w") as file:
+ file.write(session_id)
+
+ if not plugin_download(plugin_entry, url, item_dir):
+ return latest
+
+ latest = name
+ show_notification("Download started", "Started downloading item {} with plugin {}".format(tracked_html.title + "/" + name, tracked_html.plugin))
+ return latest
+
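+# Main sync loop: start the torrent daemon if it's not running, then every @sync_rate_sec seconds sync all
+# tracked rss feeds (adding new torrents) and tracked html items (running their plugins), while polling
+# download progress every @check_torrent_status_rate_sec seconds to show a notification when an item finishes.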
+def sync(rss_config_dir, html_config_dir, download_dir, sync_rate_sec):
+ os.makedirs(download_dir, exist_ok=True)
+ if not is_torrent_daemon_running():
+ if not start_torrent_daemon(download_dir):
+ print("Failed to start torrent daemon")
+ exit(2)
+ print("Started torrent daemon with download directory {}".format(download_dir))
+
+ rss_tracked_dir = os.path.join(rss_config_dir, "tracked")
+ html_tracked_dir = os.path.join(html_config_dir, "tracked")
+    # This is also the check rate for html items
+ check_torrent_status_rate_sec = 15
+ unfinished_torrents = []
+ unfinished_html_items = []
+
+ # TODO: Remove this and keep a list of "in progress" html items in memory instead.
+ session_id = uuid.uuid4().hex
+
+ tc = transmissionrpc.Client("localhost")
+
+ running = True
+ while running:
+ tracked_rss = get_tracked_rss(rss_tracked_dir)
+ for rss in tracked_rss:
+ latest = sync_rss(rss)
+ if latest:
+ rss_update_latest(rss_tracked_dir, rss, latest)
+ #else:
+ # print("No 'latest' item found for rss (maybe we already have the latest item?) %s" % rss.title)
+ #time.sleep(0.5) # Sleep between fetching rss so we don't get banned for spamming
+
+ tracked_html = get_tracked_html(html_tracked_dir)
+ for html in tracked_html:
+ latest = sync_html(html, download_dir, session_id)
+ if latest:
+ html_update_latest(html_tracked_dir, html, latest)
+ #else:
+ # print("No 'latest' item found for html (maybe we already have the latest item?) %s" % html.title)
+ #time.sleep(0.5) # Sleep between fetching html so we don't get banned for spamming
+
+ # Check torrent status with sleeping until it's time to sync rss
+ count = 0
+ while count < sync_rate_sec/check_torrent_status_rate_sec:
+ html_items = get_html_items_progress(download_dir, tracked_html)
+ finished_html_items = [html_item for html_item in html_items if html_item.finished]
+ newly_finished_html_items = get_matching_html_items_by_name(finished_html_items, unfinished_html_items)
+ for newly_finished_html_item in newly_finished_html_items:
+ show_notification("Download finished", "Finished downloading {}".format(newly_finished_html_item))
+ unfinished_html_items = [html_item for html_item in html_items if not html_item.finished]
+
+ torrents = get_torrent_progress(tc)
+ finished_torrents = get_finished_torrents(torrents)
+ newly_finished_torrents = get_matching_torrents_by_name(finished_torrents, unfinished_torrents)
+ for newly_finished_torrent in newly_finished_torrents:
+ show_notification("Download finished", "Finished downloading {}".format(newly_finished_torrent))
+ unfinished_torrents = get_unfinished_torrents(torrents)
+
+ time.sleep(check_torrent_status_rate_sec)
+ count += 1
+
+def main():
+ parser = argparse.ArgumentParser(description="Automatic download of media (rss feed, tracking html)")
+ action_group = parser.add_mutually_exclusive_group(required=True)
+ action_group.add_argument("-a", "--add", action="store_true")
+ action_group.add_argument("-s", "--sync", action="store_true")
+
+ parser.add_argument("-t", "--type", choices=["rss", "html"], required=False)
+ parser.add_argument("-u", "--url", required=False)
+ parser.add_argument("--start-after", required=False)
+ parser.add_argument("-d", "--download-dir", required=False)
+ parser.add_argument("-n", "--name", required=False)
+ args = parser.parse_args()
+
+ if args.add:
+ if not args.url:
+ print("-u/--url argument is required when using 'add' command")
+ exit(1)
+ if not args.type:
+ print("-t/--type argument is required when using 'add' command")
+ exit(1)
+ if args.type == "rss":
+ config_dir = os.path.expanduser("~/.config/automedia/rss")
+ os.makedirs(config_dir, exist_ok=True)
+ result = add_rss(args.url, config_dir, args.start_after)
+ if not result:
+ exit(1)
+ elif args.type == "html":
+ if not args.name:
+ print("-n/--name argument is required when using '--add --type html' command")
+ exit(1)
+
+ config_dir = os.path.expanduser("~/.config/automedia/html")
+ os.makedirs(config_dir, exist_ok=True)
+ result = add_html(args.name, args.url, config_dir, args.start_after)
+ if not result:
+ exit(1)
+ elif args.sync:
+ if not args.download_dir:
+ print("-d/--download-dir argument is required when using 'sync' command")
+ exit(1)
+
+ rss_config_dir = os.path.expanduser("~/.config/automedia/rss")
+ os.makedirs(rss_config_dir, exist_ok=True)
+
+ html_config_dir = os.path.expanduser("~/.config/automedia/html")
+ os.makedirs(html_config_dir, exist_ok=True)
+
+ sync_rate_sec = 15 * 60 # every 15 min
+ sync(rss_config_dir, html_config_dir, args.download_dir, sync_rate_sec)
+ pass
+
+if __name__ == "__main__":
+ main()