author     dec05eba <dec05eba@protonmail.com>  2020-01-01 10:34:21 +0100
committer  dec05eba <dec05eba@protonmail.com>  2020-07-06 07:12:34 +0200
commit     3586c8d003077ee32b541f00d7690ae179448963 (patch)
tree       9befe18f02e89acb7fb0b4d8768221a6374b6656
parent     08ad0983f4e5473b08cbf5b51aa8dc7d29b5d862 (diff)
Fix all chapters redownloading if latest chapter changes name
Remove readms, as it's dead.
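
Plugins no longer receive a single "latest" title argument on the command
line. automedia now writes the full list of already-downloaded items
({ "title", "url" } pairs) as JSON to the plugin's stdin, and the plugin
stops at the first chapter whose normalized title or url it has already
seen. Because the url stays stable even when the site renames a chapter
(for example when the newest chapter gets a proper name later), this stops
every chapter from being redownloaded. A minimal sketch of the protocol
from the plugin side (the fetch_all_chapters helper and the chapter data
are made up for illustration and are not part of this commit):

    #!/usr/bin/env python3
    import json
    import sys

    def fetch_all_chapters(url):
        # Placeholder for the real scraping (manganelo.py does this with requests + lxml).
        return [
            { "name": "Chapter 15", "url": url + "/chapter_15" },
            { "name": "Chapter 14", "url": url + "/chapter_14" },
            { "name": "Chapter 13", "url": url + "/chapter_13" },
        ]

    def list_chapters(url, downloaded):
        # Match on normalized title OR url so a renamed chapter is still recognized.
        seen_titles = { d["title"].lower().replace(" ", "") for d in downloaded if d.get("title") }
        seen_urls = { d["url"] for d in downloaded if d.get("url") }
        chapters = []
        for chapter in fetch_all_chapters(url):  # newest to oldest
            if chapter["name"].lower().replace(" ", "") in seen_titles or chapter["url"] in seen_urls:
                break
            chapters.append(chapter)
        print(json.dumps(chapters))

    if __name__ == "__main__":
        stdin_data = sys.stdin.read()
        list_chapters(sys.argv[2], json.loads(stdin_data) if stdin_data else [])
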
-rwxr-xr-x  automedia.py           60
-rwxr-xr-x  plugins/manganelo.py   36
-rwxr-xr-x  plugins/readms.py     123
3 files changed, 64 insertions, 155 deletions
diff --git a/automedia.py b/automedia.py
index a8ae028..272f853 100755
--- a/automedia.py
+++ b/automedia.py
@@ -113,7 +113,7 @@ def get_tracked_rss(rss_tracked_dir, existing_tracked_rss):
except FileNotFoundError:
return []
-def rss_update_latest(rss_tracked_dir, rss, latest):
+def rss_update_latest(rss_tracked_dir, rss, latest, url):
with open(os.path.join(rss_tracked_dir, rss.title, "latest"), "w") as file:
file.write(latest)
@@ -122,11 +122,11 @@ def rss_update_latest(rss_tracked_dir, rss, latest):
file.write(updated)
rss.json_data["updated"] = updated
- rss.json_data["downloaded"].append({ "title": latest, "time": updated })
+ rss.json_data["downloaded"].append({ "title": latest, "time": updated, "url": url })
with open(os.path.join(rss_tracked_dir, rss.title, "data"), "w") as file:
json.dump(rss.json_data, file, indent=4)
-def html_update_latest(html_tracked_dir, html, latest):
+def html_update_latest(html_tracked_dir, html, latest, url):
with open(os.path.join(html_tracked_dir, html.title, "latest"), "w") as file:
file.write(latest)
@@ -135,7 +135,7 @@ def html_update_latest(html_tracked_dir, html, latest):
file.write(updated)
html.json_data["updated"] = updated
- html.json_data["downloaded"].append({ "title": latest, "time": updated })
+ html.json_data["downloaded"].append({ "title": latest, "time": updated, "url": url })
with open(os.path.join(html_tracked_dir, html.title, "data"), "w") as file:
json.dump(html.json_data, file, indent=4)
@@ -270,11 +270,13 @@ def add_rss(name, url, rss_config_dir, start_after):
print("Name not provided and name in rss is empty")
return False
+ start_after_url = None
found_start_after = False
for item in feed["items"]:
title = item["title"].replace("/", "_").strip()
if start_after and title == start_after:
found_start_after = True
+ start_after_url = item["link"]
break
if start_after and not found_start_after:
@@ -310,7 +312,7 @@ def add_rss(name, url, rss_config_dir, start_after):
"downloaded": []
}
if start_after:
- data["downloaded"].append({ "title": start_after, "time": updated })
+ data["downloaded"].append({ "title": start_after, "time": updated, "url": start_after_url })
with open(os.path.join(rss_dir, "data"), "w") as file:
json.dump(data, file, indent=4)
@@ -339,6 +341,7 @@ def add_html(name, url, html_config_dir, start_after):
print("Name not provided or empty")
return False
+ start_after_url = None
if start_after:
items = plugin_list(plugin_path, url, None)
if items:
@@ -347,6 +350,7 @@ def add_html(name, url, html_config_dir, start_after):
title = item["name"].replace("/", "_").strip()
if start_after and title == start_after:
found_start_after = True
+ start_after_url = item["url"]
break
if not found_start_after:
@@ -386,7 +390,7 @@ def add_html(name, url, html_config_dir, start_after):
"downloaded": []
}
if start_after:
- data["downloaded"].append({ "title": start_after, "time": updated })
+ data["downloaded"].append({ "title": start_after, "time": updated, "url": start_after_url })
with open(os.path.join(html_dir, "data"), "w") as file:
json.dump(data, file, indent=4)
@@ -410,12 +414,19 @@ def sync_rss(tracked_rss):
show_notification("RSS Sync failed", "Failed to parse rss for url {}, error: {}".format(tracked_rss.link, str(feed.bozo_exception)), urgency="critical")
return None
+ seen_titles = set()
+ seen_urls = set()
+ for downloaded_item in tracked_rss.json_data["downloaded"]:
+ seen_titles.add(downloaded_item["title"].lower().replace(" ", ""))
+ seen_urls.add(downloaded_item.get("url", ""))
+
items = []
for item in feed["items"]:
title = item["title"].replace("/", "_").strip()
+ link = item["link"]
# TODO: Goto next page in rss if supported, if we cant find our item on the first page
#if not get_downloaded_item_by_title(tracked_rss, title):
- if tracked_rss.latest and title == tracked_rss.latest:
+ if title.lower().replace(" ", "") in seen_titles or link in seen_urls:
break
items.append(item)
@@ -424,9 +435,9 @@ def sync_rss(tracked_rss):
latest = None
for item in reversed(items):
title = item["title"].replace("/", "_").strip()
- rss_update_latest(rss_tracked_dir, tracked_rss, title)
-
link = item["link"]
+ rss_update_latest(rss_tracked_dir, tracked_rss, title, link)
+
if not add_torrent(link):
return latest
latest = title
@@ -436,11 +447,18 @@ def sync_rss(tracked_rss):
def plugin_list(plugin_path, url, latest):
if not latest:
- latest = ""
- process = subprocess.Popen([plugin_path, "list", url, latest], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = process.communicate()
+ latest = []
+
+ plugin_name = os.path.basename(plugin_path)
+ process = None
+ try:
+ process = subprocess.Popen([plugin_path, "list", url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except FileNotFoundError as e:
+ print("{}: Plugin failed: Failed to launch plugin list for plugin {}, error: {}".format(str(datetime.today().isoformat()), plugin_name, str(e)))
+ return None
+
+ stdout, stderr = process.communicate(json.dumps(latest).encode())
if process.returncode != 0:
- plugin_name = os.path.basename(plugin_path)
print("{}: Plugin failed: Failed to launch plugin list for plugin {} and url {}, error: stdout: {}, stderr: {}".format(str(datetime.today().isoformat()), plugin_name, url, stdout.decode('utf-8'), stderr.decode('utf-8')))
if not only_show_finished_notification:
show_notification("Plugin failed", "Failed to launch plugin list for plugin {} and url {}, error: stdout: {}, stderr: {}".format(plugin_name, url, stdout.decode('utf-8'), stderr.decode('utf-8')), urgency="critical")
@@ -449,7 +467,6 @@ def plugin_list(plugin_path, url, latest):
try:
return json.loads(stdout.decode('utf-8'))
except json.decoder.JSONDecodeError as e:
- plugin_name = os.path.basename(plugin_path)
if not only_show_finished_notification:
show_notification("Plugin failed", "Failed to json decode response of plugin {}, error: {}".format(plugin_name, str(e)), urgency="critical")
return None
@@ -494,16 +511,19 @@ def resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id):
except FileNotFoundError as e:
pass
+def build_plugin_list_input(tracked_html):
+ result = []
+ for downloaded_item in tracked_html.json_data["downloaded"]:
+ result.append({ "title": downloaded_item["title"], "url": downloaded_item.get("url", "") })
+ return result
+
# Return the title of the newest item
def sync_html(tracked_html, download_dir, session_id):
plugin_entry = os.path.join(script_dir, "plugins", tracked_html.plugin)
resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id)
html_tracked_dir = os.path.join(html_config_dir, "tracked")
- # TODO: Instead of using item name to track which ones to download newer item than,
- # use a number which should be the number of items that have already been downloaded.
- # The reason being that some sites may rename items that we are tracking, for example
- # when tracking chapter names and the chapter doesn't have a name yet.
+ # The program takes an index starting from 1, which is the chapter number
# Program should print the names of each item (chapter for manga) after "latest", sorted by newest to oldest
# along with the urls to them.
@@ -520,7 +540,7 @@ def sync_html(tracked_html, download_dir, session_id):
# ]
# ./program list url latest
# Note: @latest argument here is optional
- items = plugin_list(plugin_entry, tracked_html.link, tracked_html.latest)
+ items = plugin_list(plugin_entry, tracked_html.link, build_plugin_list_input(tracked_html))
if not items:
return None
@@ -539,7 +559,7 @@ def sync_html(tracked_html, download_dir, session_id):
with open(os.path.join(item_dir, ".session_id"), "w") as file:
file.write(session_id)
- html_update_latest(html_tracked_dir, tracked_html, name)
+ html_update_latest(html_tracked_dir, tracked_html, name, url)
if not plugin_download(plugin_entry, url, item_dir):
return latest
diff --git a/plugins/manganelo.py b/plugins/manganelo.py
index 12813a1..9d8a644 100755
--- a/plugins/manganelo.py
+++ b/plugins/manganelo.py
@@ -17,16 +17,13 @@ def usage():
exit(1)
def usage_list():
- print("manganelo.py list <url> [latest]")
- print("examples:")
- print(" manganelo.py list \"https://mangakakalot.com/manga/assassins_pride\"")
- print(" manganelo.py list \"https://mangakakalot.com/manga/assassins_pride\" \"Chapter 13\"")
+ print("manganelo.py list <url>")
exit(1)
def usage_download():
print("manganelo.py download <url> <download_dir>")
print("examples:")
- print(" manganelo.py download \"https://mangakakalot.com/chapter/vy918232/chapter_16\" /home/adam/Manga/MangaName")
+ print(" manganelo.py download \"https://manganelo.com/chapter/read_naruto_manga_online_free3/chapter_700.5\" /home/adam/Manga/MangaName")
print("")
print("Note: The manga directory has to exist.")
exit(1)
@@ -42,19 +39,32 @@ def download_file(url, save_path):
return False
return True
-def list_chapters(url, latest):
+def list_chapters(url, chapter_list_input):
response = requests.get(url)
if response.status_code != 200:
print("Failed to list chapters, server responded with status code %d" % response.status_code)
exit(2)
+ seen_titles = set()
+ for item in chapter_list_input:
+ title = item.get("title")
+ if title and len(title) > 0:
+ seen_titles.add(title.lower().replace(" ", ""))
+
+ seen_urls = set()
+ for item in chapter_list_input:
+ url = item.get("url")
+ if url and len(url) > 0:
+ seen_urls.add(url.replace("mangakakalot", "manganelo"))
+
tree = etree.HTML(response.text)
chapters = []
for element in tree.xpath('//ul[@class="row-content-chapter"]//a'):
element_text = element.text.strip()
- if latest and element_text == latest:
+ url = element.attrib.get("href").strip()
+ if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls:
break
- chapters.append({ "name": element_text, "url": element.attrib.get("href").strip() })
+ chapters.append({ "name": element_text, "url": url })
print(json.dumps(chapters))
def download_chapter(url, download_dir):
@@ -89,10 +99,12 @@ if command == "list":
usage_list()
url = sys.argv[2].replace("mangakakalot", "manganelo")
- latest = ""
- if len(sys.argv) >= 4:
- latest = sys.argv[3]
- list_chapters(url, latest)
+ chapter_list_input = sys.stdin.read()
+ if len(chapter_list_input) == 0:
+ chapter_list_input = []
+ else:
+ chapter_list_input = json.loads(chapter_list_input)
+ list_chapters(url, chapter_list_input)
elif command == "download":
if len(sys.argv) < 4:
usage_download()
diff --git a/plugins/readms.py b/plugins/readms.py
deleted file mode 100755
index a5343b8..0000000
--- a/plugins/readms.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import time
-import sys
-import requests
-import json
-import subprocess
-
-from lxml import etree
-
-def usage():
- print("readms.py command")
- print("commands:")
- print(" download")
- print(" list")
- exit(1)
-
-def usage_list():
- print("readms.py list <url> [latest]")
- print("examples:")
- print(" readms.py list \"https://readms.net/manga/a_trail_of_blood\"")
- print(" readms.py list \"https://readms.net/manga/a_trail_of_blood\" \"48 - Blood oath\"")
- exit(1)
-
-def usage_download():
- print("readms.py download <url> <download_dir>")
- print("examples:")
- print(" readms.py download \"https://readms.net/manga/a_trail_of_blood\" /home/adam/Manga/MangaName")
- print("")
- print("Note: The manga directory has to exist.")
- exit(1)
-
-if len(sys.argv) < 2:
- usage()
-
-def download_file(url, save_path):
- process = subprocess.Popen(["wget", "-q", "-o", "/dev/null", "-O", save_path, url], stderr=subprocess.PIPE)
- _, stderr = process.communicate()
- if process.returncode != 0:
- print("Failed to download file: {}, error: {}".format(url, stderr.decode('utf-8')))
- return False
- return True
-
-def list_chapters(url, latest):
- response = requests.get(url)
- if response.status_code != 200:
- print("Failed to list chapters, server responded with status code %d" % response.status_code)
- exit(2)
-
- tree = etree.HTML(response.text)
- chapters = []
- for element in tree.xpath('//table//tr//a'):
- element_text = element.text.strip()
- if latest and element_text == latest:
- break
- chapters.append({ "name": element_text, "url": "https://readms.net" + element.attrib.get("href").strip() })
- print(json.dumps(chapters))
-
-def download_chapter(url, download_dir):
- in_progress_filepath = os.path.join(download_dir, ".in_progress")
- with open(in_progress_filepath, "w") as file:
- file.write(url)
-
- img_number = 1
- while True:
- response = requests.get(url)
- if response.status_code != 200:
- print("Failed to list chapters, server responded with status code %d" % response.status_code)
- exit(2)
-
- tree = etree.HTML(response.text)
-
- image_sources = tree.xpath('//img[@id="manga-page"]/@src')
- if len(image_sources) != 1:
- break
-
- image_source = "https:" + image_sources[0]
- ext = image_source[image_source.rfind("."):]
- image_name = str(img_number) + ext
- image_path = os.path.join(download_dir, image_name)
- print("Downloading {} to {}".format(image_source, image_path))
- if not download_file(image_source, image_path):
- exit(1)
-
- next_pages = tree.xpath('//div[@class="page"]//a/@href')
- if len(next_pages) != 1:
- break
-
- next_page = next_pages[0]
- last_slash = next_page.rfind('/')
- try:
- if last_slash != -1 and int(next_page[last_slash+1:]) <= img_number:
- break
- except ValueError:
- pass
-
- url = "https://readms.net" + next_page
- img_number += 1
-
- with open(os.path.join(download_dir, ".finished"), "w") as file:
- file.write("1")
-
- os.remove(in_progress_filepath)
-
-command = sys.argv[1]
-if command == "list":
- if len(sys.argv) < 3:
- usage_list()
-
- url = sys.argv[2]
- latest = ""
- if len(sys.argv) >= 4:
- latest = sys.argv[3]
- list_chapters(url, latest)
-elif command == "download":
- if len(sys.argv) < 4:
- usage_download()
- url = sys.argv[2]
- download_dir = sys.argv[3]
- download_chapter(url, download_dir)
-else:
- usage()