author     dec05eba <dec05eba@protonmail.com>  2020-01-01 10:34:21 +0100
committer  dec05eba <dec05eba@protonmail.com>  2020-07-06 07:12:34 +0200
commit     3586c8d003077ee32b541f00d7690ae179448963 (patch)
tree       9befe18f02e89acb7fb0b4d8768221a6374b6656
parent     08ad0983f4e5473b08cbf5b51aa8dc7d29b5d862 (diff)
Fix all chapters redownloading if latest chapter changes name
Remove readms, as it's dead.
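
Plugins no longer receive a single "latest" title argument on the command
line. automedia now writes the full list of already-downloaded items
({ "title", "url" } pairs) as JSON to the plugin's stdin, and the plugin
stops at the first chapter whose normalized title or url it has already
seen. Because the url stays stable even when the site renames a chapter
(for example when the newest chapter gets a proper name later), this stops
every chapter from being redownloaded. A minimal sketch of the protocol
from the plugin side (the fetch_all_chapters helper and the chapter data
are made up for illustration and are not part of this commit):

    #!/usr/bin/env python3
    import json
    import sys

    def fetch_all_chapters(url):
        # Placeholder for the real scraping (manganelo.py does this with requests + lxml).
        return [
            { "name": "Chapter 15", "url": url + "/chapter_15" },
            { "name": "Chapter 14", "url": url + "/chapter_14" },
            { "name": "Chapter 13", "url": url + "/chapter_13" },
        ]

    def list_chapters(url, downloaded):
        # Match on normalized title OR url so a renamed chapter is still recognized.
        seen_titles = { d["title"].lower().replace(" ", "") for d in downloaded if d.get("title") }
        seen_urls = { d["url"] for d in downloaded if d.get("url") }
        chapters = []
        for chapter in fetch_all_chapters(url):  # newest to oldest
            if chapter["name"].lower().replace(" ", "") in seen_titles or chapter["url"] in seen_urls:
                break
            chapters.append(chapter)
        print(json.dumps(chapters))

    if __name__ == "__main__":
        stdin_data = sys.stdin.read()
        list_chapters(sys.argv[2], json.loads(stdin_data) if stdin_data else [])
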
-rwxr-xr-x  automedia.py           60
-rwxr-xr-x  plugins/manganelo.py   36
-rwxr-xr-x  plugins/readms.py     123
3 files changed, 64 insertions, 155 deletions
diff --git a/automedia.py b/automedia.py
index a8ae028..272f853 100755
--- a/automedia.py
+++ b/automedia.py
@@ -113,7 +113,7 @@ def get_tracked_rss(rss_tracked_dir, existing_tracked_rss):
except FileNotFoundError:
return []
-def rss_update_latest(rss_tracked_dir, rss, latest):
+def rss_update_latest(rss_tracked_dir, rss, latest, url):
with open(os.path.join(rss_tracked_dir, rss.title, "latest"), "w") as file:
file.write(latest)
@@ -122,11 +122,11 @@ def rss_update_latest(rss_tracked_dir, rss, latest):
file.write(updated)
rss.json_data["updated"] = updated
- rss.json_data["downloaded"].append({ "title": latest, "time": updated })
+ rss.json_data["downloaded"].append({ "title": latest, "time": updated, "url": url })
with open(os.path.join(rss_tracked_dir, rss.title, "data"), "w") as file:
json.dump(rss.json_data, file, indent=4)
-def html_update_latest(html_tracked_dir, html, latest):
+def html_update_latest(html_tracked_dir, html, latest, url):
with open(os.path.join(html_tracked_dir, html.title, "latest"), "w") as file:
file.write(latest)
@@ -135,7 +135,7 @@ def html_update_latest(html_tracked_dir, html, latest):
file.write(updated)
html.json_data["updated"] = updated
- html.json_data["downloaded"].append({ "title": latest, "time": updated })
+ html.json_data["downloaded"].append({ "title": latest, "time": updated, "url": url })
with open(os.path.join(html_tracked_dir, html.title, "data"), "w") as file:
json.dump(html.json_data, file, indent=4)
@@ -270,11 +270,13 @@ def add_rss(name, url, rss_config_dir, start_after):
print("Name not provided and name in rss is empty")
return False
+ start_after_url = None
found_start_after = False
for item in feed["items"]:
title = item["title"].replace("/", "_").strip()
if start_after and title == start_after:
found_start_after = True
+ start_after_url = item["link"]
break
if start_after and not found_start_after:
@@ -310,7 +312,7 @@ def add_rss(name, url, rss_config_dir, start_after):
"downloaded": []
}
if start_after:
- data["downloaded"].append({ "title": start_after, "time": updated })
+ data["downloaded"].append({ "title": start_after, "time": updated, "url": start_after_url })
with open(os.path.join(rss_dir, "data"), "w") as file:
json.dump(data, file, indent=4)
@@ -339,6 +341,7 @@ def add_html(name, url, html_config_dir, start_after):
print("Name not provided or empty")
return False
+ start_after_url = None
if start_after:
items = plugin_list(plugin_path, url, None)
if items:
@@ -347,6 +350,7 @@ def add_html(name, url, html_config_dir, start_after):
title = item["name"].replace("/", "_").strip()
if start_after and title == start_after:
found_start_after = True
+ start_after_url = item["url"]
break
if not found_start_after:
@@ -386,7 +390,7 @@ def add_html(name, url, html_config_dir, start_after):
"downloaded": []
}
if start_after:
- data["downloaded"].append({ "title": start_after, "time": updated })
+ data["downloaded"].append({ "title": start_after, "time": updated, "url": start_after_url })
with open(os.path.join(html_dir, "data"), "w") as file:
json.dump(data, file, indent=4)
@@ -410,12 +414,19 @@ def sync_rss(tracked_rss):
show_notification("RSS Sync failed", "Failed to parse rss for url {}, error: {}".format(tracked_rss.link, str(feed.bozo_exception)), urgency="critical")
return None
+ seen_titles = set()
+ seen_urls = set()
+ for downloaded_item in tracked_rss.json_data["downloaded"]:
+ seen_titles.add(downloaded_item["title"].lower().replace(" ", ""))
+ seen_urls.add(downloaded_item.get("url", ""))
+
items = []
for item in feed["items"]:
title = item["title"].replace("/", "_").strip()
+ link = item["link"]
# TODO: Goto next page in rss if supported, if we cant find our item on the first page
#if not get_downloaded_item_by_title(tracked_rss, title):
- if tracked_rss.latest and title == tracked_rss.latest:
+ if title.lower().replace(" ", "") in seen_titles or link in seen_urls:
break
items.append(item)
@@ -424,9 +435,9 @@ def sync_rss(tracked_rss):
latest = None
for item in reversed(items):
title = item["title"].replace("/", "_").strip()
- rss_update_latest(rss_tracked_dir, tracked_rss, title)
-
link = item["link"]
+ rss_update_latest(rss_tracked_dir, tracked_rss, title, link)
+
if not add_torrent(link):
return latest
latest = title
@@ -436,11 +447,18 @@ def sync_rss(tracked_rss):
def plugin_list(plugin_path, url, latest):
if not latest:
- latest = ""
- process = subprocess.Popen([plugin_path, "list", url, latest], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = process.communicate()
+ latest = []
+
+ plugin_name = os.path.basename(plugin_path)
+ process = None
+ try:
+ process = subprocess.Popen([plugin_path, "list", url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except FileNotFoundError as e:
+ print("{}: Plugin failed: Failed to launch plugin list for plugin {}, error: {}".format(str(datetime.today().isoformat()), plugin_name, str(e)))
+ return None
+
+ stdout, stderr = process.communicate(json.dumps(latest).encode())
if process.returncode != 0:
- plugin_name = os.path.basename(plugin_path)
print("{}: Plugin failed: Failed to launch plugin list for plugin {} and url {}, error: stdout: {}, stderr: {}".format(str(datetime.today().isoformat()), plugin_name, url, stdout.decode('utf-8'), stderr.decode('utf-8')))
if not only_show_finished_notification:
show_notification("Plugin failed", "Failed to launch plugin list for plugin {} and url {}, error: stdout: {}, stderr: {}".format(plugin_name, url, stdout.decode('utf-8'), stderr.decode('utf-8')), urgency="critical")
@@ -449,7 +467,6 @@ def plugin_list(plugin_path, url, latest):
try:
return json.loads(stdout.decode('utf-8'))
except json.decoder.JSONDecodeError as e:
- plugin_name = os.path.basename(plugin_path)
if not only_show_finished_notification:
show_notification("Plugin failed", "Failed to json decode response of plugin {}, error: {}".format(plugin_name, str(e)), urgency="critical")
return None
@@ -494,16 +511,19 @@ def resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id):
except FileNotFoundError as e:
pass
+def build_plugin_list_input(tracked_html):
+ result = []
+ for downloaded_item in tracked_html.json_data["downloaded"]:
+ result.append({ "title": downloaded_item["title"], "url": downloaded_item.get("url", "") })
+ return result
+
# Return the title of the newest item
def sync_html(tracked_html, download_dir, session_id):
plugin_entry = os.path.join(script_dir, "plugins", tracked_html.plugin)
resume_tracked_html(plugin_entry, download_dir, tracked_html, session_id)
html_tracked_dir = os.path.join(html_config_dir, "tracked")
- # TODO: Instead of using item name to track which ones to download newer item than,
- # use a number which should be the number of items that have already been downloaded.
- # The reason being that some sites may rename items that we are tracking, for example
- # when tracking chapter names and the chapter doesn't have a name yet.
+ # The program takes an index starting from 1, which is the chapter number
# Program should print the names of each item (chapter for manga) after "latest", sorted by newest to oldest
# along with the urls to them.
@@ -520,7 +540,7 @@ def sync_html(tracked_html, download_dir, session_id):
# ]
# ./program list url latest
# Note: @latest argument here is optional
- items = plugin_list(plugin_entry, tracked_html.link, tracked_html.latest)
+ items = plugin_list(plugin_entry, tracked_html.link, build_plugin_list_input(tracked_html))
if not items:
return None
@@ -539,7 +559,7 @@ def sync_html(tracked_html, download_dir, session_id):
with open(os.path.join(item_dir, ".session_id"), "w") as file:
file.write(session_id)
- html_update_latest(html_tracked_dir, tracked_html, name)
+ html_update_latest(html_tracked_dir, tracked_html, name, url)
if not plugin_download(plugin_entry, url, item_dir):
return latest
diff --git a/plugins/manganelo.py b/plugins/manganelo.py
index 12813a1..9d8a644 100755
--- a/plugins/manganelo.py
+++ b/plugins/manganelo.py
@@ -17,16 +17,13 @@ def usage():
exit(1)
def usage_list():
- print("manganelo.py list <url> [latest]")
- print("examples:")
- print(" manganelo.py list \"https://mangakakalot.com/manga/assassins_pride\"")
- print(" manganelo.py list \"https://mangakakalot.com/manga/assassins_pride\" \"Chapter 13\"")
+ print("manganelo.py list <url>")
exit(1)
def usage_download():
print("manganelo.py download <url> <download_dir>")
print("examples:")
- print(" manganelo.py download \"https://mangakakalot.com/chapter/vy918232/chapter_16\" /home/adam/Manga/MangaName")
+ print(" manganelo.py download \"https://manganelo.com/chapter/read_naruto_manga_online_free3/chapter_700.5\" /home/adam/Manga/MangaName")
print("")
print("Note: The manga directory has to exist.")
exit(1)
@@ -42,19 +39,32 @@ def download_file(url, save_path):
return False
return True
-def list_chapters(url, latest):
+def list_chapters(url, chapter_list_input):
response = requests.get(url)
if response.status_code != 200:
print("Failed to list chapters, server responded with status code %d" % response.status_code)
exit(2)
+ seen_titles = set()
+ for item in chapter_list_input:
+ title = item.get("title")
+ if title and len(title) > 0:
+ seen_titles.add(title.lower().replace(" ", ""))
+
+ seen_urls = set()
+ for item in chapter_list_input:
+ url = item.get("url")
+ if url and len(url) > 0:
+ seen_urls.add(url.replace("mangakakalot", "manganelo"))
+
tree = etree.HTML(response.text)
chapters = []
for element in tree.xpath('//ul[@class="row-content-chapter"]//a'):
element_text = element.text.strip()
- if latest and element_text == latest:
+ url = element.attrib.get("href").strip()
+ if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls:
break
- chapters.append({ "name": element_text, "url": element.attrib.get("href").strip() })
+ chapters.append({ "name": element_text, "url": url })
print(json.dumps(chapters))
def download_chapter(url, download_dir):
@@ -89,10 +99,12 @@ if command == "list":
usage_list()
url = sys.argv[2].replace("mangakakalot", "manganelo")
- latest = ""
- if len(sys.argv) >= 4:
- latest = sys.argv[3]
- list_chapters(url, latest)
+ chapter_list_input = sys.stdin.read()
+ if len(chapter_list_input) == 0:
+ chapter_list_input = []
+ else:
+ chapter_list_input = json.loads(chapter_list_input)
+ list_chapters(url, chapter_list_input)
elif command == "download":
if len(sys.argv) < 4:
usage_download()
diff --git a/plugins/readms.py b/plugins/readms.py
deleted file mode 100755
index a5343b8..0000000
--- a/plugins/readms.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import time
-import sys
-import requests
-import json
-import subprocess
-
-from lxml import etree
-
-def usage():
- print("readms.py command")
- print("commands:")
- print(" download")
- print(" list")
- exit(1)
-
-def usage_list():
- print("readms.py list <url> [latest]")
- print("examples:")
- print(" readms.py list \"https://readms.net/manga/a_trail_of_blood\"")
- print(" readms.py list \"https://readms.net/manga/a_trail_of_blood\" \"48 - Blood oath\"")
- exit(1)
-
-def usage_download():
- print("readms.py download <url> <download_dir>")
- print("examples:")
- print(" readms.py download \"https://readms.net/manga/a_trail_of_blood\" /home/adam/Manga/MangaName")
- print("")
- print("Note: The manga directory has to exist.")
- exit(1)
-
-if len(sys.argv) < 2:
- usage()
-
-def download_file(url, save_path):
- process = subprocess.Popen(["wget", "-q", "-o", "/dev/null", "-O", save_path, url], stderr=subprocess.PIPE)
- _, stderr = process.communicate()
- if process.returncode != 0:
- print("Failed to download file: {}, error: {}".format(url, stderr.decode('utf-8')))
- return False
- return True
-
-def list_chapters(url, latest):
- response = requests.get(url)
- if response.status_code != 200:
- print("Failed to list chapters, server responded with status code %d" % response.status_code)
- exit(2)
-
- tree = etree.HTML(response.text)
- chapters = []
- for element in tree.xpath('//table//tr//a'):
- element_text = element.text.strip()
- if latest and element_text == latest:
- break
- chapters.append({ "name": element_text, "url": "https://readms.net" + element.attrib.get("href").strip() })
- print(json.dumps(chapters))
-
-def download_chapter(url, download_dir):
- in_progress_filepath = os.path.join(download_dir, ".in_progress")
- with open(in_progress_filepath, "w") as file:
- file.write(url)
-
- img_number = 1
- while True:
- response = requests.get(url)
- if response.status_code != 200:
- print("Failed to list chapters, server responded with status code %d" % response.status_code)
- exit(2)
-
- tree = etree.HTML(response.text)
-
- image_sources = tree.xpath('//img[@id="manga-page"]/@src')
- if len(image_sources) != 1:
- break
-
- image_source = "https:" + image_sources[0]
- ext = image_source[image_source.rfind("."):]
- image_name = str(img_number) + ext
- image_path = os.path.join(download_dir, image_name)
- print("Downloading {} to {}".format(image_source, image_path))
- if not download_file(image_source, image_path):
- exit(1)
-
- next_pages = tree.xpath('//div[@class="page"]//a/@href')
- if len(next_pages) != 1:
- break
-
- next_page = next_pages[0]
- last_slash = next_page.rfind('/')
- try:
- if last_slash != -1 and int(next_page[last_slash+1:]) <= img_number:
- break
- except ValueError:
- pass
-
- url = "https://readms.net" + next_page
- img_number += 1
-
- with open(os.path.join(download_dir, ".finished"), "w") as file:
- file.write("1")
-
- os.remove(in_progress_filepath)
-
-command = sys.argv[1]
-if command == "list":
- if len(sys.argv) < 3:
- usage_list()
-
- url = sys.argv[2]
- latest = ""
- if len(sys.argv) >= 4:
- latest = sys.argv[3]
- list_chapters(url, latest)
-elif command == "download":
- if len(sys.argv) < 4:
- usage_download()
- url = sys.argv[2]
- download_dir = sys.argv[3]
- download_chapter(url, download_dir)
-else:
- usage()