From 48e757baffbf75bc8a1e4171ad94c27d7356cafa Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 7 May 2021 07:38:59 +0200 Subject: Migrate mangadex to new api, remove .in_progress files in tracked dir if they are old --- plugins/lhtranslation.py | 4 +- plugins/mangadex.py | 165 ++++++++++++++++++++++------------------------- plugins/mangakatana.py | 4 +- plugins/manganelo.py | 4 +- plugins/manganelos.py | 4 +- plugins/mangaplus.py | 4 +- plugins/mangatown.py | 4 +- plugins/mangawindow.py | 4 +- plugins/readm.py | 4 +- 9 files changed, 93 insertions(+), 104 deletions(-) (limited to 'plugins') diff --git a/plugins/lhtranslation.py b/plugins/lhtranslation.py index 153f4ed..082b865 100755 --- a/plugins/lhtranslation.py +++ b/plugins/lhtranslation.py @@ -50,7 +50,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -61,7 +61,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath("//div[@class='list-chapters']//a[@class='chapter']"): - title = element.find("b").text.strip() + title = element.find("b").text.strip().replace("/", "_") url = "https://lhtranslation.net/" + element.attrib.get("href").strip() if title.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/mangadex.py b/plugins/mangadex.py index 72b28f3..04bc800 100755 --- a/plugins/mangadex.py +++ b/plugins/mangadex.py @@ -21,13 +21,13 @@ def usage(): exit(1) def usage_list(): - print("mangadex.py list ") + print("mangadex.py list ") exit(1) def usage_download(): - print("mangadex.py download ") + print("mangadex.py download ") print("examples:") - print(" mangadex.py download \"https://mangadex.org/title/7139/one-punch-man\" /home/adam/Manga/MangaName") + print(" mangadex.py download \"4e4a1ed8-d4a0-4096-86db-ca5e3fc42c5d\" /home/adam/Manga/MangaName") print("") print("Note: The manga directory has to exist.") exit(1) @@ -35,8 +35,8 @@ def usage_download(): if len(sys.argv) < 2: usage() -def download_file(url, cookies, save_path): - with requests.get(url, headers=headers, cookies=cookies, stream=True) as response: +def download_file(url, save_path): + with requests.get(url, headers=headers, stream=True) as response: if not response.ok: return False with open(save_path, "wb") as file: @@ -45,11 +45,6 @@ def download_file(url, cookies, save_path): file.write(chunk) return True -def title_url_extract_manga_id(url): - result = re.search("mangadex.org/title/([0-9]+)", url) - if result and len(result.groups()) > 0: - return result.groups()[0] - def chapter_sort_func(chapter_data): return chapter_data[1].get("timestamp", 0) @@ -58,24 +53,16 @@ def chapter_title_extract_number(chapter_title): if result and len(result.groups()) > 0: return result.groups()[0] -def list_chapters(url, chapter_list_input): - manga_id = title_url_extract_manga_id(url) - if not manga_id: - print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title//..." 
% url) - exit(2) - - url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id; - response = requests.get(url, headers=headers) - if response.status_code != 200: - print("Failed to list chapters, server responded with status code %d" % response.status_code) - exit(2) +def sort_chapters(chapter): + return chapter["chapter"] +def list_chapters(url, chapter_list_input): seen_titles = set() seen_chapter_numbers = set() for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) chapter_number = chapter_title_extract_number(title) if chapter_number: seen_chapter_numbers.add(chapter_number) @@ -86,93 +73,95 @@ def list_chapters(url, chapter_list_input): if url and len(url) > 0: seen_urls.add(url) - lang = "gb" # english + lang = "en" - json_response = response.json() - status = json_response["status"] - if status != "OK": - print("Expected server response OK, got %s" % status) - exit(2) - - chapter_json = json_response["chapter"] - time_now = time.time() + i = 0 prev_chapter_number = "" - output_chapters = [] - for chapter_id, chapter in chapter_json.items(): - timestamp = chapter.get("timestamp", 0) - if timestamp > time_now: - continue - - lang_code = chapter.get("lang_code", "") - if lang_code != lang: - continue - - chapter_number_str = chapter.get("chapter", "0") - if chapter_number_str == prev_chapter_number: - continue - prev_chapter_number = chapter_number_str - - chapter_title = chapter.get("title") - chapter_url = "https://mangadex.org/chapter/" + chapter_id - chapter_name = "Ch. " + chapter_number_str - if chapter_title and len(chapter_title) > 0: - chapter_name += " - " + chapter_title - - if chapter_title.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls: + chapters = [] + while True: + url = "https://api.mangadex.org/chapter?manga=" + url + "&limit=100&offset=%d&order[publishAt]=desc" % (i * 100) + response = requests.get(url, headers=headers) + response.raise_for_status() + + if len(response.text) == 0: break - if chapter_number_str in seen_chapter_numbers: + json_response = response.json() + results = json_response["results"] + if len(results) == 0: break - output_chapters.append({ "name": chapter_name, "url": chapter_url }) - print(json.dumps(output_chapters)) + for result_item in results: + if result_item["result"] != "ok": + print("Got item with result != ok") + exit(1) -def chapter_url_extract_manga_id(url): - result = re.search("mangadex.org/chapter/([0-9]+)", url) - if result and len(result.groups()) > 0: - return result.groups()[0] + data = result_item["data"] + id = data["id"] + attributes = data["attributes"] + if attributes["translatedLanguage"] != lang: + continue -def download_chapter(url, download_dir): - request_url = url - manga_id = chapter_url_extract_manga_id(url) - if not manga_id: - print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/chapter/" % url) - exit(2) + chapter_number_str = attributes["chapter"] + if chapter_number_str == prev_chapter_number: + continue + prev_chapter_number = chapter_number_str - response = requests.get(request_url, headers=headers) - if response.status_code != 200: - print("Failed to list chapter images, server responded with status code %d" % response.status_code) - exit(2) + title = "Ch. 
" + chapter_number_str + title_item = attributes.get("title") + if title_item: + title += " - " + title_item + title = title.replace("/", "_") - cookies = response.cookies + if title.lower().replace(" ", "") in seen_titles or id in seen_urls: + break - url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % manga_id + if chapter_number_str in seen_chapter_numbers: + break - response = requests.get(url, headers=headers) - if response.status_code != 200: - print("Failed to list chapter images, server responded with status code %d" % response.status_code) - exit(2) + chapters.append({ "name": title, "url": id, "chapter": int(chapter_number_str) }) + i += 1 + + chapters = sorted(chapters, key=sort_chapters, reverse=True) + output_chapters = [] + for chapter in chapters: + output_chapters.append({ "name": chapter["name"], "url": chapter["url"] }) + print(json.dumps(output_chapters)) + +def get_base_url_for_chapter(chapter_id): + response = requests.get("https://api.mangadex.org/at-home/server/" + chapter_id, headers=headers) + response.raise_for_status() + return response.json()["baseUrl"] + +def download_chapter(url, download_dir): + base_url = get_base_url_for_chapter(url) + response = requests.get("https://api.mangadex.org/chapter/" + url, headers=headers) + response.raise_for_status() + + image_urls = [] + json_response = response.json() + if json_response["result"] != "ok": + print("Got item with result != ok") + exit(1) + + data = json_response["data"] + attributes = data["attributes"] + hash = attributes["hash"] + image_names = attributes["data"] + for image_name in image_names: + image_urls.append(base_url + "/data/" + hash + "/" + image_name) in_progress_filepath = os.path.join(download_dir, ".in_progress") with open(in_progress_filepath, "w") as file: - file.write(request_url) + file.write(url) img_number = 1 - json_response = response.json() - status = json_response["status"] - if status != "OK": - print("Expected server response OK, got %s" % status) - exit(2) - - chapter_hash = json_response["hash"] - server = json_response.get("server", "https://mangadex.org/data/") - for image_name in json_response["page_array"]: - image_url = "%s%s/%s" % (server, chapter_hash, image_name) + for image_url in image_urls: ext = image_url[image_url.rfind("."):] image_name = str(img_number) + ext image_path = os.path.join(download_dir, image_name) print("Downloading {} to {}".format(image_url, image_path)) - if not download_file(image_url, cookies, image_path): + if not download_file(image_url, image_path): print("Failed to download image: %s" % image_url) os.remove(in_progress_filepath) exit(2) diff --git a/plugins/mangakatana.py b/plugins/mangakatana.py index f2195a4..e90d916 100755 --- a/plugins/mangakatana.py +++ b/plugins/mangakatana.py @@ -54,7 +54,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -65,7 +65,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//div[@class="chapters"]//div[@class="chapter"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/manganelo.py 
b/plugins/manganelo.py index 1f0882c..57eb0cc 100755 --- a/plugins/manganelo.py +++ b/plugins/manganelo.py @@ -64,7 +64,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -75,7 +75,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//ul[@class="row-content-chapter"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/manganelos.py b/plugins/manganelos.py index 7e220d5..54c2625 100755 --- a/plugins/manganelos.py +++ b/plugins/manganelos.py @@ -56,7 +56,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -67,7 +67,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//section[@id="examples"]//div[@class="chapter-list"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/mangaplus.py b/plugins/mangaplus.py index 7104d80..0d87ddc 100755 --- a/plugins/mangaplus.py +++ b/plugins/mangaplus.py @@ -219,7 +219,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -235,7 +235,7 @@ def list_chapters(url, chapter_list_input): chapters = [] for chapter in reversed(all_chapters): - title = chapter.subtitle + title = chapter.subtitle.replace("/", "_") url = "https://mangaplus.shueisha.co.jp/viewer/{0}".format(chapter.id) if title.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/mangatown.py b/plugins/mangatown.py index d3a7e04..1a7eae9 100755 --- a/plugins/mangatown.py +++ b/plugins/mangatown.py @@ -56,7 +56,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -67,7 +67,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//ul[@class="chapter_list"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() if "/manga/" not in url: continue diff --git a/plugins/mangawindow.py b/plugins/mangawindow.py index b5f5bf8..3a8c30f 100755 --- a/plugins/mangawindow.py +++ b/plugins/mangawindow.py @@ -51,7 +51,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = 
item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -66,7 +66,7 @@ def list_chapters(url, chapter_list_input): if title is None: print("Failed to get title for chapter") exit(2) - title = title.strip() + title = title.strip().replace("/", "_") url = "https://mangawindow.net" + element.attrib.get("href").strip() if title.lower().replace(" ", "") in seen_titles or url in seen_urls: break diff --git a/plugins/readm.py b/plugins/readm.py index 3101587..74ae43c 100755 --- a/plugins/readm.py +++ b/plugins/readm.py @@ -56,7 +56,7 @@ def list_chapters(url, chapter_list_input): for item in chapter_list_input: title = item.get("title") if title and len(title) > 0: - seen_titles.add(title.lower().replace(" ", "")) + seen_titles.add(title.lower().replace(" ", "").replace("/", "_")) seen_urls = set() for item in chapter_list_input: @@ -67,7 +67,7 @@ def list_chapters(url, chapter_list_input): tree = etree.HTML(response.text) chapters = [] for element in tree.xpath('//div[@class="episodes-list"]//a'): - element_text = element.text.strip() + element_text = element.text.strip().replace("/", "_") url = element.attrib.get("href").strip() url = "https://readm.org" + url if element_text.lower().replace(" ", "") in seen_titles or url in seen_urls: -- cgit v1.2.3
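For reference, below is a minimal standalone sketch of the API flow this patch migrates plugins/mangadex.py to. The endpoints and JSON fields mirror what the new code reads (paginated /chapter queries with order[publishAt]=desc and the 2021-era "results" wrapper, translatedLanguage filtering, and /at-home/server for the image base URL). The HEADERS value, the function names and the __main__ demo are illustrative assumptions and not part of the plugin, which keeps its own headers, duplicate-chapter checks and .in_progress bookkeeping. One difference worth noting: the sketch rebuilds the request URL from manga_id on every pass, whereas the patch overwrites its url variable inside the while loop, so only the first page's URL appears to be well-formed.

    #!/usr/bin/env python3
    # Minimal sketch (not the plugin itself) of the MangaDex API flow the
    # patch above migrates to. Endpoints and JSON fields mirror what the new
    # plugins/mangadex.py reads; HEADERS and the names below are assumptions.

    import requests

    HEADERS = {"User-Agent": "mangadex-api-sketch"}  # assumed; the plugin defines its own headers

    def list_english_chapters(manga_id):
        # Page through /chapter for one manga, newest first, keeping only
        # English entries -- the same filter the plugin applies.
        chapters = []
        offset = 0
        while True:
            url = ("https://api.mangadex.org/chapter?manga=" + manga_id
                   + "&limit=100&offset=%d&order[publishAt]=desc" % offset)
            response = requests.get(url, headers=HEADERS)
            response.raise_for_status()
            results = response.json()["results"]  # 2021-era v5 response wrapper
            if not results:
                break
            for item in results:
                attributes = item["data"]["attributes"]
                if attributes["translatedLanguage"] != "en":
                    continue
                name = "Ch. " + attributes["chapter"]
                if attributes.get("title"):
                    name += " - " + attributes["title"]
                chapters.append({"name": name.replace("/", "_"),
                                 "id": item["data"]["id"]})
            offset += 100
        return chapters

    def chapter_image_urls(chapter_id):
        # Resolve an image server via /at-home/server, then build one URL per
        # page from the chapter hash and file list, as download_chapter does.
        base_url = requests.get("https://api.mangadex.org/at-home/server/" + chapter_id,
                                headers=HEADERS).json()["baseUrl"]
        attributes = requests.get("https://api.mangadex.org/chapter/" + chapter_id,
                                  headers=HEADERS).json()["data"]["attributes"]
        return [base_url + "/data/" + attributes["hash"] + "/" + page
                for page in attributes["data"]]

    if __name__ == "__main__":
        # Manga id taken from the usage example in the patch.
        for chapter in list_english_chapters("4e4a1ed8-d4a0-4096-86db-ca5e3fc42c5d"):
            print(chapter["name"], chapter["id"])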