Migrate mangadex to the new API, and remove .in_progress files in the tracked dir if they are old

author: dec05eba <dec05eba@protonmail.com> 2021-05-07 07:38:59 +0200
committer: dec05eba <dec05eba@protonmail.com> 2021-05-07 07:38:59 +0200
commit: 48e757baffbf75bc8a1e4171ad94c27d7356cafa (patch)
tree: 91cad8a939725bd3a52bf2bc726620d164d64a57 /plugins/mangadex.py
parent: 52c63554190b8421c6f2db72d490f50364a2e23d (diff)
1 files changed, 77 insertions, 88 deletions
diff --git a/plugins/mangadex.py b/plugins/mangadex.py
index 72b28f3..04bc800 100755
--- a/plugins/mangadex.py
+++ b/plugins/mangadex.py
@@ -21,13 +21,13 @@ def usage():
     exit(1)
 
 def usage_list():
-    print("mangadex.py list <url>")
+    print("mangadex.py list <manga-id>")
     exit(1)
 
 def usage_download():
-    print("mangadex.py download <url> <download_dir>")
+    print("mangadex.py download <chapter-id> <download_dir>")
     print("examples:")
-    print("  mangadex.py download \"https://mangadex.org/title/7139/one-punch-man\" /home/adam/Manga/MangaName")
+    print("  mangadex.py download \"4e4a1ed8-d4a0-4096-86db-ca5e3fc42c5d\" /home/adam/Manga/MangaName")
     print("")
     print("Note: The manga directory has to exist.")
     exit(1)
@@ -35,8 +35,8 @@ def usage_download():
 if len(sys.argv) < 2:
     usage()
 
-def download_file(url, cookies, save_path):
-    with requests.get(url, headers=headers, cookies=cookies, stream=True) as response:
+def download_file(url, save_path):
+    with requests.get(url, headers=headers, stream=True) as response:
         if not response.ok:
             return False
         with open(save_path, "wb") as file:
@@ -45,11 +45,6 @@ def download_file(url, cookies, save_path):
                     file.write(chunk)
     return True
 
-def title_url_extract_manga_id(url):
-    result = re.search("mangadex.org/title/([0-9]+)", url)
-    if result and len(result.groups()) > 0:
-        return result.groups()[0]
-
 def chapter_sort_func(chapter_data):
     return chapter_data[1].get("timestamp", 0)
 
@@ -58,24 +53,16 @@ def chapter_title_extract_number(chapter_title):
     if result and len(result.groups()) > 0:
         return result.groups()[0]
 
-def list_chapters(url, chapter_list_input):
-    manga_id = title_url_extract_manga_id(url)
-    if not manga_id:
-        print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title/<number>/..." % url)
-        exit(2)
-
-    url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id;
-    response = requests.get(url, headers=headers)
-    if response.status_code != 200:
-        print("Failed to list chapters, server responded with status code %d" % response.status_code)
-        exit(2)
+def sort_chapters(chapter):
+    return chapter["chapter"]
 
+def list_chapters(url, chapter_list_input):
     seen_titles = set()
     seen_chapter_numbers = set()
     for item in chapter_list_input:
         title = item.get("title")
         if title and len(title) > 0:
-            seen_titles.add(title.lower().replace(" ", ""))
+            seen_titles.add(title.lower().replace(" ", "").replace("/", "_"))
             chapter_number = chapter_title_extract_number(title)
             if chapter_number:
                 seen_chapter_numbers.add(chapter_number)
@@ -86,93 +73,95 @@ def list_chapters(url, chapter_list_input):
         if url and len(url) > 0:
             seen_urls.add(url)
 
-    lang = "gb" # english
+    lang = "en"
 
-    json_response = response.json()
-    status = json_response["status"]
-    if status != "OK":
-        print("Expected server response OK, got %s" % status)
-        exit(2)
-
-    chapter_json = json_response["chapter"]
-    time_now = time.time()
+    i = 0
     prev_chapter_number = ""
-    output_chapters = []
-    for chapter_id, chapter in chapter_json.items():
-        timestamp = chapter.get("timestamp", 0)
-        if timestamp > time_now:
-            continue
-
-        lang_code = chapter.get("lang_code", "")
-        if lang_code != lang:
-            continue
-
-        chapter_number_str = chapter.get("chapter", "0")
-        if chapter_number_str == prev_chapter_number:
-            continue
-        prev_chapter_number = chapter_number_str
-
-        chapter_title = chapter.get("title")
-        chapter_url = "https://mangadex.org/chapter/" + chapter_id
-        chapter_name = "Ch. " + chapter_number_str
-        if chapter_title and len(chapter_title) > 0:
-            chapter_name += " - " + chapter_title
-
-        if chapter_title.lower().replace(" ", "") in seen_titles or chapter_url in seen_urls:
+    chapters = []
+    while True:
+        url = "https://api.mangadex.org/chapter?manga=" + url + "&limit=100&offset=%d&order[publishAt]=desc" % (i * 100)
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+
+        if len(response.text) == 0:
             break
 
-        if chapter_number_str in seen_chapter_numbers:
+        json_response = response.json()
+        results = json_response["results"]
+        if len(results) == 0:
             break
 
-        output_chapters.append({ "name": chapter_name, "url": chapter_url })
-    print(json.dumps(output_chapters))
+        for result_item in results:
+            if result_item["result"] != "ok":
+                print("Got item with result != ok")
+                exit(1)
 
-def chapter_url_extract_manga_id(url):
-    result = re.search("mangadex.org/chapter/([0-9]+)", url)
-    if result and len(result.groups()) > 0:
-        return result.groups()[0]
+            data = result_item["data"]
+            id = data["id"]
+            attributes = data["attributes"]
+            if attributes["translatedLanguage"] != lang:
+                continue
 
-def download_chapter(url, download_dir):
-    request_url = url
-    manga_id = chapter_url_extract_manga_id(url)
-    if not manga_id:
-        print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/chapter/<number>" % url)
-        exit(2)
+            chapter_number_str = attributes["chapter"]
+            if chapter_number_str == prev_chapter_number:
+                continue
+            prev_chapter_number = chapter_number_str
 
-    response = requests.get(request_url, headers=headers)
-    if response.status_code != 200:
-        print("Failed to list chapter images, server responded with status code %d" % response.status_code)
-        exit(2)
+            title = "Ch. " + chapter_number_str
+            title_item = attributes.get("title")
+            if title_item:
+                title += " - " + title_item
+            title = title.replace("/", "_")
 
-    cookies = response.cookies
+            if title.lower().replace(" ", "") in seen_titles or id in seen_urls:
+                break
 
-    url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % manga_id
+            if chapter_number_str in seen_chapter_numbers:
+                break
 
-    response = requests.get(url, headers=headers)
-    if response.status_code != 200:
-        print("Failed to list chapter images, server responded with status code %d" % response.status_code)
-        exit(2)
+            chapters.append({ "name": title, "url": id, "chapter": int(chapter_number_str) })
+        i += 1
+
+    chapters = sorted(chapters, key=sort_chapters, reverse=True)
+    output_chapters = []
+    for chapter in chapters:
+        output_chapters.append({ "name": chapter["name"], "url": chapter["url"] })
+    print(json.dumps(output_chapters))
+
+def get_base_url_for_chapter(chapter_id):
+    response = requests.get("https://api.mangadex.org/at-home/server/" + chapter_id, headers=headers)
+    response.raise_for_status()
+    return response.json()["baseUrl"]
+
+def download_chapter(url, download_dir):
+    base_url = get_base_url_for_chapter(url)
+    response = requests.get("https://api.mangadex.org/chapter/" + url, headers=headers)
+    response.raise_for_status()
+
+    image_urls = []
+    json_response = response.json()
+    if json_response["result"] != "ok":
+        print("Got item with result != ok")
+        exit(1)
+
+    data = json_response["data"]
+    attributes = data["attributes"]
+    hash = attributes["hash"]
+    image_names = attributes["data"]
+    for image_name in image_names:
+        image_urls.append(base_url + "/data/" + hash + "/" + image_name)
 
     in_progress_filepath = os.path.join(download_dir, ".in_progress")
     with open(in_progress_filepath, "w") as file:
-        file.write(request_url)
+        file.write(url)
 
     img_number = 1
-    json_response = response.json()
-    status = json_response["status"]
-    if status != "OK":
-        print("Expected server response OK, got %s" % status)
-        exit(2)
-
-    chapter_hash = json_response["hash"]
-    server = json_response.get("server", "https://mangadex.org/data/")
-    for image_name in json_response["page_array"]:
-        image_url = "%s%s/%s" % (server, chapter_hash, image_name)
+    for image_url in image_urls:
         ext = image_url[image_url.rfind("."):]
         image_name = str(img_number) + ext
         image_path = os.path.join(download_dir, image_name)
         print("Downloading {} to {}".format(image_url, image_path))
-        if not download_file(image_url, cookies, image_path):
+        if not download_file(image_url, image_path):
             print("Failed to download image: %s" % image_url)
             os.remove(in_progress_filepath)
             exit(2)
author	dec05eba <dec05eba@protonmail.com>	2021-05-07 07:38:59 +0200
committer	dec05eba <dec05eba@protonmail.com>	2021-05-07 07:38:59 +0200
commit	48e757baffbf75bc8a1e4171ad94c27d7356cafa (patch)
tree	91cad8a939725bd3a52bf2bc726620d164d64a57 /plugins/mangadex.py
parent	52c63554190b8421c6f2db72d490f50364a2e23d (diff)