author    dec05eba <dec05eba@protonmail.com>    2020-05-28 02:47:58 +0200
committer dec05eba <dec05eba@protonmail.com>    2020-07-06 07:12:34 +0200
commit    138f83a92770c236f703ba9b9172f321fa59b6ad (patch)
tree      b6bdab8ae6eb395e6340c11105284c06037cb26a
parent    03534b3fcfd56f7725b5a558a1c6361a1d6db1da (diff)
List all chapters on mangadex, use correct download server
-rwxr-xr-x  automedia.py           4
-rwxr-xr-x  plugins/mangadex.py   66
2 files changed, 50 insertions(+), 20 deletions(-)
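In short, the chapter listing no longer scrapes mangadex's HTML; it calls the site's v1 JSON API, which returns every chapter of a manga in one response. A minimal sketch of the new flow (the endpoint, User-Agent, and response fields are the ones added below; the manga id is hypothetical):

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

manga_id = "12345"  # hypothetical, normally extracted from a mangadex.org/title/<number>/... url
response = requests.get("https://mangadex.org/api/?id=%s&type=manga" % manga_id, headers=headers)
data = response.json()
assert data["status"] == "OK"
# "chapter" maps chapter id -> metadata ("lang_code", "chapter", "title", "timestamp", ...)
for chapter_id, meta in data["chapter"].items():
    print(chapter_id, meta.get("chapter"), meta.get("lang_code"))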
diff --git a/automedia.py b/automedia.py
index 4158d44..5670988 100755
--- a/automedia.py
+++ b/automedia.py
@@ -582,6 +582,10 @@ def sync(rss_config_dir, html_config_dir, download_dir, sync_rate_sec):
sync_rss(rss, tc)
# Add last synced timestamp. This, together with the "updated" file, is used to remove series
# that haven't updated in a long time (either finished series or axed series)
+
+ # TODO: Fix. This will fail if the path is removed while it's running,
+ # same with other places.
+ # To fix, add a command to remove tracked items safely
with open(os.path.join(rss_tracked_dir, rss.title, "synced"), "w") as file:
file.write(str(time.time()))
#else:
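The TODO above flags that the open() can race with a concurrent removal of the tracked directory. Until a dedicated remove command exists, a sketch of the kind of guard it alludes to (names taken from the hunk above, purely illustrative):

synced_path = os.path.join(rss_tracked_dir, rss.title, "synced")
try:
    with open(synced_path, "w") as file:
        file.write(str(time.time()))
except FileNotFoundError:
    # The series was removed while the sync was running; skip it instead of crashing
    pass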
diff --git a/plugins/mangadex.py b/plugins/mangadex.py
index 238a264..4890417 100755
--- a/plugins/mangadex.py
+++ b/plugins/mangadex.py
@@ -9,6 +9,10 @@ import re
from lxml import etree
+headers = {
+ 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
+}
+
def usage():
print("mangadex.py command")
print("commands:")
@@ -32,7 +36,7 @@ if len(sys.argv) < 2:
usage()
def download_file(url, save_path):
- with requests.get(url, stream=True) as response:
+ with requests.get(url, headers=headers, stream=True) as response:
if not response.ok:
return False
with open(save_path, "wb") as file:
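The rest of download_file falls outside the hunk; presumably it streams the response body to disk in the usual requests fashion, something like:

# presumed continuation of download_file (not shown in the hunk)
for chunk in response.iter_content(chunk_size=8192):
    if chunk:
        file.write(chunk)
return True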
@@ -46,6 +50,9 @@ def title_url_extract_manga_id(url):
if result and len(result.groups()) > 0:
return result.groups()[0]
+def chapter_sort_func(chapter_data):
+ return chapter_data[1].get("timestamp", 0)
+
# TODO: Support pagination. Currently only the n latest chapters are listed,
# but going through all pages might be too slow for large manga like Naruto
def list_chapters(url, chapter_list_input):
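The chapter_sort_func helper added above keys each (chapter_id, metadata) pair by its upload timestamp, so sorting with reverse=True (done further down) puts the newest chapters first. A toy example with made-up data:

chapters = [
    ("111", {"chapter": "1", "timestamp": 100}),
    ("222", {"chapter": "2", "timestamp": 200}),
]
chapters = sorted(chapters, key=chapter_sort_func, reverse=True)
# -> chapter id "222" (timestamp 200) now comes before "111" (timestamp 100)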
@@ -54,7 +61,8 @@ def list_chapters(url, chapter_list_input):
print("Failed to extract manga id from url: %s. Note: url is expected to be in this format: mangadex.org/title/<number>/..." % url)
exit(2)
- response = requests.get(url)
+ url = "https://mangadex.org/api/?id=%s&type=manga" % manga_id;
+ response = requests.get(url, headers=headers)
if response.status_code != 200:
print("Failed to list chapters, server responded with status code %d" % response.status_code)
exit(2)
@@ -71,29 +79,46 @@ def list_chapters(url, chapter_list_input):
if url and len(url) > 0:
seen_urls.add(url)
- lang = "1" # english
+ lang = "gb" # english
+
+ json_response = response.json()
+ status = json_response["status"]
+ if status != "OK":
+ print("Expected server response OK, got %s" % status)
+ exit(2)
- tree = etree.HTML(response.text)
+ chapter_json = json_response["chapter"]
chapters = []
- prev_chapter_number = None
- for element in tree.xpath("//div[@data-manga-id='%s']" % manga_id):
- chapter_lang = element.attrib.get("data-lang")
- if chapter_lang != lang:
+ for key, value in chapter_json.items():
+ chapters.append((key, value))
+
+ chapters = sorted(chapters, key=chapter_sort_func, reverse=True)
+ prev_chapter_number = ""
+ output_chapters = []
+ for chapter_data in chapters:
+ chapter_id = chapter_data[0]
+ chapter = chapter_data[1]
+
+ lang_code = chapter.get("lang_code", "")
+ if lang_code != lang:
continue
- chapter_id = element.attrib.get("data-id")
- chapter_url = "https://mangadex.org/chapter/%s" % chapter_id
- chapter_title = element.attrib.get("data-title") # optional
- chapter_number = element.attrib.get("data-chapter")
- if prev_chapter_number and chapter_number == prev_chapter_number:
+
+ chapter_number_str = chapter.get("chapter", "0")
+ if chapter_number_str == prev_chapter_number:
continue
- prev_chapter_number = chapter_number
- chapter_name = "Ch. %s" % chapter_number
+ prev_chapter_number = chapter_number_str
+
+ chapter_title = chapter.get("title")
+ chapter_url = "https://mangadex.org/chapter/" + chapter_id
+ chapter_name = "Ch. " + chapter_number_str
if chapter_title:
- chapter_name += " - %s" % chapter_title
+ chapter_name += " - " + chapter_title
+
if (chapter_title or "").lower().replace(" ", "") in seen_titles or chapter_url in seen_urls:
break
- chapters.append({ "name": chapter_name, "url": chapter_url })
- print(json.dumps(chapters))
+
+ output_chapters.append({ "name": chapter_name, "url": chapter_url })
+ print(json.dumps(output_chapters))
def chapter_url_extract_manga_id(url):
result = re.search("mangadex.org/chapter/([0-9]+)", url)
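After these changes list_chapters still emits the same JSON shape as before: a newest-first array of name/url objects, with duplicate chapter numbers (multiple scanlations of the same chapter) collapsed to the newest entry. Hypothetical output:

[{"name": "Ch. 10 - Some Title", "url": "https://mangadex.org/chapter/900001"},
 {"name": "Ch. 9", "url": "https://mangadex.org/chapter/899990"}]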
@@ -109,7 +134,7 @@ def download_chapter(url, download_dir):
url = "https://mangadex.org/api/?id=%s&server=null&type=chapter" % manga_id
- response = requests.get(url)
+ response = requests.get(url, headers=headers)
if response.status_code != 200:
print("Failed to list chapter images, server responded with status code %d" % response.status_code)
exit(2)
@@ -126,8 +151,9 @@ def download_chapter(url, download_dir):
exit(2)
chapter_hash = json_response["hash"]
+ server = json_response.get("server", "https://mangadex.org/data/")
for image_name in json_response["page_array"]:
- image_url = "https://mangadex.org/data/%s/%s" % (chapter_hash, image_name)
+ image_url = "%s%s/%s" % (server, chapter_hash, image_name)
ext = image_url[image_url.rfind("."):]
image_name = str(img_number) + ext
image_path = os.path.join(download_dir, image_name)
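The second half of the commit message ("use correct download server") is this last hunk: the image host is now taken from the chapter API response instead of being hardcoded to mangadex.org/data/. With a hypothetical response it plays out like this:

json_response = {"hash": "abc123", "server": "https://s2.mangadex.org/data/", "page_array": ["x1.png"]}
server = json_response.get("server", "https://mangadex.org/data/")
for image_name in json_response["page_array"]:
    image_url = "%s%s/%s" % (server, json_response["hash"], image_name)
    # -> "https://s2.mangadex.org/data/abc123/x1.png"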