aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2022-09-17 13:52:20 +0200
committer dec05eba <dec05eba@protonmail.com> 2022-09-17 13:52:20 +0200
commit f34a17de0d867a8788a63ae9a955052744815922 (patch)
tree 15778bb20a99874d484cd7f3d0b9f8f22a8ac2dd /src
parent 77b2d411d537ff73ccbfd1ed889256dd450f0fce (diff)
Mangakatana: fix one page again...
Diffstat (limited to 'src')
-rw-r--r-- src/QuickMedia.cpp 97
1 files changed, 67 insertions, 30 deletions
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index df98a5a..c9e52c1 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -834,31 +834,68 @@ namespace QuickMedia {
.manga_id_handler("/manga/", "/");
}
- static std::vector<std::string> get_javascript_array_string(const std::string &html_source, const std::string &var_name) {
- std::vector<std::string> arr;
- size_t sources_start = html_source.find(var_name + "=[");
- if(sources_start == std::string::npos)
- return arr;
-
- sources_start += var_name.size() + 1; // just before [
- size_t json_end = find_end_of_json_array(html_source.c_str(), sources_start, html_source.size());
- if(json_end == size_t(-1))
- return arr;
-
- sources_start += 1;
- json_end -= 1;
- std::string urls_str = html_source.substr(sources_start, json_end - sources_start);
- string_replace_all(urls_str, "'", "");
-
- string_split(urls_str, ',', [&arr](const char *str, size_t size) {
- std::string url(str, size);
- url = strip(url);
- if(!url.empty())
- arr.push_back(std::move(url));
- return true;
- });
+ static std::vector<std::string> extract_javascript_sections(const std::string &html_source) { // Returns the text found between each "<script" opening tag and the next "</script>", in the order they occur in html_source
+ std::vector<std::string> sections;
+ size_t start = 0;
+
+ while(true) {
+ start = html_source.find("<script", start); // exact, case-sensitive match
+ if(start == std::string::npos)
+ break;
+
+ start += 7; // skip past "<script" (7 characters)
+ start = html_source.find(">", start); // first ">" after "<script" is taken as the end of the opening tag
+ if(start == std::string::npos)
+ break;
+
+ start += 1; // first character of the script body
+ size_t end = html_source.find("</script>", start); // exact, case-sensitive match
+ if(end == std::string::npos)
+ break; // no closing tag: stop, keeping the sections collected so far
- return arr;
+ sections.push_back(html_source.substr(start, end - start)); // body only, without the surrounding tags
+ start = end + 9; // resume after "</script>" (9 characters)
+ }
+
+ return sections;
+ }
+
+ static std::vector<std::string> get_javascript_string_arrays_unique(const std::string &js_source) { // Collects the comma-separated items of every array found after "=['" in js_source into one flat list, skipping an item equal to the one stored just before it
+ std::vector<std::string> arrays;
+ size_t start = 0;
+
+ while(true) {
+ start = js_source.find("=['", start); // only arrays written directly after "=" whose first item opens with a single quote are matched
+ if(start == std::string::npos)
+ break;
+
+ start += 3; // skip past "=['"
+ size_t end = js_source.find("]", start); // first "]" after the match; only used to decide where the next search resumes
+ if(end == std::string::npos)
+ break;
+
+ size_t sources_start = start - 2; // just before [
+ size_t json_end = find_end_of_json_array(js_source.c_str(), sources_start, js_source.size()); // helper defined elsewhere; presumably returns the position just past the matching "]" - TODO confirm
+ if(json_end == size_t(-1))
+ break; // NOTE(review): size_t(-1) is treated as failure and ends the whole scan rather than skipping this array
+
+ sources_start += 1; // step past "["
+ json_end -= 1; // presumably steps back onto the closing "]" so it is excluded below - TODO confirm
+ std::string urls_str = js_source.substr(sources_start, json_end - sources_start);
+ string_replace_all(urls_str, "'", ""); // presumably removes every single quote, including any inside an item - TODO confirm
+
+ string_split(urls_str, ',', [&arrays](const char *str, size_t size) { // NOTE(review): assuming this splits on every ',', an item that itself contains a comma is broken apart
+ std::string url(str, size);
+ url = strip(url);
+ if(!url.empty() && (arrays.empty() || arrays.back() != url)) // compares with the last stored item only, so only consecutive duplicates are skipped
+ arrays.push_back(std::move(url));
+ return true; // presumably tells string_split to continue with the next piece - TODO confirm
+ });
+
+ start = end + 1; // NOTE(review): resumes after the first "]" found above, not after json_end
+ }
+
+ return arrays;
}
static void add_mangakatana_handlers(MangaGenericSearchPage *manga_generic_search_page) {
@@ -879,12 +916,12 @@ namespace QuickMedia {
.list_chapters_handler("//div[class='chapters']//div[class='chapter']//a[0]", "text", "href", "/manga/")
.list_chapters_uploaded_time_handler("//div[class='chapters']//div[class='update_time']", "text", nullptr)
.list_page_images_custom_handler([](const std::string &html_source) {
- std::vector<std::string> urls_ytaw = get_javascript_array_string(html_source, "ytaw");
- std::vector<std::string> urls_htnc = get_javascript_array_string(html_source, "htnc");
- if(urls_ytaw.size() >= urls_htnc.size())
- return urls_ytaw;
- else
- return urls_htnc;
+ std::vector<std::string> urls;
+ for(const std::string &js_section : extract_javascript_sections(html_source)) {
+ std::vector<std::string> js_string_array = get_javascript_string_arrays_unique(js_section);
+ urls.insert(urls.end(), std::move_iterator(js_string_array.begin()), std::move_iterator(js_string_array.end()));
+ }
+ return urls;
})
.manga_id_handler("/manga/", nullptr);
}