From 033fbf3f1363c810d115ce9a531aea26ea9e1cf1 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Sat, 17 Apr 2021 13:16:03 +0200
Subject: Fix mangakatana search when mangakatana redirect to the exact match

---
Review notes (everything between the "---" line and the diffstat is ignored by git am):

This is a revised version of the original commit. The commit id on the first "From" line and the
post-image blob id on the "index" line still refer to the original version. Line breaks and indentation
were reconstructed from the hunk line counts (4-space indentation assumed); use
"git apply --ignore-whitespace" if the context does not match exactly.

Changes relative to the original commit:
 * str_find_case_insensitive: cast to unsigned char before calling std::toupper and reject a
   start index that is past the end of the string.
 * header_extract_location: keep searching when a "location:" match is not at the start of a line
   (for example inside "Content-Location:") and accept a value that is terminated by "\n" or by the
   end of the headers instead of "\r".
 * get_page: check search_query.search_template before it is used to construct the url, and jump to
   cleanup instead of returning directly when a text query is invalid.
 * get_page: use std::make_move_iterator instead of relying on class template argument deduction.
 * text_handler/thumbnail_handler: spell out the std::vector element types.

 src/plugins/MangaGeneric.cpp | 145 +++++++++++++++++++++++++++++++++----------
 1 file changed, 112 insertions(+), 33 deletions(-)

(limited to 'src/plugins')

diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp
index 607488f..ebfbdda 100644
--- a/src/plugins/MangaGeneric.cpp
+++ b/src/plugins/MangaGeneric.cpp
@@ -58,11 +58,18 @@ namespace QuickMedia {
             [](QuickMediaHtmlNode *node, void *userdata) {
                 HtmlSearchUserdata *search_userdata = (HtmlSearchUserdata*)userdata;
                 const char *field1_value = html_attr_or_inner_text(node, search_userdata->field1);
-                const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2);
-                if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) {
-                    auto item = BodyItem::create(strip(field1_value));
-                    item->url = strip(field2_value);
-                    search_userdata->body_items->push_back(std::move(item));
+                if(search_userdata->field2) {
+                    const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2);
+                    if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) {
+                        auto item = BodyItem::create(strip(field1_value));
+                        item->url = strip(field2_value);
+                        search_userdata->body_items->push_back(std::move(item));
+                    }
+                } else {
+                    if(field1_value) {
+                        auto item = BodyItem::create(strip(field1_value));
+                        search_userdata->body_items->push_back(std::move(item));
+                    }
                 }
             }, search_userdata);
     }
@@ -98,6 +105,44 @@ namespace QuickMedia {
             }, page_image_userdata);
     }
 
+    // Returns the index of the first case-insensitive match of |substr| (|substr_len| bytes long) in |str|
+    // at or after |start_index|, or std::string::npos if there is no match.
+    static size_t str_find_case_insensitive(const std::string &str, size_t start_index, const char *substr, size_t substr_len) {
+        if(start_index > str.size())
+            return std::string::npos;
+
+        auto it = std::search(str.begin() + start_index, str.end(), substr, substr + substr_len,
+            [](char c1, char c2) {
+                // std::toupper requires a value representable as unsigned char (or EOF)
+                return std::toupper((unsigned char)c1) == std::toupper((unsigned char)c2);
+            });
+        if(it == str.end())
+            return std::string::npos;
+        return it - str.begin();
+    }
+
+    // Returns the value of the first "Location" header field in |headers| with leading spaces and tabs
+    // removed, or an empty string if there is no such field or if its value is empty.
+    static std::string header_extract_location(const std::string &headers) {
+        const char field_name[] = "location:";
+        const size_t field_name_len = sizeof(field_name) - 1;
+
+        size_t index = 0;
+        while((index = str_find_case_insensitive(headers, index, field_name, field_name_len)) != std::string::npos) {
+            // Only accept a match at the start of a line; this skips for example "Content-Location:"
+            if(index == 0 || headers[index - 1] == '\n') {
+                const size_t start = headers.find_first_not_of(" \t", index + field_name_len);
+                if(start == std::string::npos)
+                    return "";
+                // The field normally ends with "\r\n", but also accept "\n" or the end of |headers|
+                const size_t end = headers.find_first_of("\r\n", start);
+                return headers.substr(start, end == std::string::npos ? std::string::npos : end - start);
+            }
+            index += field_name_len;
+        }
+        return "";
+    }
+
     MangaGenericSearchPage::MangaGenericSearchPage(Program *program, const char *service_name, const char *website_url, bool fail_on_http_error) :
         Page(program), service_name(service_name), website_url(website_url ? website_url : ""), fail_on_http_error(fail_on_http_error)
     {
@@ -112,17 +157,13 @@ namespace QuickMedia {
     }
 
     PluginResult MangaGenericSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) {
-        if(!search_query.search_template || !text_query.html_query || !text_query.title_field || !text_query.url_field) {
+        // This has to be checked before |search_template| is used to construct |url| below
+        if(!search_query.search_template) {
             assert(false);
             return PluginResult::ERR;
         }
 
-        HtmlSearchUserdata search_userdata;
-        search_userdata.body_items = &result_items;
-        search_userdata.field1 = text_query.title_field;
-        search_userdata.field2 = text_query.url_field;
-        search_userdata.field2_contains = text_query.url_contains;
-
+        std::string target_url;
         std::string url = search_query.search_template;
         string_replace_all(url, "%s", url_param_encode(str));
         string_replace_all(url, "%p", std::to_string(search_query.page_start + page));
@@ -139,19 +180,62 @@ namespace QuickMedia {
         if(result != 0)
             goto cleanup;
 
-        result = html_append_search(&html_search, text_query.html_query, &search_userdata);
-        if(result != 0)
-            goto cleanup;
+        for(const TextQuery &text_query : text_queries) {
+            if(!text_query.html_query || !text_query.title_field) {
+                assert(false);
+                // Fail like the other errors in this loop; returning directly would skip the cleanup code
+                result = -1;
+                goto cleanup;
+            }
 
-        assert(!thumbnail_query.html_query || thumbnail_query.field_name);
-        if(thumbnail_query.html_query && thumbnail_query.field_name) {
-            HtmlMergeUserdata merge_userdata;
-            merge_userdata.type = MergeType::THUMBNAIL;
-            merge_userdata.body_item_image_context.body_items = &result_items;
-            merge_userdata.body_item_image_context.index = 0;
-            merge_userdata.field_name = thumbnail_query.field_name;
-            merge_userdata.field_contains = thumbnail_query.field_contains;
-            result = html_body_item_merge(&html_search, thumbnail_query.html_query, &merge_userdata);
+            BodyItems new_result_items;
+            HtmlSearchUserdata search_userdata;
+            search_userdata.body_items = &new_result_items;
+            search_userdata.field1 = text_query.title_field;
+            search_userdata.field2 = text_query.url_field;
+            search_userdata.field2_contains = text_query.url_contains;
+
+            result = html_append_search(&html_search, text_query.html_query, &search_userdata);
+            if(result != 0)
+                goto cleanup;
+
+            for(const ThumbnailQuery &thumbnail_query : thumbnail_queries) {
+                assert(!thumbnail_query.html_query || thumbnail_query.field_name);
+                if(thumbnail_query.html_query && thumbnail_query.field_name) {
+                    HtmlMergeUserdata merge_userdata;
+                    merge_userdata.type = MergeType::THUMBNAIL;
+                    merge_userdata.body_item_image_context.body_items = &new_result_items;
+                    merge_userdata.body_item_image_context.index = 0;
+                    merge_userdata.field_name = thumbnail_query.field_name;
+                    merge_userdata.field_contains = thumbnail_query.field_contains;
+                    result = html_body_item_merge(&html_search, thumbnail_query.html_query, &merge_userdata);
+                    if(result != 0)
+                        goto cleanup;
+                }
+            }
+
+            if(!text_query.url_field && !new_result_items.empty()) {
+                if(target_url.empty()) {
+                    std::string response_headers;
+                    DownloadResult download_result = download_head_to_string(url, response_headers, true);
+                    if(download_result != DownloadResult::OK) {
+                        result = -1;
+                        goto cleanup;
+                    }
+
+                    target_url = header_extract_location(response_headers);
+                    if(target_url.empty()) {
+                        fprintf(stderr, "Failed to extract target location from %s HEAD\n", url.c_str());
+                        result = -1;
+                        goto cleanup;
+                    }
+                }
+
+                for(auto &new_body_item : new_result_items) {
+                    new_body_item->url = target_url;
+                }
+            }
+            result_items.insert(result_items.end(), std::make_move_iterator(new_result_items.begin()), std::make_move_iterator(new_result_items.end()));
         }
 
         for(auto &body_item : result_items) {
@@ -534,18 +618,13 @@ namespace QuickMedia {
         return *this;
     }
 
-    MangaGenericSearchPage& MangaGenericSearchPage::text_handler(const char *html_query, const char *title_field, const char *url_field, const char *url_contains) {
-        text_query.html_query = html_query;
-        text_query.title_field = title_field;
-        text_query.url_field = url_field;
-        text_query.url_contains = url_contains;
+    MangaGenericSearchPage& MangaGenericSearchPage::text_handler(std::vector<TextQuery> queries) {
+        text_queries = std::move(queries);
         return *this;
     }
 
-    MangaGenericSearchPage& MangaGenericSearchPage::thumbnail_handler(const char *html_query, const char *field_name, const char *field_contains) {
-        thumbnail_query.html_query = html_query;
-        thumbnail_query.field_name = field_name;
-        thumbnail_query.field_contains = field_contains;
+    MangaGenericSearchPage& MangaGenericSearchPage::thumbnail_handler(std::vector<ThumbnailQuery> queries) {
+        thumbnail_queries = std::move(queries);
         return *this;
     }
 
-- 
cgit v1.2.3