#include "../../plugins/MediaGeneric.hpp"
#include "../../include/StringUtils.hpp"
// NOTE(review): the name of the system header on the original third #include was lost.
// <assert.h> and <string.h> cover what this file uses directly (assert, memmem, strlen, strcmp);
// confirm against the repository.
#include <assert.h>
#include <string.h>

// NOTE(review): template arguments in this file were reconstructed from how the values are used.
// MediaTextQuery, MediaThumbnailQuery, MediaRelatedItem, Tab and QuickMediaMatchNode* are taken from
// the code below. CommandArg, MediaChapter and the std::make_unique targets do not appear anywhere else
// in this file; confirm them against MediaGeneric.hpp.

namespace QuickMedia {
    // Returns true when the bytes of the NUL-terminated string |sub| occur somewhere inside |str|.
    static bool string_view_contains(const QuickMediaStringView str, const char *sub) {
        return memmem(str.data, str.size, sub, strlen(sub)) != nullptr;
    }

    // Callback invoked once for every node matched by an xpath query.
    using HtmlPathCallback = std::function<void(QuickMediaMatchNode*)>;

    // Convenience wrapper that lets callers pass a capturing lambda.
    // Forwards to the four-argument (callback, userdata) form of the same name, which is declared
    // outside this file, and passes the std::function through the userdata pointer.
    // Returns whatever that call returns; the trampoline itself always reports 0.
    static int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, HtmlPathCallback callback) {
        return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaMatchNode *node, void *userdata) {
            HtmlPathCallback *user_callback = static_cast<HtmlPathCallback*>(userdata);
            (*user_callback)(node);
            return 0;
        }, &callback);
    }

    // Reads |field_name| from |node|. The special name "text" selects the node text,
    // any other name is looked up as an attribute. |field_name| must not be null.
    static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) {
        if(strcmp(field_name, "text") == 0)
            return quickmedia_html_node_get_text(node);
        else
            return quickmedia_html_node_get_attribute_value(node, field_name);
    }

    // Rewrites |url| in place:
    //   "//host/path" -> "https://host/path"
    //   "/path"       -> website_url + "path"
    // Anything else is left untouched. The leading '/' is dropped in the second case because the
    // MediaGenericSearchPage constructor appends a trailing '/' to a non-empty website url.
    // With an empty |website_url| the leading '/' is simply removed.
    static void url_prepend_website_url(std::string &url, const std::string &website_url) {
        if(string_starts_with(url, "//"))
            url = "https://" + url.substr(2);
        else if(string_starts_with(url, "/"))
            url = website_url + url.substr(1);
    }

    // Applies url_prepend_website_url to the url and the thumbnail url of every item.
    static void body_items_prepend_website_url(BodyItems &body_items, const std::string &website_url) {
        for(auto &body_item : body_items) {
            url_prepend_website_url(body_item->url, website_url);
            url_prepend_website_url(body_item->thumbnail_url, website_url);
        }
    }

    // Runs every text query against |html_search| and appends one item to |result_items| per accepted node.
    // A node is accepted when both its title field and url field have data and, if the query has
    // |url_contains| set, the url contains that substring.
    // Returns 0 on success, -1 for a query with a missing mandatory field, otherwise the non-zero
    // value returned by the xpath search.
    static int run_text_queries(QuickMediaHtmlSearch *html_search, const std::vector<MediaTextQuery> &text_queries, BodyItems &result_items) {
        for(const MediaTextQuery &text_query : text_queries) {
            // url_field is checked as well: html_attr_or_inner_text passes it to strcmp,
            // which must not receive a null pointer.
            if(!text_query.html_query || !text_query.title_field || !text_query.url_field) {
                assert(false);
                return -1;
            }

            const int result = quickmedia_html_find_nodes_xpath(html_search, text_query.html_query,
                [&text_query, &result_items](QuickMediaMatchNode *node) {
                    const QuickMediaStringView title_value = html_attr_or_inner_text(node, text_query.title_field);
                    const QuickMediaStringView url_value = html_attr_or_inner_text(node, text_query.url_field);
                    if(!title_value.data || !url_value.data)
                        return;
                    if(text_query.url_contains && !string_view_contains(url_value, text_query.url_contains))
                        return;

                    std::string title(title_value.data, title_value.size);
                    html_unescape_sequences(title);
                    auto item = BodyItem::create(std::move(title));
                    item->url.assign(url_value.data, url_value.size);
                    result_items.push_back(std::move(item));
                });
            if(result != 0)
                return result;
        }
        return 0;
    }

    // Runs every thumbnail query against |html_search| and fills in thumbnail data on existing items.
    // Thumbnails are assigned by position: for each query, the n-th accepted node is stored on
    // result_items[n]. Nodes that are rejected (no data, or |field_contains| not found) do not advance
    // the position, and nodes found after the last item are ignored.
    // A query with a missing html_query or field_name asserts and is otherwise skipped.
    // Returns 0 on success, otherwise the non-zero value returned by the xpath search.
    static int run_thumbnail_queries(QuickMediaHtmlSearch *html_search, const std::vector<MediaThumbnailQuery> &thumbnail_queries, mgl::vec2i thumbnail_max_size, BodyItems &result_items) {
        for(const MediaThumbnailQuery &thumbnail_query : thumbnail_queries) {
            assert(thumbnail_query.html_query && thumbnail_query.field_name);
            if(!thumbnail_query.html_query || !thumbnail_query.field_name)
                continue;

            size_t index = 0;
            const int result = quickmedia_html_find_nodes_xpath(html_search, thumbnail_query.html_query,
                [&thumbnail_query, &result_items, &index, thumbnail_max_size](QuickMediaMatchNode *node) {
                    const QuickMediaStringView field_value = html_attr_or_inner_text(node, thumbnail_query.field_name);
                    if(index >= result_items.size() || !field_value.data)
                        return;
                    if(thumbnail_query.field_contains && !string_view_contains(field_value, thumbnail_query.field_contains))
                        return;

                    result_items[index]->thumbnail_url.assign(field_value.data, field_value.size);
                    result_items[index]->thumbnail_size = thumbnail_max_size;
                    ++index;
                });
            if(result != 0)
                return result;
        }
        return 0;
    }

    // Downloads |url| and turns the page into body items appended to |result_items|.
    //
    // - A "referer" header with |website_url| is added to |extra_commands| when |website_url| is not empty.
    // - If |related_custom_handler| points to a non-empty handler, the handler alone produces the items
    //   from the downloaded page and the html queries are not used.
    // - Otherwise the page is parsed and |text_queries| create the items, after which
    //   |thumbnail_queries| attach thumbnails to them.
    // - In both cases urls that start with "/" or "//" are made absolute afterwards.
    //
    // Returns NET_ERR when the download fails, OK when the page is empty or was processed, and ERR when
    // parsing or a query fails. On ERR |result_items| is cleared.
    static PluginResult fetch_page_results(
        const std::string &url, const std::string &website_url,
        const std::vector<MediaTextQuery> &text_queries, const std::vector<MediaThumbnailQuery> &thumbnail_queries,
        mgl::vec2i thumbnail_max_size,
        MediaRelatedCustomHandler *related_custom_handler,
        BodyItems &result_items, bool cloudflare_bypass, const std::vector<CommandArg> &extra_commands)
    {
        std::vector<CommandArg> args = extra_commands;
        if(!website_url.empty())
            args.push_back({ "--header", "referer: " + website_url });

        std::string website_data;
        if(download_to_string(url, website_data, args, true, true, cloudflare_bypass) != DownloadResult::OK)
            return PluginResult::NET_ERR;

        if(website_data.empty())
            return PluginResult::OK;

        if(related_custom_handler && *related_custom_handler) {
            std::vector<MediaRelatedItem> media_related_items = (*related_custom_handler)(website_data);
            for(MediaRelatedItem &media_related_item : media_related_items) {
                auto body_item = BodyItem::create(media_related_item.title);
                body_item->url = std::move(media_related_item.url);
                body_item->thumbnail_url = std::move(media_related_item.thumbnail_url);
                body_item->thumbnail_size = thumbnail_max_size;
                result_items.push_back(std::move(body_item));
            }
            body_items_prepend_website_url(result_items, website_url);
            return PluginResult::OK;
        }

        QuickMediaHtmlSearch html_search;
        int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
        if(result == 0)
            result = run_text_queries(&html_search, text_queries, result_items);
        if(result == 0)
            result = run_thumbnail_queries(&html_search, thumbnail_queries, thumbnail_max_size, result_items);
        if(result == 0)
            body_items_prepend_website_url(result_items, website_url);

        // Deinit runs on every path once init has been attempted, including a failed init.
        quickmedia_html_search_deinit(&html_search);

        if(result != 0) {
            result_items.clear();
            return PluginResult::ERR;
        }
        return PluginResult::OK;
    }

    // |website_url| may be null, in which case it is stored as an empty string.
    // A non-empty website url is normalized to end with '/'.
    MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, mgl::vec2i thumbnail_max_size, bool cloudflare_bypass, std::vector<CommandArg> extra_commands) :
        Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size), cloudflare_bypass(cloudflare_bypass), extra_commands(std::move(extra_commands))
    {
        if(!this->website_url.empty() && this->website_url.back() != '/')
            this->website_url.push_back('/');
    }

    // Searches for |str| by fetching page 0 of the results.
    SearchResult MediaGenericSearchPage::search(const std::string &str, BodyItems &result_items) {
        return plugin_result_to_search_result(get_page(str, 0, result_items));
    }

    // Builds the request url from the search template: "%s" is replaced by the url-encoded search text
    // and "%p" by the configured first page number plus |page|. The page is then fetched and parsed
    // with the search text/thumbnail queries.
    PluginResult MediaGenericSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) {
        std::string url = search_query.search_template;
        string_replace_all(url, "%s", url_param_encode(str));
        string_replace_all(url, "%p", std::to_string(search_query.page_start + page));
        return fetch_page_results(url, website_url, text_queries, thumbnail_queries, thumbnail_max_size, nullptr, result_items, cloudflare_bypass, extra_commands);
    }

    // Opens a video page for the submitted url.
    PluginResult MediaGenericSearchPage::submit(const SubmitArgs &args, std::vector<Tab> &result_tabs) {
        result_tabs.push_back(Tab{nullptr, std::make_unique<MediaGenericVideoPage>(program, this, args.url), nullptr});
        return PluginResult::OK;
    }

    // Fetches |url| and extracts related media, using the custom related-media handler if one is set
    // and the related-media text/thumbnail queries otherwise.
    PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) {
        return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, thumbnail_max_size, &related_custom_handler, result_items, cloudflare_bypass, extra_commands);
    }

    // Sets the search url template and the number of the first page. Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) {
        search_query.search_template = search_template;
        search_query.page_start = page_start;
        return *this;
    }

    // Sets the queries that create search result items. Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::text_handler(std::vector<MediaTextQuery> queries) {
        text_queries = std::move(queries);
        return *this;
    }

    // Sets the queries that attach thumbnails to search result items. Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::thumbnail_handler(std::vector<MediaThumbnailQuery> queries) {
        thumbnail_queries = std::move(queries);
        return *this;
    }

    // Sets the handler that extracts the video url from a downloaded video page. Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::video_url_custom_handler(MediaVideoUrlCustomHandler handler) {
        video_custom_handler = std::move(handler);
        return *this;
    }

    // Sets the related-media text queries and removes any custom related-media handler,
    // since the two mechanisms are alternatives. Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::related_media_text_handler(std::vector<MediaTextQuery> queries) {
        related_media_text_queries = std::move(queries);
        related_custom_handler = nullptr;
        return *this;
    }

    // Sets the related-media thumbnail queries and removes any custom related-media handler.
    // Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::related_media_thumbnail_handler(std::vector<MediaThumbnailQuery> queries) {
        related_media_thumbnail_queries = std::move(queries);
        related_custom_handler = nullptr;
        return *this;
    }

    // Sets a custom related-media handler and clears the related-media text/thumbnail queries.
    // Returns *this for chaining.
    MediaGenericSearchPage& MediaGenericSearchPage::related_media_custom_handler(MediaRelatedCustomHandler handler) {
        related_custom_handler = std::move(handler);
        related_media_text_queries.clear();
        related_media_thumbnail_queries.clear();
        return *this;
    }

    // Opens a video page for the submitted related item.
    PluginResult MediaGenericRelatedPage::submit(const SubmitArgs &args, std::vector<Tab> &result_tabs) {
        result_tabs.push_back(Tab{nullptr, std::make_unique<MediaGenericVideoPage>(program, search_page, args.url), nullptr});
        return PluginResult::OK;
    }

    // Returns the related media for |url|. The result code of the lookup is not inspected;
    // whatever items the lookup leaves in the list (none on a parse error) are returned.
    BodyItems MediaGenericVideoPage::get_related_media(const std::string &url) {
        BodyItems result_items;
        search_page->get_related_media(url, result_items);
        return result_items;
    }

    // Creates one tab that lists |related_videos| together with a filter search bar.
    PluginResult MediaGenericVideoPage::get_related_pages(const BodyItems &related_videos, const std::string&, std::vector<Tab> &result_tabs) {
        auto related_page_body = create_body(false, true);
        related_page_body->set_items(related_videos);
        result_tabs.push_back(Tab{std::move(related_page_body), std::make_unique<MediaGenericRelatedPage>(program, search_page), create_search_bar("Search...", SEARCH_DELAY_FILTER)});
        return PluginResult::OK;
    }

    // Returns the video url resolved by load(). |max_height| is currently ignored.
    std::string MediaGenericVideoPage::get_download_url(int max_height) {
        // TODO: Use max_height, if possible
        (void)max_height;
        return video_url;
    }

    // Returns the video url resolved by load(). Always reports embedded audio and the "m3u8" extension.
    // |max_height| is currently ignored.
    std::string MediaGenericVideoPage::get_video_url(int max_height, bool &has_embedded_audio, std::string &ext) {
        // TODO: Use max_height, if possible
        (void)max_height;
        has_embedded_audio = true;
        ext = "m3u8";
        return video_url;
    }

    // Resolves the video url for this page.
    // Without a custom video handler the page url itself is used as the video url.
    // Otherwise the page is downloaded (with the page url as referer) and the handler extracts the url.
    // |duration| is always set to 0. Returns NET_ERR when the download fails and ERR, with |err_msg| set,
    // when the handler returns an empty url.
    PluginResult MediaGenericVideoPage::load(std::string&, std::string&, double &duration, std::vector<MediaChapter>&, std::string &err_msg) {
        video_url.clear();
        duration = 0.0;

        if(!search_page->video_custom_handler) {
            video_url = url;
            return PluginResult::OK;
        }

        std::vector<CommandArg> args = search_page->extra_commands;
        if(!url.empty())
            args.push_back({ "--header", "referer: " + url });

        std::string website_data;
        if(download_to_string(url, website_data, args, true, true, search_page->cloudflare_bypass) != DownloadResult::OK)
            return PluginResult::NET_ERR;

        video_url = search_page->video_custom_handler(website_data);
        if(video_url.empty()) {
            err_msg = "Failed to extract video url";
            return PluginResult::ERR;
        }
        return PluginResult::OK;
    }
}