From 67618e51ed44effba871447255a5e7389969ccaa Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 28 Apr 2021 22:50:42 +0200 Subject: Create generic media plugin --- src/plugins/MediaGeneric.cpp | 172 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 src/plugins/MediaGeneric.cpp (limited to 'src/plugins/MediaGeneric.cpp') diff --git a/src/plugins/MediaGeneric.cpp b/src/plugins/MediaGeneric.cpp new file mode 100644 index 0000000..c911e6b --- /dev/null +++ b/src/plugins/MediaGeneric.cpp @@ -0,0 +1,172 @@ +#include "../../plugins/MediaGeneric.hpp" +#include "../../include/StringUtils.hpp" +#include + +namespace QuickMedia { + using HtmlPathCallback = std::function; + static int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, HtmlPathCallback callback) { + return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaHtmlNode *node, void *userdata) { + HtmlPathCallback *callback = (HtmlPathCallback*)userdata; + (*callback)(node); + }, &callback); + } + + static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + if(strcmp(field_name, "text") == 0) + return quickmedia_html_node_get_text(node); + else + return quickmedia_html_node_get_attribute_value(node, field_name); + } + + static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector &text_queries, const std::vector &thumbnail_queries, BodyItems &result_items) { + std::vector args; + if(!website_url.empty()) + args.push_back({ "-H", "referer: " + website_url }); + + std::string website_data; + if(download_to_string(url, website_data, args, true) != DownloadResult::OK) + return PluginResult::NET_ERR; + + if(website_data.empty()) + return PluginResult::OK; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + for(const MediaTextQuery &text_query : text_queries) { + if(!text_query.html_query || !text_query.title_field) { + assert(false); + result = -1; + goto cleanup; + } + + result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaHtmlNode *node) { + const char *title_value = html_attr_or_inner_text(node, text_query.title_field); + const char *url_value = html_attr_or_inner_text(node, text_query.url_field); + if(title_value && url_value && (!text_query.url_contains || strstr(url_value, text_query.url_contains))) { + std::string field1_fixed = strip(title_value); + html_unescape_sequences(field1_fixed); + auto item = BodyItem::create(std::move(field1_fixed)); + item->url = strip(url_value); + result_items.push_back(std::move(item)); + } + }); + if(result != 0) + goto cleanup; + } + + for(const MediaThumbnailQuery &thumbnail_query : thumbnail_queries) { + assert(thumbnail_query.html_query && thumbnail_query.field_name); + if(thumbnail_query.html_query && thumbnail_query.field_name) { + size_t index = 0; + result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaHtmlNode *node) { + const char *field_value = html_attr_or_inner_text(node, thumbnail_query.field_name); + if(index < result_items.size() && field_value && (!thumbnail_query.field_contains || strstr(field_value, thumbnail_query.field_contains))) { + result_items[index]->thumbnail_url = strip(field_value); + ++index; + } + }); + if(result != 0) + goto cleanup; + } + } + + for(auto &body_item : result_items) { + if(string_starts_with(body_item->url, "//")) + body_item->url = "https://" + body_item->url.substr(2); + else if(string_starts_with(body_item->url, "/")) + body_item->url = website_url + body_item->url.substr(1); + + if(string_starts_with(body_item->thumbnail_url, "//")) + body_item->thumbnail_url = "https://" + body_item->thumbnail_url.substr(2); + else if(string_starts_with(body_item->thumbnail_url, "/")) + body_item->thumbnail_url = website_url + body_item->thumbnail_url.substr(1); + } + + cleanup: + quickmedia_html_search_deinit(&html_search); + if(result == 0) { + return PluginResult::OK; + } else { + result_items.clear(); + return PluginResult::ERR; + } + } + + MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size) : + Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size) + { + if(!this->website_url.empty()) { + if(this->website_url.back() != '/') + this->website_url.push_back('/'); + } + } + + SearchResult MediaGenericSearchPage::search(const std::string &str, BodyItems &result_items) { + return plugin_result_to_search_result(get_page(str, 0, result_items)); + } + + PluginResult MediaGenericSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) { + std::string url = search_query.search_template; + string_replace_all(url, "%s", url_param_encode(str)); + string_replace_all(url, "%p", std::to_string(search_query.page_start + page)); + return fetch_page_results(url, website_url, text_queries, thumbnail_queries, result_items); + } + + PluginResult MediaGenericSearchPage::submit(const std::string&, const std::string &url, std::vector &result_tabs) { + result_tabs.push_back(Tab{nullptr, std::make_unique(program, this, url), nullptr}); + return PluginResult::OK; + } + + PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) { + return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, result_items); + } + + MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) { + search_query.search_template = search_template; + search_query.page_start = page_start; + return *this; + } + + MediaGenericSearchPage& MediaGenericSearchPage::text_handler(std::vector queries) { + text_queries = std::move(queries); + return *this; + } + + MediaGenericSearchPage& MediaGenericSearchPage::thumbnail_handler(std::vector queries) { + thumbnail_queries = std::move(queries); + return *this; + } + + MediaGenericSearchPage& MediaGenericSearchPage::related_media_text_handler(std::vector queries) { + related_media_text_queries = std::move(queries); + return *this; + } + + MediaGenericSearchPage& MediaGenericSearchPage::related_media_thumbnail_handler(std::vector queries) { + related_media_thumbnail_queries = std::move(queries); + return *this; + } + + PluginResult MediaGenericRelatedPage::submit(const std::string&, const std::string &url, std::vector &result_tabs) { + result_tabs.push_back(Tab{nullptr, std::make_unique(program, search_page, url), nullptr}); + return PluginResult::OK; + } + + BodyItems MediaGenericVideoPage::get_related_media(const std::string &url, std::string&) { + BodyItems result_items; + search_page->get_related_media(url, result_items); + return result_items; + } + + std::unique_ptr MediaGenericVideoPage::create_search_page(Program*, int &search_delay) { + search_delay = 500; // TODO: Make configurable? + return std::make_unique(*search_page); + } + + std::unique_ptr MediaGenericVideoPage::create_related_videos_page(Program *program, const std::string&, const std::string&) { + return std::make_unique(program, search_page); + } +} \ No newline at end of file -- cgit v1.2.3