From 3c9ca2c97ae7a2b39bfe5c5e8a9d7941f9fb1525 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Thu, 8 Aug 2019 04:13:03 +0200 Subject: Youtube: use real search for search suggestions (better results, thumbnails, directly to result) --- src/plugins/Manganelo.cpp | 4 +- src/plugins/Plugin.cpp | 6 +++ src/plugins/Youtube.cpp | 117 +++++++++++++++++++++++----------------------- 3 files changed, 66 insertions(+), 61 deletions(-) (limited to 'src/plugins') diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp index 3c6dd5b..b1f02a3 100644 --- a/src/plugins/Manganelo.cpp +++ b/src/plugins/Manganelo.cpp @@ -3,9 +3,7 @@ #include namespace QuickMedia { - SearchResult Manganelo::search(const std::string &url, std::vector> &result_items, Page &next_page) { - next_page = Page::EPISODE_LIST; - + SearchResult Manganelo::search(const std::string &url, std::vector> &result_items) { std::string website_data; if(download_to_string(url, website_data) != DownloadResult::OK) return SearchResult::NET_ERR; diff --git a/src/plugins/Plugin.cpp b/src/plugins/Plugin.cpp index 2367cb3..86f5d7d 100644 --- a/src/plugins/Plugin.cpp +++ b/src/plugins/Plugin.cpp @@ -10,6 +10,12 @@ static int accumulate_string(char *data, int size, void *userdata) { } namespace QuickMedia { + SearchResult Plugin::search(const std::string &text, std::vector> &result_items) { + (void)text; + (void)result_items; + return SearchResult::OK; + } + SuggestionResult Plugin::update_search_suggestions(const std::string &text, std::vector> &result_items) { (void)text; (void)result_items; diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp index 40d545c..a5670ec 100644 --- a/src/plugins/Youtube.cpp +++ b/src/plugins/Youtube.cpp @@ -12,64 +12,6 @@ namespace QuickMedia { return strstr(str, substr); } - SearchResult Youtube::search(const std::string &text, std::vector> &result_items, Page &next_page) { - next_page = Page::SEARCH_RESULT; - std::string url = "https://youtube.com/results?search_query="; - url += url_param_encode(text); - - std::string website_data; - if(download_to_string(url, website_data) != DownloadResult::OK) - return SearchResult::NET_ERR; - - struct ItemData { - std::vector> *result_items; - size_t index; - }; - ItemData item_data = { &result_items, 0 }; - - QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//h3[class=\"yt-lockup-title\"]/a", - [](QuickMediaHtmlNode *node, void *userdata) { - auto *result_items = (std::vector>*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *title = quickmedia_html_node_get_attribute_value(node, "title"); - // Checking for watch?v helps skipping ads - if(href && title && begins_with(href, "/watch?v=")) { - auto item = std::make_unique(strip(title)); - item->url = std::string("https://www.youtube.com") + href; - result_items->push_back(std::move(item)); - } - }, &result_items); - if(result != 0) - goto cleanup; - - result = quickmedia_html_find_nodes_xpath(&html_search, "//span[class=\"yt-thumb-simple\"]//img", - [](QuickMediaHtmlNode *node, void *userdata) { - ItemData *item_data = (ItemData*)userdata; - if(item_data->index >= item_data->result_items->size()) - return; - - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - const char *data_thumb = quickmedia_html_node_get_attribute_value(node, "data-thumb"); - - if(src && contains(src, "i.ytimg.com/")) { - (*item_data->result_items)[item_data->index]->thumbnail_url = src; - ++item_data->index; - } else if(data_thumb && contains(data_thumb, "i.ytimg.com/")) { - (*item_data->result_items)[item_data->index]->thumbnail_url = data_thumb; - ++item_data->index; - } - }, &item_data); - - cleanup: - quickmedia_html_search_deinit(&html_search); - return result == 0 ? SearchResult::OK : SearchResult::ERR; - } - static void iterate_suggestion_result(const Json::Value &value, std::vector> &result_items, int &iterate_count) { ++iterate_count; if(value.isArray()) { @@ -83,7 +25,11 @@ namespace QuickMedia { } } + // TODO: Speed this up by using string.find instead of parsing html SuggestionResult Youtube::update_search_suggestions(const std::string &text, std::vector> &result_items) { + // Keep this for backup. This is using search suggestion the same way youtube does it, but the results + // are not as good as doing an actual search. + #if 0 std::string url = "https://clients1.google.com/complete/search?client=youtube&hl=en&gs_rn=64&gs_ri=youtube&ds=yt&cp=7&gs_id=x&q="; url += url_param_encode(text); @@ -125,6 +71,61 @@ namespace QuickMedia { if(!found_search_text) result_items.insert(result_items.begin(), std::make_unique(text)); return SuggestionResult::OK; + #endif + std::string url = "https://youtube.com/results?search_query="; + url += url_param_encode(text); + + std::string website_data; + if(download_to_string(url, website_data) != DownloadResult::OK) + return SuggestionResult::NET_ERR; + + struct ItemData { + std::vector> *result_items; + size_t index; + }; + ItemData item_data = { &result_items, 0 }; + + QuickMediaHtmlSearch html_search; + int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, "//h3[class=\"yt-lockup-title\"]/a", + [](QuickMediaHtmlNode *node, void *userdata) { + auto *result_items = (std::vector>*)userdata; + const char *href = quickmedia_html_node_get_attribute_value(node, "href"); + const char *title = quickmedia_html_node_get_attribute_value(node, "title"); + // Checking for watch?v helps skipping ads + if(href && title && begins_with(href, "/watch?v=")) { + auto item = std::make_unique(strip(title)); + item->url = std::string("https://www.youtube.com") + href; + result_items->push_back(std::move(item)); + } + }, &result_items); + if(result != 0) + goto cleanup; + + result = quickmedia_html_find_nodes_xpath(&html_search, "//span[class=\"yt-thumb-simple\"]//img", + [](QuickMediaHtmlNode *node, void *userdata) { + ItemData *item_data = (ItemData*)userdata; + if(item_data->index >= item_data->result_items->size()) + return; + + const char *src = quickmedia_html_node_get_attribute_value(node, "src"); + const char *data_thumb = quickmedia_html_node_get_attribute_value(node, "data-thumb"); + + if(src && contains(src, "i.ytimg.com/")) { + (*item_data->result_items)[item_data->index]->thumbnail_url = src; + ++item_data->index; + } else if(data_thumb && contains(data_thumb, "i.ytimg.com/")) { + (*item_data->result_items)[item_data->index]->thumbnail_url = data_thumb; + ++item_data->index; + } + }, &item_data); + + cleanup: + quickmedia_html_search_deinit(&html_search); + return result == 0 ? SuggestionResult::OK : SuggestionResult::ERR; } std::vector> Youtube::get_related_media(const std::string &url) { -- cgit v1.2.3