From 82e66059dc09087b625e25027922a9e3c3ccc6cd Mon Sep 17 00:00:00 2001 From: dec05eba Date: Mon, 29 Jun 2020 18:40:18 +0200 Subject: Get first next page of search results on youtube, request only json for first result as well --- plugins/Youtube.hpp | 2 + src/DownloadUtils.cpp | 2 +- src/plugins/Youtube.cpp | 232 +++++++++++++++++++++++++++++++++--------------- 3 files changed, 165 insertions(+), 71 deletions(-) diff --git a/plugins/Youtube.hpp b/plugins/Youtube.hpp index a676cd0..2eea8c2 100644 --- a/plugins/Youtube.hpp +++ b/plugins/Youtube.hpp @@ -13,6 +13,8 @@ namespace QuickMedia { int get_search_delay() const override { return 350; } bool search_suggestion_is_search() const override { return true; } Page get_page_after_search() const override { return Page::VIDEO_CONTENT; } + private: + void search_suggestions_get_continuation(const std::string &url, const std::string &continuation_token, BodyItems &result_items); private: std::string last_related_media_playlist_id; BodyItems last_playlist_data; diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp index 8193c90..e02fa6b 100644 --- a/src/DownloadUtils.cpp +++ b/src/DownloadUtils.cpp @@ -36,7 +36,7 @@ namespace QuickMedia { if(debug_download) { for(const char *arg : args) { if(arg) - fprintf(stderr, "%s ", arg); + fprintf(stderr, "'%s' ", arg); } fprintf(stderr, "\n"); } diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp index 3fd2a0f..3c98f12 100644 --- a/src/plugins/Youtube.cpp +++ b/src/plugins/Youtube.cpp @@ -35,108 +35,200 @@ namespace QuickMedia { return std::string::npos; } - // TODO: Speed this up by using string.find instead of parsing html + // Returns empty string if continuation token can't be found + static std::string item_section_renderer_get_continuation_token(const Json::Value &item_section_renderer_json) { + const Json::Value &continuations_json = item_section_renderer_json["continuations"]; + if(!continuations_json.isArray() || continuations_json.empty()) + return ""; + + 
const Json::Value &first_continuation_json = continuations_json[0]; + if(!first_continuation_json.isObject()) + return ""; + + const Json::Value &next_continuation_data_json = first_continuation_json["nextContinuationData"]; + if(!next_continuation_data_json.isObject()) + return ""; + + const Json::Value &continuation_json = next_continuation_data_json["continuation"]; + if(!continuation_json.isString()) + return ""; + + return continuation_json.asString(); + } + + static void parse_item_section_renderer(const Json::Value &item_section_renderer_json, std::string &continuation_token, BodyItems &result_items) { + if(continuation_token.empty()) + continuation_token = item_section_renderer_get_continuation_token(item_section_renderer_json); + + const Json::Value &item_contents_json = item_section_renderer_json["contents"]; + if(!item_contents_json.isArray()) + return; + + for(const Json::Value &content_item_json : item_contents_json) { + if(!content_item_json.isObject()) + continue; + + const Json::Value &video_renderer_json = content_item_json["videoRenderer"]; + if(!video_renderer_json.isObject()) + continue; + + const Json::Value &video_id_json = video_renderer_json["videoId"]; + if(!video_id_json.isString()) + continue; + + std::string video_id_str = video_id_json.asString(); + std::string thumbnail_url = "https://img.youtube.com/vi/" + video_id_str + "/hqdefault.jpg"; + + const char *title = nullptr; + const Json::Value &title_json = video_renderer_json["title"]; + if(title_json.isObject()) { + const Json::Value &runs_json = title_json["runs"]; + if(runs_json.isArray() && !runs_json.empty()) { + const Json::Value &first_runs_json = runs_json[0]; + if(first_runs_json.isObject()) { + const Json::Value &text_json = first_runs_json["text"]; + if(text_json.isString()) + title = text_json.asCString(); + } + } + } + + if(!title) + continue; + + auto body_item = std::make_unique<BodyItem>(title); + body_item->url = "https://www.youtube.com/watch?v=" + video_id_str; + 
body_item->thumbnail_url = std::move(thumbnail_url); + result_items.push_back(std::move(body_item)); + } + } + SuggestionResult Youtube::update_search_suggestions(const std::string &text, BodyItems &result_items) { std::string url = "https://youtube.com/results?search_query="; url += url_param_encode(text); + std::vector<CommandArg> additional_args = { + { "-H", "x-spf-referer: " + url }, + { "-H", "x-youtube-client-name: 1" }, + { "-H", "x-youtube-client-version: 2.20200626.03.00" }, + { "-H", "referer: " + url } + }; + std::string website_data; - if(download_to_string(url, website_data, {}, use_tor, true) != DownloadResult::OK) + if(download_to_string(url + "&pbj=1", website_data, additional_args, use_tor, true) != DownloadResult::OK) return SuggestionResult::NET_ERR; - size_t data_start = website_data.find("window[\"ytInitialData\"] = {"); - if(data_start == std::string::npos) - return SuggestionResult::ERR; - - data_start = data_start + 26; - size_t data_end = find_end_of_json(website_data, data_start); - - if(data_end == std::string::npos) - return SuggestionResult::ERR; - Json::Value json_root; Json::CharReaderBuilder json_builder; std::unique_ptr<Json::CharReader> json_reader(json_builder.newCharReader()); std::string json_errors; - if(!json_reader->parse(&website_data[data_start], &website_data[data_end], &json_root, &json_errors)) { + if(!json_reader->parse(&website_data[0], &website_data[website_data.size()], &json_root, &json_errors)) { fprintf(stderr, "Youtube search json error: %s\n", json_errors.c_str()); return SuggestionResult::ERR; } - const Json::Value &contents_json = json_root["contents"]; - if(!contents_json.isObject()) + if(!json_root.isArray()) return SuggestionResult::ERR; - const Json::Value &tcsrr_json = contents_json["twoColumnSearchResultsRenderer"]; - if(!tcsrr_json.isObject()) - return SuggestionResult::ERR; + std::string continuation_token; - const Json::Value &primary_contents_json = tcsrr_json["primaryContents"]; - if(!primary_contents_json.isObject()) - return 
SuggestionResult::ERR; - - const Json::Value &section_list_renderer_json = primary_contents_json["sectionListRenderer"]; - if(!section_list_renderer_json.isObject()) - return SuggestionResult::ERR; - - const Json::Value &contents2_json = section_list_renderer_json["contents"]; - if(!contents2_json.isArray()) - return SuggestionResult::ERR; - - for(const Json::Value &item_json : contents2_json) { - if(!item_json.isObject()) + for(const Json::Value &json_item : json_root) { + if(!json_item.isObject()) continue; - - const Json::Value &item_section_renderer_json = item_json["itemSectionRenderer"]; - if(!item_section_renderer_json.isObject()) + + const Json::Value &response_json = json_item["response"]; + if(!response_json.isObject()) continue; + + const Json::Value &contents_json = response_json["contents"]; + if(!contents_json.isObject()) + return SuggestionResult::ERR; + + const Json::Value &tcsrr_json = contents_json["twoColumnSearchResultsRenderer"]; + if(!tcsrr_json.isObject()) + return SuggestionResult::ERR; + + const Json::Value &primary_contents_json = tcsrr_json["primaryContents"]; + if(!primary_contents_json.isObject()) + return SuggestionResult::ERR; + + const Json::Value &section_list_renderer_json = primary_contents_json["sectionListRenderer"]; + if(!section_list_renderer_json.isObject()) + return SuggestionResult::ERR; - const Json::Value &item_contents_json = item_section_renderer_json["contents"]; - if(!item_contents_json.isArray()) - continue; + const Json::Value &contents2_json = section_list_renderer_json["contents"]; + if(!contents2_json.isArray()) + return SuggestionResult::ERR; - for(const Json::Value &content_item_json : item_contents_json) { - if(!content_item_json.isObject()) + for(const Json::Value &item_json : contents2_json) { + if(!item_json.isObject()) continue; - const Json::Value &video_renderer_json = content_item_json["videoRenderer"]; - if(!video_renderer_json.isObject()) + const Json::Value &item_section_renderer_json = 
item_json["itemSectionRenderer"]; + if(!item_section_renderer_json.isObject()) continue; - - const Json::Value &video_id_json = video_renderer_json["videoId"]; - if(!video_id_json.isString()) - continue; - - std::string video_id_str = video_id_json.asString(); - std::string thumbnail_url = "https://img.youtube.com/vi/" + video_id_str + "/hqdefault.jpg"; - - const char *title = nullptr; - const Json::Value &title_json = video_renderer_json["title"]; - if(title_json.isObject()) { - const Json::Value &runs_json = title_json["runs"]; - if(runs_json.isArray() && runs_json.size() > 0) { - const Json::Value &first_runs_json = runs_json[0]; - if(first_runs_json.isObject()) { - const Json::Value &text_json = first_runs_json["text"]; - if(text_json.isString()) - title = text_json.asCString(); - } - } - } - if(!title) - continue; - - auto body_item = std::make_unique<BodyItem>(title); - body_item->url = "https://www.youtube.com/watch?v=" + video_id_str; - body_item->thumbnail_url = std::move(thumbnail_url); - result_items.push_back(std::move(body_item)); + parse_item_section_renderer(item_section_renderer_json, continuation_token, result_items); } } + // The continuation data can also contain continuation, but we ignore that for now. 
Only get the first continuation data + if(!continuation_token.empty()) + search_suggestions_get_continuation(url, continuation_token, result_items); + return SuggestionResult::OK; } + void Youtube::search_suggestions_get_continuation(const std::string &url, const std::string &continuation_token, BodyItems &result_items) { + std::string next_url = url + "&pbj=1&ctoken=" + continuation_token; + + std::vector<CommandArg> additional_args = { + { "-H", "x-spf-referer: " + url }, + { "-H", "x-youtube-client-name: 1" }, + { "-H", "x-spf-previous: " + url }, + { "-H", "x-youtube-client-version: 2.20200626.03.00" }, + { "-H", "referer: " + url } + }; + + std::string website_data; + if(download_to_string(next_url, website_data, additional_args, use_tor, true) != DownloadResult::OK) + return; + + Json::Value json_root; + Json::CharReaderBuilder json_builder; + std::unique_ptr<Json::CharReader> json_reader(json_builder.newCharReader()); + std::string json_errors; + if(!json_reader->parse(&website_data[0], &website_data[website_data.size()], &json_root, &json_errors)) { + fprintf(stderr, "Youtube search continuation json error: %s\n", json_errors.c_str()); + return; + } + + if(!json_root.isArray()) + return; + + std::string next_continuation_token; + + for(const Json::Value &json_item : json_root) { + if(!json_item.isObject()) + continue; + + const Json::Value &response_json = json_item["response"]; + if(!response_json.isObject()) + continue; + + const Json::Value &continuation_contents_json = response_json["continuationContents"]; + if(!continuation_contents_json.isObject()) + continue; + + const Json::Value &item_section_continuation_json = continuation_contents_json["itemSectionContinuation"]; + if(!item_section_continuation_json.isObject()) + continue; + + // Note: item_section_continuation json object is compatible with item_section_renderer json object + parse_item_section_renderer(item_section_continuation_json, next_continuation_token, result_items); + } + } + static std::string 
get_playlist_id_from_url(const std::string &url) { std::string playlist_id = url; size_t list_index = playlist_id.find("&list="); -- cgit v1.2.3