aboutsummaryrefslogtreecommitdiff
path: root/src/plugins
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2020-06-29 18:40:18 +0200
committer dec05eba <dec05eba@protonmail.com> 2020-06-29 18:40:18 +0200
commit 82e66059dc09087b625e25027922a9e3c3ccc6cd (patch)
tree 1fb504d7cf66e22b700108c0d4d88e81898d0114 /src/plugins
parent cd606f9604379a783ffb25baaac1e4b1142314e6 (diff)
Get first next page of search results on youtube, request only json for first result as well
Diffstat (limited to 'src/plugins')
-rw-r--r-- src/plugins/Youtube.cpp 232
1 files changed, 162 insertions, 70 deletions
diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp
index 3fd2a0f..3c98f12 100644
--- a/src/plugins/Youtube.cpp
+++ b/src/plugins/Youtube.cpp
@@ -35,108 +35,200 @@ namespace QuickMedia {
return std::string::npos;
}
- // TODO: Speed this up by using string.find instead of parsing html
+ // Returns item_section_renderer_json["continuations"][0]["nextContinuationData"]["continuation"] as a string. Returns an empty string if the continuation token can't be found (a step of that path has an unexpected type or the array is empty)
+ static std::string item_section_renderer_get_continuation_token(const Json::Value &item_section_renderer_json) {
+ const Json::Value &continuations_json = item_section_renderer_json["continuations"];
+ if(!continuations_json.isArray() || continuations_json.empty())
+ return "";
+ // Only the first entry of the continuations array is looked at
+ const Json::Value &first_continuation_json = continuations_json[0];
+ if(!first_continuation_json.isObject())
+ return "";
+
+ const Json::Value &next_continuation_data_json = first_continuation_json["nextContinuationData"];
+ if(!next_continuation_data_json.isObject())
+ return "";
+
+ const Json::Value &continuation_json = next_continuation_data_json["continuation"];
+ if(!continuation_json.isString())
+ return "";
+
+ return continuation_json.asString();
+ }
+ // Appends one BodyItem to result_items for every entry of item_section_renderer_json["contents"] that holds a videoRenderer with a string videoId and a title. continuation_token is an in/out parameter
+ static void parse_item_section_renderer(const Json::Value &item_section_renderer_json, std::string &continuation_token, BodyItems &result_items) {
+ if(continuation_token.empty()) // A token that is already set is kept, so the first token found wins when the same string is passed to several calls
+ continuation_token = item_section_renderer_get_continuation_token(item_section_renderer_json);
+
+ const Json::Value &item_contents_json = item_section_renderer_json["contents"];
+ if(!item_contents_json.isArray())
+ return;
+ // Entries that are not objects, have no videoRenderer, or lack a video id or title are skipped instead of being treated as errors
+ for(const Json::Value &content_item_json : item_contents_json) {
+ if(!content_item_json.isObject())
+ continue;
+
+ const Json::Value &video_renderer_json = content_item_json["videoRenderer"];
+ if(!video_renderer_json.isObject())
+ continue;
+
+ const Json::Value &video_id_json = video_renderer_json["videoId"];
+ if(!video_id_json.isString())
+ continue;
+ // The thumbnail url is built from the video id instead of being read from the json
+ std::string video_id_str = video_id_json.asString();
+ std::string thumbnail_url = "https://img.youtube.com/vi/" + video_id_str + "/hqdefault.jpg";
+ // The title is read from title.runs[0].text; it stays nullptr if that path can't be followed
+ const char *title = nullptr;
+ const Json::Value &title_json = video_renderer_json["title"];
+ if(title_json.isObject()) {
+ const Json::Value &runs_json = title_json["runs"];
+ if(runs_json.isArray() && !runs_json.empty()) {
+ const Json::Value &first_runs_json = runs_json[0];
+ if(first_runs_json.isObject()) {
+ const Json::Value &text_json = first_runs_json["text"];
+ if(text_json.isString())
+ title = text_json.asCString();
+ }
+ }
+ }
+
+ if(!title)
+ continue;
+
+ auto body_item = std::make_unique<BodyItem>(title);
+ body_item->url = "https://www.youtube.com/watch?v=" + video_id_str;
+ body_item->thumbnail_url = std::move(thumbnail_url);
+ result_items.push_back(std::move(body_item));
+ }
+ }
+
SuggestionResult Youtube::update_search_suggestions(const std::string &text, BodyItems &result_items) { // Searches youtube for text and appends the video results to result_items; if a continuation token is found, the results of the first continuation page are appended as well
std::string url = "https://youtube.com/results?search_query=";
url += url_param_encode(text);
+ std::vector<CommandArg> additional_args = {
+ { "-H", "x-spf-referer: " + url },
+ { "-H", "x-youtube-client-name: 1" },
+ { "-H", "x-youtube-client-version: 2.20200626.03.00" },
+ { "-H", "referer: " + url }
+ };
+ // Returns NET_ERR if the download fails, ERR if the response can't be parsed as json or doesn't have the expected structure, otherwise OK. The whole response body is parsed as json
std::string website_data;
- if(download_to_string(url, website_data, {}, use_tor, true) != DownloadResult::OK)
+ if(download_to_string(url + "&pbj=1", website_data, additional_args, use_tor, true) != DownloadResult::OK)
return SuggestionResult::NET_ERR;
- size_t data_start = website_data.find("window[\"ytInitialData\"] = {");
- if(data_start == std::string::npos)
- return SuggestionResult::ERR;
-
- data_start = data_start + 26;
- size_t data_end = find_end_of_json(website_data, data_start);
-
- if(data_end == std::string::npos)
- return SuggestionResult::ERR;
-
Json::Value json_root;
Json::CharReaderBuilder json_builder;
std::unique_ptr<Json::CharReader> json_reader(json_builder.newCharReader());
std::string json_errors;
- if(!json_reader->parse(&website_data[data_start], &website_data[data_end], &json_root, &json_errors)) {
+ if(!json_reader->parse(&website_data[0], &website_data[website_data.size()], &json_root, &json_errors)) {
fprintf(stderr, "Youtube search json error: %s\n", json_errors.c_str());
return SuggestionResult::ERR;
}
- const Json::Value &contents_json = json_root["contents"];
- if(!contents_json.isObject())
+ if(!json_root.isArray())
return SuggestionResult::ERR;
- const Json::Value &tcsrr_json = contents_json["twoColumnSearchResultsRenderer"];
- if(!tcsrr_json.isObject())
- return SuggestionResult::ERR;
+ std::string continuation_token; // Set by parse_item_section_renderer to the first continuation token it finds
- const Json::Value &primary_contents_json = tcsrr_json["primaryContents"];
- if(!primary_contents_json.isObject())
- return SuggestionResult::ERR;
-
- const Json::Value &section_list_renderer_json = primary_contents_json["sectionListRenderer"];
- if(!section_list_renderer_json.isObject())
- return SuggestionResult::ERR;
-
- const Json::Value &contents2_json = section_list_renderer_json["contents"];
- if(!contents2_json.isArray())
- return SuggestionResult::ERR;
-
- for(const Json::Value &item_json : contents2_json) {
- if(!item_json.isObject())
+ for(const Json::Value &json_item : json_root) {
+ if(!json_item.isObject())
continue;
-
- const Json::Value &item_section_renderer_json = item_json["itemSectionRenderer"];
- if(!item_section_renderer_json.isObject())
+
+ const Json::Value &response_json = json_item["response"];
+ if(!response_json.isObject())
continue;
+ // Array items without a "response" object are skipped, but a "response" object with an unexpected structure makes the whole search return ERR
+ const Json::Value &contents_json = response_json["contents"];
+ if(!contents_json.isObject())
+ return SuggestionResult::ERR;
+
+ const Json::Value &tcsrr_json = contents_json["twoColumnSearchResultsRenderer"];
+ if(!tcsrr_json.isObject())
+ return SuggestionResult::ERR;
+
+ const Json::Value &primary_contents_json = tcsrr_json["primaryContents"];
+ if(!primary_contents_json.isObject())
+ return SuggestionResult::ERR;
+
+ const Json::Value &section_list_renderer_json = primary_contents_json["sectionListRenderer"];
+ if(!section_list_renderer_json.isObject())
+ return SuggestionResult::ERR;
- const Json::Value &item_contents_json = item_section_renderer_json["contents"];
- if(!item_contents_json.isArray())
- continue;
+ const Json::Value &contents2_json = section_list_renderer_json["contents"];
+ if(!contents2_json.isArray())
+ return SuggestionResult::ERR;
- for(const Json::Value &content_item_json : item_contents_json) {
- if(!content_item_json.isObject())
+ for(const Json::Value &item_json : contents2_json) {
+ if(!item_json.isObject())
continue;
- const Json::Value &video_renderer_json = content_item_json["videoRenderer"];
- if(!video_renderer_json.isObject())
+ const Json::Value &item_section_renderer_json = item_json["itemSectionRenderer"];
+ if(!item_section_renderer_json.isObject())
continue;
-
- const Json::Value &video_id_json = video_renderer_json["videoId"];
- if(!video_id_json.isString())
- continue;
-
- std::string video_id_str = video_id_json.asString();
- std::string thumbnail_url = "https://img.youtube.com/vi/" + video_id_str + "/hqdefault.jpg";
-
- const char *title = nullptr;
- const Json::Value &title_json = video_renderer_json["title"];
- if(title_json.isObject()) {
- const Json::Value &runs_json = title_json["runs"];
- if(runs_json.isArray() && runs_json.size() > 0) {
- const Json::Value &first_runs_json = runs_json[0];
- if(first_runs_json.isObject()) {
- const Json::Value &text_json = first_runs_json["text"];
- if(text_json.isString())
- title = text_json.asCString();
- }
- }
- }
- if(!title)
- continue;
-
- auto body_item = std::make_unique<BodyItem>(title);
- body_item->url = "https://www.youtube.com/watch?v=" + video_id_str;
- body_item->thumbnail_url = std::move(thumbnail_url);
- result_items.push_back(std::move(body_item));
+ parse_item_section_renderer(item_section_renderer_json, continuation_token, result_items);
}
}
+ // The continuation response can itself contain a continuation token, but that is ignored for now; only the first continuation page is fetched. The continuation call returns nothing, so a failure there does not change the OK result below
+ if(!continuation_token.empty())
+ search_suggestions_get_continuation(url, continuation_token, result_items);
+
return SuggestionResult::OK;
}
+ void Youtube::search_suggestions_get_continuation(const std::string &url, const std::string &continuation_token, BodyItems &result_items) { // Downloads the continuation page of the search at url identified by continuation_token and appends its video results to result_items. Returns without reporting anything to the caller if the download or the json parsing fails
+ std::string next_url = url + "&pbj=1&ctoken=" + continuation_token;
+ // NOTE(review): continuation_token is appended as-is, without url_param_encode; confirm the token is always url-safe
+ std::vector<CommandArg> additional_args = {
+ { "-H", "x-spf-referer: " + url },
+ { "-H", "x-youtube-client-name: 1" },
+ { "-H", "x-spf-previous: " + url },
+ { "-H", "x-youtube-client-version: 2.20200626.03.00" },
+ { "-H", "referer: " + url }
+ };
+
+ std::string website_data;
+ if(download_to_string(next_url, website_data, additional_args, use_tor, true) != DownloadResult::OK)
+ return;
+
+ Json::Value json_root;
+ Json::CharReaderBuilder json_builder;
+ std::unique_ptr<Json::CharReader> json_reader(json_builder.newCharReader());
+ std::string json_errors;
+ if(!json_reader->parse(&website_data[0], &website_data[website_data.size()], &json_root, &json_errors)) {
+ fprintf(stderr, "Youtube search continuation json error: %s\n", json_errors.c_str());
+ return;
+ }
+
+ if(!json_root.isArray())
+ return;
+
+ std::string next_continuation_token; // Receives the token found in this page, if any; it is not used after the loop
+ // Unlike the initial search response, items with an unexpected structure are skipped here instead of aborting
+ for(const Json::Value &json_item : json_root) {
+ if(!json_item.isObject())
+ continue;
+
+ const Json::Value &response_json = json_item["response"];
+ if(!response_json.isObject())
+ continue;
+
+ const Json::Value &continuation_contents_json = response_json["continuationContents"];
+ if(!continuation_contents_json.isObject())
+ continue;
+
+ const Json::Value &item_section_continuation_json = continuation_contents_json["itemSectionContinuation"];
+ if(!item_section_continuation_json.isObject())
+ continue;
+
+ // Note: the item_section_continuation json object is compatible with the item_section_renderer json object, so the same parser is used for both
+ parse_item_section_renderer(item_section_continuation_json, next_continuation_token, result_items);
+ }
+ }
+
static std::string get_playlist_id_from_url(const std::string &url) {
std::string playlist_id = url;
size_t list_index = playlist_id.find("&list=");