From 901edef180648ce9c76ec4f73005da6498f57cf0 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Thu, 10 Sep 2020 15:21:06 +0200 Subject: Use youtube json api for recommendations instead of searching for json in html --- src/plugins/Youtube.cpp | 136 ++++++++++++++++++++---------------------------- 1 file changed, 57 insertions(+), 79 deletions(-) (limited to 'src') diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp index d779ebc..ce7af0e 100644 --- a/src/plugins/Youtube.cpp +++ b/src/plugins/Youtube.cpp @@ -211,37 +211,6 @@ namespace QuickMedia { return result_items[0]; } - static size_t find_end_of_json(const std::string &website_data, size_t data_start) { - int brace_count = 0; - char string_char = '\0'; - bool escape = false; - for(size_t i = data_start; i < website_data.size(); ++i) { - char c = website_data[i]; - if(c == '{' && string_char == '\0') { - escape = false; - ++brace_count; - } else if(c == '}' && string_char == '\0') { - escape = false; - --brace_count; - if(brace_count == 0) { - return i + 1; - } - } else if(c == '"' || c == '\'') { - if(string_char == '\0') { - string_char = c; - } else if(c == string_char && !escape) { - string_char = '\0'; - } - escape = false; - } else if(c == '\\' && string_char) { - escape = !escape; - } else if(string_char) { - escape = false; - } - } - return std::string::npos; - } - // Returns empty string if continuation token can't be found static std::string item_section_renderer_get_continuation_token(const Json::Value &item_section_renderer_json) { const Json::Value &continuations_json = item_section_renderer_json["continuations"]; @@ -460,7 +429,7 @@ namespace QuickMedia { return result.substr(0, index); } - static std::unique_ptr parse_compact_video_renderer_json(const Json::Value &item_json) { + static std::unique_ptr parse_compact_video_renderer_json(const Json::Value &item_json, std::unordered_set &added_videos) { const Json::Value &compact_video_renderer_json = item_json["compactVideoRenderer"]; if(!compact_video_renderer_json.isObject()) return nullptr; @@ -470,6 +439,9 @@ namespace QuickMedia { return nullptr; std::string video_id_str = video_id_json.asString(); + if(added_videos.find(video_id_str) != added_videos.end()) + return nullptr; + std::string thumbnail_url = "https://img.youtube.com/vi/" + video_id_str + "/hqdefault.jpg"; const char *title = nullptr; @@ -487,6 +459,7 @@ namespace QuickMedia { auto body_item = std::make_unique(title); body_item->url = "https://www.youtube.com/watch?v=" + video_id_str; body_item->thumbnail_url = std::move(thumbnail_url); + added_videos.insert(video_id_str); return body_item; } @@ -508,74 +481,79 @@ namespace QuickMedia { additional_args.insert(additional_args.end(), cookies.begin(), cookies.end());*/ std::string website_data; - if(download_to_string(modified_url, website_data, additional_args, use_tor, true) != DownloadResult::OK) - return result_items; - - size_t data_start = website_data.find("window[\"ytInitialData\"] = {"); - if(data_start == std::string::npos) - return result_items; - - data_start = data_start + 26; - size_t data_end = find_end_of_json(website_data, data_start); - - if(data_end == std::string::npos) + if(download_to_string(modified_url + "&pbj=1", website_data, additional_args, use_tor, true) != DownloadResult::OK) return result_items; Json::Value json_root; Json::CharReaderBuilder json_builder; std::unique_ptr json_reader(json_builder.newCharReader()); std::string json_errors; - if(!json_reader->parse(&website_data[data_start], &website_data[data_end], &json_root, &json_errors)) { + if(!json_reader->parse(&website_data[0], &website_data[website_data.size()], &json_root, &json_errors)) { fprintf(stderr, "Youtube related media error: %s\n", json_errors.c_str()); return result_items; } - const Json::Value &contents_json = json_root["contents"]; - if(!contents_json.isObject()) - return result_items; - - const Json::Value &tcwnr_json = contents_json["twoColumnWatchNextResults"]; - if(!tcwnr_json.isObject()) - return result_items; - - const Json::Value &secondary_results_json = tcwnr_json["secondaryResults"]; - if(!secondary_results_json.isObject()) + if(!json_root.isArray()) return result_items; - const Json::Value &secondary_results2_json = secondary_results_json["secondaryResults"]; - if(!secondary_results2_json.isObject()) - return result_items; - - const Json::Value &results_json = secondary_results2_json["results"]; - if(!results_json.isArray()) - return result_items; + std::unordered_set added_videos; - for(const Json::Value &item_json : results_json) { - if(!item_json.isObject()) + for(const Json::Value &json_item : json_root) { + if(!json_item.isObject()) continue; - auto body_item = parse_compact_video_renderer_json(item_json); - if(body_item) - result_items.push_back(std::move(body_item)); - - const Json::Value &compact_autoplay_renderer_json = item_json["compactAutoplayRenderer"]; - if(!compact_autoplay_renderer_json.isObject()) - continue; - - const Json::Value &item_contents_json = compact_autoplay_renderer_json["contents"]; - if(!item_contents_json.isArray()) + const Json::Value &response_json = json_item["response"]; + if(!response_json.isObject()) continue; + + const Json::Value &contents_json = response_json["contents"]; + if(!contents_json.isObject()) + return result_items; + + const Json::Value &tcwnr_json = contents_json["twoColumnWatchNextResults"]; + if(!tcwnr_json.isObject()) + return result_items; + + const Json::Value &secondary_results_json = tcwnr_json["secondaryResults"]; + if(!secondary_results_json.isObject()) + return result_items; + + const Json::Value &secondary_results2_json = secondary_results_json["secondaryResults"]; + if(!secondary_results2_json.isObject()) + return result_items; - for(const Json::Value &content_item_json : item_contents_json) { - if(!content_item_json.isObject()) + const Json::Value &results_json = secondary_results2_json["results"]; + if(!results_json.isArray()) + return result_items; + + for(const Json::Value &item_json : results_json) { + if(!item_json.isObject()) continue; + + auto body_item = parse_compact_video_renderer_json(item_json, added_videos); + if(body_item) + result_items.push_back(std::move(body_item)); - const Json::Value &compact_video_renderer_json = content_item_json["compactVideoRenderer"]; - if(!compact_video_renderer_json.isObject()) + const Json::Value &compact_autoplay_renderer_json = item_json["compactAutoplayRenderer"]; + if(!compact_autoplay_renderer_json.isObject()) continue; - auto body_item = parse_compact_video_renderer_json(content_item_json); - result_items.push_back(std::move(body_item)); + const Json::Value &item_contents_json = compact_autoplay_renderer_json["contents"]; + if(!item_contents_json.isArray()) + continue; + + for(const Json::Value &content_item_json : item_contents_json) { + if(!content_item_json.isObject()) + continue; + + const Json::Value &compact_video_renderer_json = content_item_json["compactVideoRenderer"]; + if(!compact_video_renderer_json.isObject()) + continue; + + auto body_item = parse_compact_video_renderer_json(content_item_json, added_videos); + if(body_item) + result_items.push_back(std::move(body_item)); + } } } -- cgit v1.2.3