From e4792f46d545263d16db21bd0caf71345a69b63f Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 22 Jun 2021 16:07:20 +0200 Subject: Only do youtube redirect on failure to load video --- src/plugins/Youtube.cpp | 402 +++++++++++++++++++++++++++--------------------- 1 file changed, 229 insertions(+), 173 deletions(-) (limited to 'src/plugins/Youtube.cpp') diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp index 79eb3d5..4df1358 100644 --- a/src/plugins/Youtube.cpp +++ b/src/plugins/Youtube.cpp @@ -101,6 +101,170 @@ R"END( return false; } + static std::mutex cookies_mutex; + static std::string cookies_filepath; + static std::string api_key; + + static bool is_whitespace(char c) { + return (c >= 8 && c <= 13) || c == ' '; + } + + // TODO: Cache this and redownload it when a network request fails with this api key? Do that in the same place as the signature, which means it would be done asynchronously + static std::string youtube_page_find_api_key() { + size_t api_key_index; + size_t api_key_index_end; + size_t api_key_length; + std::string website_result; + std::string::iterator api_key_start; + + if(download_to_string("https://www.youtube.com/?gl=US&hl=en", website_result, {}, true) != DownloadResult::OK) + goto fallback; + + api_key_index = website_result.find("INNERTUBE_API_KEY"); + if(api_key_index == std::string::npos) + goto fallback; + + api_key_index += 17; + api_key_start = std::find_if(website_result.begin() + api_key_index, website_result.end(), [](char c) { + return c != '"' && c != ':' && !is_whitespace(c); + }); + + if(api_key_start == website_result.end()) + goto fallback; + + api_key_index = api_key_start - website_result.begin(); + api_key_index_end = website_result.find('"', api_key_index); + if(api_key_index_end == std::string::npos) + goto fallback; + + api_key_length = api_key_index_end - api_key_index; + if(api_key_length > 512) // sanity check + goto fallback; + + return website_result.substr(api_key_index, api_key_length); + + fallback: + fprintf(stderr, "Failed to fetch youtube api key, fallback to %s\n", api_key.c_str()); + return "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; + } + + static std::string cpn; + + static bool generate_random_characters(char *buffer, int buffer_size, const char *alphabet, size_t alphabet_size) { + int fd = open("/dev/urandom", O_RDONLY); + if(fd == -1) { + perror("/dev/urandom"); + return false; + } + + if(read(fd, buffer, buffer_size) < buffer_size) { + fprintf(stderr, "Failed to read %d bytes from /dev/urandom\n", buffer_size); + close(fd); + return false; + } + + for(int i = 0; i < buffer_size; ++i) { + unsigned char c = *(unsigned char*)&buffer[i]; + buffer[i] = alphabet[c % alphabet_size]; + } + close(fd); + return true; + } + + static std::vector get_cookies() { + std::lock_guard lock(cookies_mutex); + if(cookies_filepath.empty()) { + YoutubeSignatureDecryptor::get_instance(); + + cpn.resize(16); + generate_random_characters(cpn.data(), cpn.size(), "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_", 64); + + Path cookies_filepath_p; + if(get_cookies_filepath(cookies_filepath_p, "youtube") != 0) { + show_notification("QuickMedia", "Failed to create youtube cookies file", Urgency::CRITICAL); + return {}; + } + + // TODO: Re-enable this if the api key ever changes in the future. + // Maybe also put signature decryption in the same request? since it requests the same page. + #if 0 + //api_key = youtube_page_find_api_key(); + #else + api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; + #endif + + if(get_file_type(cookies_filepath_p) == FileType::REGULAR) { + cookies_filepath = cookies_filepath_p.data; + } else { + Path cookies_filepath_tmp = cookies_filepath_p; + cookies_filepath_tmp.append(".tmp"); + + // TODO: This response also contains INNERTUBE_API_KEY which is the api key above. Maybe that should be parsed? + // TODO: Is there any way to bypass this? this is needed to set VISITOR_INFO1_LIVE which is required to read comments + const char *args[] = { "curl", "-I", "-s", "-f", "-L", "-b", cookies_filepath_tmp.data.c_str(), "-c", cookies_filepath_tmp.data.c_str(), "https://www.youtube.com/embed/watch?v=jNQXAC9IVRw&gl=US&hl=en", nullptr }; + if(exec_program(args, nullptr, nullptr) == 0) { + rename_atomic(cookies_filepath_tmp.data.c_str(), cookies_filepath_p.data.c_str()); + cookies_filepath = cookies_filepath_p.data; + } else { + show_notification("QuickMedia", "Failed to fetch cookies to view youtube comments", Urgency::CRITICAL); + return {}; + } + } + } + + return { + CommandArg{ "-b", cookies_filepath }, + CommandArg{ "-c", cookies_filepath } + }; + } + + // Sometimes youtube returns a redirect url (not in the header but in the body...). + // TODO: Find why this happens and if there is a way bypass it. + static std::string get_playback_url_recursive(std::string playback_url) { + std::vector additional_args = get_cookies(); + additional_args.push_back({ "-r", "0-4096" }); + + const int max_redirects = 5; + for(int i = 0; i < max_redirects; ++i) { + std::string response_body; + std::string response_headers; + download_to_string(playback_url, response_body, additional_args, true, true, &response_headers, 4096); + + std::string content_type = header_extract_value(response_headers, "content-type"); + if(content_type.empty()) { + fprintf(stderr, "Failed to find content-type in youtube video header. Trying to play the video anyways\n"); + return playback_url; + } + + if(string_starts_with(content_type, "video") || string_starts_with(content_type, "audio")) + return playback_url; + + if(response_body.empty()) { + fprintf(stderr, "Failed to redirect youtube video. Trying to play the video anyways\n"); + return playback_url; + } + + playback_url = std::move(response_body); + } + + return playback_url; + } + + void youtube_custom_redirect(std::string &video_url, std::string &audio_url) { + // TODO: Do this without threads + AsyncTask tasks[2]; + if(!video_url.empty()) + tasks[0] = AsyncTask([video_url]() { return get_playback_url_recursive(std::move(video_url)); }); + if(!audio_url.empty()) + tasks[1] = AsyncTask([audio_url]() { return get_playback_url_recursive(std::move(audio_url)); }); + + std::string *strings[2] = { &video_url, &audio_url }; + for(int i = 0; i < 2; ++i) { + if(tasks[i].valid()) + *strings[i] = tasks[i].get(); + } + } + // This is a common setup of text in the youtube json static std::optional yt_json_get_text(const Json::Value &json, const char *root_name) { if(!json.isObject()) @@ -427,131 +591,6 @@ R"END( } } - static std::mutex cookies_mutex; - static std::string cookies_filepath; - static std::string api_key; - - static bool is_whitespace(char c) { - return (c >= 8 && c <= 13) || c == ' '; - } - - // TODO: Cache this and redownload it when a network request fails with this api key? Do that in the same place as the signature, which means it would be done asynchronously - static std::string youtube_page_find_api_key() { - size_t api_key_index; - size_t api_key_index_end; - size_t api_key_length; - std::string website_result; - std::string::iterator api_key_start; - - if(download_to_string("https://www.youtube.com/?gl=US&hl=en", website_result, {}, true) != DownloadResult::OK) - goto fallback; - - api_key_index = website_result.find("INNERTUBE_API_KEY"); - if(api_key_index == std::string::npos) - goto fallback; - - api_key_index += 17; - api_key_start = std::find_if(website_result.begin() + api_key_index, website_result.end(), [](char c) { - return c != '"' && c != ':' && !is_whitespace(c); - }); - - if(api_key_start == website_result.end()) - goto fallback; - - api_key_index = api_key_start - website_result.begin(); - api_key_index_end = website_result.find('"', api_key_index); - if(api_key_index_end == std::string::npos) - goto fallback; - - api_key_length = api_key_index_end - api_key_index; - if(api_key_length > 512) // sanity check - goto fallback; - - return website_result.substr(api_key_index, api_key_length); - - fallback: - fprintf(stderr, "Failed to fetch youtube api key, fallback to %s\n", api_key.c_str()); - return "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; - } - - static std::string cpn; - - static bool generate_random_characters(char *buffer, int buffer_size, const char *alphabet, size_t alphabet_size) { - int fd = open("/dev/urandom", O_RDONLY); - if(fd == -1) { - perror("/dev/urandom"); - return false; - } - - if(read(fd, buffer, buffer_size) < buffer_size) { - fprintf(stderr, "Failed to read %d bytes from /dev/urandom\n", buffer_size); - close(fd); - return false; - } - - for(int i = 0; i < buffer_size; ++i) { - unsigned char c = *(unsigned char*)&buffer[i]; - buffer[i] = alphabet[c % alphabet_size]; - } - close(fd); - return true; - } - - static std::vector get_cookies() { - std::lock_guard lock(cookies_mutex); - if(cookies_filepath.empty()) { - YoutubeSignatureDecryptor::get_instance(); - - cpn.resize(16); - generate_random_characters(cpn.data(), cpn.size(), "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_", 64); - - Path cookies_filepath_p; - if(get_cookies_filepath(cookies_filepath_p, "youtube") != 0) { - show_notification("QuickMedia", "Failed to create youtube cookies file", Urgency::CRITICAL); - return {}; - } - - // TODO: Re-enable this if the api key ever changes in the future. - // Maybe also put signature decryption in the same request? since it requests the same page. - #if 0 - //api_key = youtube_page_find_api_key(); - #else - api_key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; - #endif - - if(get_file_type(cookies_filepath_p) == FileType::REGULAR) { - cookies_filepath = cookies_filepath_p.data; - } else { - Path cookies_filepath_tmp = cookies_filepath_p; - cookies_filepath_tmp.append(".tmp"); - - // TODO: This response also contains INNERTUBE_API_KEY which is the api key above. Maybe that should be parsed? - // TODO: Is there any way to bypass this? this is needed to set VISITOR_INFO1_LIVE which is required to read comments - const char *args[] = { "curl", "-I", "-s", "-f", "-L", "-b", cookies_filepath_tmp.data.c_str(), "-c", cookies_filepath_tmp.data.c_str(), "https://www.youtube.com/embed/watch?v=jNQXAC9IVRw&gl=US&hl=en", nullptr }; - if(exec_program(args, nullptr, nullptr) == 0) { - rename_atomic(cookies_filepath_tmp.data.c_str(), cookies_filepath_p.data.c_str()); - cookies_filepath = cookies_filepath_p.data; - } else { - show_notification("QuickMedia", "Failed to fetch cookies to view youtube comments", Urgency::CRITICAL); - return {}; - } - } - } - - return { - CommandArg{ "-b", cookies_filepath }, - CommandArg{ "-c", cookies_filepath } - }; - } - - static std::string remove_index_from_playlist_url(const std::string &url) { - std::string result = url; - size_t index = result.rfind("&index="); - if(index == std::string::npos) - return result; - return result.substr(0, index); - } - static std::shared_ptr parse_compact_video_renderer_json(const Json::Value &item_json, std::unordered_set &added_videos) { const Json::Value &compact_video_renderer_json = item_json["compactVideoRenderer"]; if(!compact_video_renderer_json.isObject()) @@ -1982,34 +2021,6 @@ R"END( return nullptr; } - // Sometimes youtube returns a redirect url (not in the header but in the body...). - // TODO: Find why this happens and if there is a way bypass it. - // Or maybe move this logic to QuickMedia video_content_page when mpv fails to start up (mpv will exit with exit code 2 and the message "Failed to recognize file format." when this happens). - // But that might be too slow for pinephone. - static std::string get_playback_url_recursive(std::string playback_url) { - const int max_redirects = 5; - for(int i = 0; i < max_redirects; ++i) { - std::string response_headers; - if(download_head_to_string(playback_url, response_headers, true) != DownloadResult::OK) - return ""; - - std::string content_type = header_extract_value(response_headers, "content-type"); - if(content_type.empty()) - return ""; - - if(string_starts_with(content_type, "video") || string_starts_with(content_type, "audio")) - return playback_url; - - // TODO: Download head and body in one request - std::string new_url; - if(download_to_string(playback_url, new_url, {}, true) != DownloadResult::OK) - return ""; - - playback_url = std::move(new_url); - } - return playback_url; - } - std::string YoutubeVideoPage::get_video_url(int max_height, bool &has_embedded_audio) { if(!hls_manifest_url.empty()) { has_embedded_audio = true; @@ -2036,7 +2047,7 @@ R"END( print_chosen_format(*chosen_video_format); has_embedded_audio = chosen_video_format->has_embedded_audio; - return get_playback_url_recursive(chosen_video_format->base.url); + return chosen_video_format->base.url; } std::string YoutubeVideoPage::get_audio_url() { @@ -2046,7 +2057,7 @@ R"END( // TODO: The "worst" (but still good) quality audio is chosen right now because youtube seeking freezes for up to 15 seconds when choosing the best quality const YoutubeAudioFormat *chosen_audio_format = &audio_formats.back(); fprintf(stderr, "Choosing youtube audio format: bitrate: %d, mime type: %s\n", chosen_audio_format->base.bitrate, chosen_audio_format->base.mime_type.c_str()); - return get_playback_url_recursive(chosen_audio_format->base.url); + return chosen_audio_format->base.url; } // Returns -1 if timestamp is in an invalid format @@ -2089,17 +2100,7 @@ R"END( return result; } - PluginResult YoutubeVideoPage::load(std::string &title, std::string &channel_url, std::vector &chapters) { - hls_manifest_url.clear(); - video_formats.clear(); - audio_formats.clear(); - - std::string video_id; - if(!youtube_url_extract_id(url, video_id)) { - fprintf(stderr, "Failed to extract youtube id from %s\n", url.c_str()); - return PluginResult::ERR; - } - + PluginResult YoutubeVideoPage::get_video_info(const std::string &video_id, Json::Value &json_root) { std::vector additional_args = get_cookies(); std::string response; @@ -2109,7 +2110,6 @@ R"END( std::string player_response_param = url_extract_param(response, "player_response"); player_response_param = url_param_decode(player_response_param); - Json::Value json_root; Json::CharReaderBuilder json_builder; std::unique_ptr json_reader(json_builder.newCharReader()); std::string json_errors; @@ -2118,27 +2118,83 @@ R"END( return PluginResult::ERR; } + return PluginResult::OK; + } + + PluginResult YoutubeVideoPage::load(std::string &title, std::string &channel_url, std::vector &chapters) { + hls_manifest_url.clear(); + video_formats.clear(); + audio_formats.clear(); + + std::string video_id; + if(!youtube_url_extract_id(url, video_id)) { + fprintf(stderr, "Failed to extract youtube id from %s\n", url.c_str()); + return PluginResult::ERR; + } + + #if 0 + std::string request_data = key_api_request_data; + string_replace_all(request_data, "%VIDEO_ID%", video_id); + + std::vector additional_args = { + { "-H", "Content-Type: application/json" }, + { "-H", "x-youtube-client-name: 1" }, + { "-H", youtube_client_version }, + { "--data-raw", std::move(request_data) } + }; + + std::vector cookies = get_cookies(); + additional_args.insert(additional_args.end(), cookies.begin(), cookies.end()); + + Json::Value json_root; + DownloadResult download_result = download_json(json_root, "https://www.youtube.com/youtubei/v1/player?key=" + api_key + "&gl=US&hl=en", additional_args, true); + if(download_result != DownloadResult::OK) return download_result_to_plugin_result(download_result); + if(!json_root.isObject()) return PluginResult::ERR; - const Json::Value &streaming_data_json = json_root["streamingData"]; - if(!streaming_data_json.isObject()) { + const Json::Value *streaming_data_json = &json_root["streamingData"]; + if(!streaming_data_json->isObject()) { const Json::Value &playability_status_json = json_root["playabilityStatus"]; if(playability_status_json.isObject()) { const Json::Value &status_json = playability_status_json["status"]; const Json::Value &reason_json = playability_status_json["reason"]; - if(status_json.isString()) - fprintf(stderr, "Youtube video loading failed, reason: (status: %s, reason: %s)\n", status_json.asCString(), reason_json.isString() ? reason_json.asCString() : "unknown"); + fprintf(stderr, "Warning: youtube video loading failed, reason: (status: %s, reason: %s), trying with get_video_info endpoint instead\n", status_json.isString() ? status_json.asCString() : "unknown", reason_json.isString() ? reason_json.asCString() : "unknown"); + + json_root = Json::Value(Json::nullValue); + PluginResult result = get_video_info(video_id, json_root); + if(result != PluginResult::OK) + return result; + + if(!json_root.isObject()) + return PluginResult::ERR; + + streaming_data_json = &json_root["streamingData"]; + if(!streaming_data_json->isObject()) + return PluginResult::ERR; } return PluginResult::ERR; } + #else + Json::Value json_root; + PluginResult result = get_video_info(video_id, json_root); + if(result != PluginResult::OK) + return result; + + if(!json_root.isObject()) + return PluginResult::ERR; + + const Json::Value *streaming_data_json = &json_root["streamingData"]; + if(!streaming_data_json->isObject()) + return PluginResult::ERR; + #endif // TODO: Verify if this always works (what about copyrighted live streams?), also what about choosing video quality for live stream? Maybe use mpv --hls-bitrate option? - const Json::Value &hls_manifest_url_json = streaming_data_json["hlsManifestUrl"]; + const Json::Value &hls_manifest_url_json = (*streaming_data_json)["hlsManifestUrl"]; if(hls_manifest_url_json.isString()) { hls_manifest_url = hls_manifest_url_json.asString(); } else { - parse_formats(streaming_data_json); + parse_formats(*streaming_data_json); if(video_formats.empty() && audio_formats.empty()) return PluginResult::ERR; } -- cgit v1.2.3