#include "../include/DownloadUtils.hpp"
#include "../include/Program.hpp"
#include "../include/Storage.hpp"
#include "../include/NetUtils.hpp"
#include "../external/hash-library/sha256.h"
// NOTE(review): the targets of the six includes below were missing in the reviewed copy of this file.
// They were restored from the names this file uses (readlink/close, PATH_MAX, memmem/strcpy, mgl::Clock,
// rapidjson::Document, rapidjson::FileReadStream) - confirm against version control.
#include <unistd.h>
#include <limits.h>
#include <string.h>
#include <mglpp/system/Clock.hpp>
#include <rapidjson/document.h>
#include <rapidjson/filereadstream.h>

namespace QuickMedia {
    // 100mb sane limit, TODO: make configurable
    static const int64_t default_download_limit_bytes = 1024 * 1024 * 100;

    // State shared with |accumulate_string_with_header| while a download is in progress.
    struct DownloadUserdata {
        // One string per header block. A new entry is appended for every redirect response that is seen.
        std::vector<std::string> *headers = nullptr;
        // Receives everything that follows the final header block.
        std::string *body = nullptr;
        // The download is aborted once |total_downloaded_size| reaches this value.
        int64_t download_limit = default_download_limit_bytes; // 100mb
        // When true, incoming data is appended to |body| without looking for headers.
        bool headers_finished = false;
        // Number of bytes received so far. Header bytes are counted as well.
        int64_t total_downloaded_size = 0;
    };

    // Output callback that appends |data| to the std::string pointed to by |userdata|.
    // Returns 1 without appending if the string would grow past the 100mb limit, otherwise 0.
    // Presumably a non-zero return tells exec_program to stop reading - confirm in Program.hpp.
    static int accumulate_string(char *data, int size, void *userdata) {
        std::string *str = (std::string*)userdata;
        if(str->size() + size > (size_t)default_download_limit_bytes)
            return 1;

        str->append(data, size);
        return 0;
    }

    // Returns true if the first line of |header| (everything before the first "\r\n") contains " 30",
    // i.e. the status line carries a 30x status code. Returns false if |header| has no "\r\n" in its first |size| bytes.
    static bool http_is_redirect(const char *header, size_t size) {
        const void *end_of_first_line_p = memmem(header, size, "\r\n", 2);
        if(!end_of_first_line_p)
            return false;
        return memmem(header, (const char*)end_of_first_line_p - header, " 30", 3) != nullptr;
    }

    // Output callback that splits the incoming data into header blocks and body.
    // |userdata| is a DownloadUserdata. Until the end of the headers has been found, data is appended to the last
    // entry of |headers|. Every header block whose status line is a redirect is followed by another header block,
    // which gets its own entry. Once a non-redirect header block ends ("\r\n\r\n"), the remaining data is moved to |body|
    // and all following calls append directly to |body|.
    // Returns 1 once the total amount of received data reaches the download limit, otherwise 0.
    static int accumulate_string_with_header(char *data, int size, void *userdata) {
        DownloadUserdata *download_userdata = (DownloadUserdata*)userdata;
        if(download_userdata->headers_finished) {
            download_userdata->body->append(data, size);
        } else {
            if(download_userdata->headers->empty())
                download_userdata->headers->push_back("");

            std::string *current_header = &download_userdata->headers->back();
            current_header->append(data, size);

            bool end_of_header_found = false;
            size_t end_of_headers_index = current_header->find("\r\n\r\n");
            if(end_of_headers_index != std::string::npos) {
                while(true) {
                    const bool is_redirect = http_is_redirect(current_header->c_str(), end_of_headers_index);
                    end_of_headers_index += 4; // Skip past the "\r\n\r\n" terminator
                    if(is_redirect) {
                        // What follows a redirect header block is the header block of the next response.
                        // Move it to its own entry and keep looking for the end of that one.
                        std::string header_after_this_header = current_header->substr(end_of_headers_index);
                        current_header->erase(current_header->begin() + end_of_headers_index, current_header->end());
                        download_userdata->headers->push_back(std::move(header_after_this_header));
                        // push_back may have reallocated the vector, so the pointer has to be refreshed
                        current_header = &download_userdata->headers->back();
                        end_of_headers_index = current_header->find("\r\n\r\n");
                        if(end_of_headers_index == std::string::npos)
                            break; // The next header block is not complete yet, wait for more data
                    } else {
                        end_of_header_found = true;
                        break;
                    }
                }
            }

            if(end_of_header_found) {
                // Everything after the final header block belongs to the body
                download_userdata->body->append(current_header->begin() + end_of_headers_index, current_header->end());
                current_header->erase(current_header->begin() + end_of_headers_index, current_header->end());
                download_userdata->headers_finished = true;
            }
        }

        download_userdata->total_downloaded_size += size;
        if(download_userdata->total_downloaded_size >= download_userdata->download_limit)
            return 1;
        return 0;
    }

    static const bool debug_download = false;
    static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";

    // Prints every non-null argument in |args| to stderr when |debug_download| is enabled.
    static void debug_print_args(const std::vector<const char*> &args) {
        if(!debug_download)
            return;

        for(const char *arg : args) {
            if(arg)
                fprintf(stderr, "'%s' ", arg);
        }
        fprintf(stderr, "\n");
    }

    // Logs the time elapsed on |timer| (in milliseconds) for |url| to stderr.
    static void log_download_duration(const std::string &url, mgl::Clock &timer) {
        fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), (int)(timer.get_elapsed_time_seconds() * 1000.0));
    }

    // Appends the option of every item in |additional_args| to |args|, followed by its value if the value is not empty.
    // |args| only stores pointers, so |additional_args| has to outlive |args|.
    static void append_command_args(std::vector<const char*> &args, const std::vector<CommandArg> &additional_args) {
        for(const CommandArg &arg : additional_args) {
            args.push_back(arg.option.c_str());
            if(!arg.value.empty())
                args.push_back(arg.value.c_str());
        }
    }

    // Runs "curl -I" on |url| and appends the program output to |output|.
    // Returns DownloadResult::NET_ERR if exec_program fails, otherwise DownloadResult::OK.
    static DownloadResult run_head_request(const std::string &url, std::string &output, bool use_browser_useragent, bool fail_on_error) {
        mgl::Clock timer;
        std::vector<const char*> args;
        args.insert(args.end(), { "curl", "-I", "-g", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-s" });
        if(fail_on_error)
            args.push_back("-f");
        if(use_browser_useragent) {
            args.push_back("-H");
            args.push_back(useragent_str);
        }
        args.push_back("--");
        args.push_back(url.c_str());
        args.push_back(nullptr);

        debug_print_args(args);

        if(exec_program(args.data(), accumulate_string, &output) != 0)
            return DownloadResult::NET_ERR;

        log_download_duration(url, timer);
        return DownloadResult::OK;
    }

    // Returns the url-decoded part of |url| between the last '/' and the next '/', '&' or '?' (or the end of the url).
    // Returns an empty string if |url| doesn't contain a '/'.
    static std::string filename_from_url(const std::string &url) {
        const size_t last_slash_index = url.rfind('/');
        if(last_slash_index == std::string::npos)
            return "";

        const size_t filename_start = last_slash_index + 1;
        const size_t filename_end = url.find_first_of("/&?", filename_start);
        const size_t filename_length = filename_end == std::string::npos ? std::string::npos : filename_end - filename_start;
        return url_param_decode(url.substr(filename_start, filename_length));
    }

    // Stores the output of a "curl -I" request for |url| in |result|. |result| is cleared first.
    // Returns DownloadResult::NET_ERR if the request fails, otherwise DownloadResult::OK.
    DownloadResult download_head_to_string(const std::string &url, std::string &result, bool use_browser_useragent, bool fail_on_error) {
        result.clear();
        return run_head_request(url, result, use_browser_useragent, fail_on_error);
    }

    // Stores the name of the remote file at |url| in |result|. |result| is cleared first.
    // The name is taken from the "filename*=" or "filename=" part of the content-disposition header if there is one,
    // otherwise from the last path component of |url|. The name is url-decoded.
    // Returns DownloadResult::NET_ERR if the head request fails, otherwise DownloadResult::OK.
    DownloadResult url_get_remote_name(const std::string &url, std::string &result, bool use_browser_useragent) {
        result.clear();

        std::string header;
        const DownloadResult head_result = run_head_request(url, header, use_browser_useragent, false);
        if(head_result != DownloadResult::OK)
            return head_result;

        std::string content_disposition = header_extract_value(header, "content-disposition");
        // TODO: after filename*= the encoding type will follow. We need to support other formats than utf-8 as well
        size_t filename_start = content_disposition.find("filename*=");
        if(filename_start == std::string::npos) {
            filename_start = content_disposition.find("filename=");
            if(filename_start != std::string::npos)
                filename_start += 9; // Length of "filename="
        } else {
            filename_start += 10; // Length of "filename*="
            // The value has the form charset'language'name, the name starts after the two quotes
            filename_start = content_disposition.find("''", filename_start);
            if(filename_start != std::string::npos)
                filename_start += 2;
        }

        if(filename_start == std::string::npos) {
            result = filename_from_url(url);
            return DownloadResult::OK;
        }

        // Skip leading quotes and spaces
        const size_t first_name_char = content_disposition.find_first_not_of("\" ", filename_start);
        if(first_name_char != std::string::npos)
            filename_start = first_name_char;

        // Skip trailing quotes, spaces and newlines. If nothing else is left then the value is used untrimmed at the end
        size_t filename_end = content_disposition.size();
        const size_t last_name_char = content_disposition.find_last_not_of("\" \n\r");
        if(last_name_char != std::string::npos && last_name_char >= filename_start)
            filename_end = last_name_char + 1;

        result = url_param_decode(content_disposition.substr(filename_start, filename_end - filename_start));
        return DownloadResult::OK;
    }

    // Downloads |url| with curl (or wget if |cloudflare_bypass| is set) and stores the response body in |result|.
    // |result| is cleared first. |additional_args| are passed on to the program.
    // If |headers| is not null then curl is asked to output the response headers (-i) and they are stored in |headers|,
    // one entry per response (redirect responses get their own entry).
    // The download is aborted when |download_limit| bytes have been received.
    // Returns DownloadResult::NET_ERR if exec_program fails, otherwise DownloadResult::OK.
    // TODO: Add timeout
    DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool fail_on_error, bool cloudflare_bypass, std::vector<std::string> *headers, int download_limit) {
        result.clear();
        mgl::Clock timer;
        std::vector<const char*> args;
        if(cloudflare_bypass) {
            args.insert(args.end(), { "wget", "--header", "Accept-Language: en-US,en;q=0.5", "-q", "-O", "-" });
            // TODO:
            //if(fail_on_error)
            //    args.push_back("-f");
            // TODO: May not be compatible
            append_command_args(args, additional_args);
            if(use_browser_useragent) {
                args.push_back("--header");
                args.push_back(useragent_str);
            }
            // TODO:
            //if(headers)
            //    args.push_back("-i");
            // NOTE(review): no option asks wget for the response headers here, but |headers_finished| below is still
            // initialised from |headers|. With both |cloudflare_bypass| and |headers| set, the start of the body is
            // presumably parsed as a header block - confirm and resolve together with the TODO above.
        } else {
            args.insert(args.end(), { "curl", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-g", "-s", "-L" });
            if(fail_on_error)
                args.push_back("-f");
            append_command_args(args, additional_args);
            if(use_browser_useragent) {
                args.push_back("-H");
                args.push_back(useragent_str);
            }
            if(headers)
                args.push_back("-i");
        }
        args.push_back("--");
        args.push_back(url.c_str());
        args.push_back(nullptr);

        debug_print_args(args);

        DownloadUserdata download_userdata;
        download_userdata.headers = headers;
        download_userdata.body = &result;
        download_userdata.download_limit = download_limit;
        // Without a place to store headers, all output is treated as body
        download_userdata.headers_finished = !headers;

        if(exec_program(args.data(), accumulate_string_with_header, &download_userdata) != 0)
            return DownloadResult::NET_ERR;

        log_download_duration(url, timer);
        return DownloadResult::OK;
    }

    // Same as |download_to_string| but the result is cached in a file.
    // The cache file is |cache_path|, or if that is empty a file in the "media" cache directory named after the
    // sha256 hash of |url|. If the cache file exists then its content is returned and nothing is downloaded.
    // If |error_handler| is set then the download is done without failing on http errors and the handler decides
    // whether the downloaded content is valid. Only valid content is written to the cache file.
    // Returns DownloadResult::ERR if the cache file can't be read or written, or if |error_handler| rejects the content.
    DownloadResult download_to_string_cache(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, DownloadErrorHandler error_handler, Path cache_path) {
        result.clear();
        Path media_file_path;
        if(cache_path.data.empty()) {
            SHA256 sha256;
            sha256.add(url.data(), url.size());
            media_file_path = get_cache_dir().join("media").join(sha256.getHash());
        } else {
            media_file_path = std::move(cache_path);
        }

        if(get_file_type(media_file_path) == FileType::REGULAR) {
            if(file_get_content(media_file_path, result) == 0) {
                fprintf(stderr, "Loaded %s from cache\n", url.c_str());
                return DownloadResult::OK;
            }

            fprintf(stderr, "Failed to get content of cached media file: %s\n", media_file_path.data.c_str());
            return DownloadResult::ERR;
        }

        // When there is an error handler it is the one that decides if the response is an error
        const bool fail_on_error = !error_handler;
        DownloadResult download_result = download_to_string(url, result, additional_args, use_browser_useragent, fail_on_error);
        if(download_result == DownloadResult::OK && error_handler)
            download_result = error_handler(result) ? DownloadResult::OK : DownloadResult::ERR;
        if(download_result == DownloadResult::OK)
            download_result = file_overwrite_atomic(media_file_path, result) == 0 ? DownloadResult::OK : DownloadResult::ERR;
        return download_result;
    }

    // Downloads |url| to |destination_filepath|.
    // The content is first written to |destination_filepath| + ".tmp" (by passing -o to the download program) and the
    // file is renamed to |destination_filepath| when the download has finished. The parent directory is created if needed.
    // Returns DownloadResult::ERR if the directory can't be created or if the rename fails, otherwise the result of the download.
    DownloadResult download_to_file(const std::string &url, const std::string &destination_filepath, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool cloudflare_bypass) {
        Path tmp_filepath = destination_filepath;
        tmp_filepath.append(".tmp");

        // TODO: Optimize with temporary '\0'
        size_t dir_end = tmp_filepath.data.rfind('/');
        if(dir_end != std::string::npos && create_directory_recursive(tmp_filepath.data.substr(0, dir_end)) != 0)
            return DownloadResult::ERR;

        std::vector<CommandArg> args = additional_args;
        args.push_back({ "-o", tmp_filepath.data.c_str() });

        // The program writes to the file, so nothing is expected in this string
        std::string dummy;
        DownloadResult res = download_to_string(url, dummy, args, use_browser_useragent, true, cloudflare_bypass);
        if(res != DownloadResult::OK)
            return res;

        if(rename_atomic(tmp_filepath.data.c_str(), destination_filepath.c_str()) != 0) {
            perror("rename");
            return DownloadResult::ERR;
        }

        return DownloadResult::OK;
    }

    // Starts "<this executable> download -u |url| --dir |file_manager_start_dir|" with exec_program_async,
    // with "--no-video" added if |no_video| is set.
    // The executable path is read from /proc/self/exe, "quickmedia" is used if that fails.
    // Returns true if exec_program_async returns 0.
    bool download_async_gui(const std::string &url, const std::string &file_manager_start_dir, bool no_video) {
        char quickmedia_path[PATH_MAX];
        // readlink doesn't null terminate, so one byte is reserved for the terminator
        ssize_t bytes_written = readlink("/proc/self/exe", quickmedia_path, sizeof(quickmedia_path) - 1);
        if(bytes_written == -1)
            strcpy(quickmedia_path, "quickmedia");
        else
            quickmedia_path[bytes_written] = '\0';

        std::vector<const char*> args = { quickmedia_path, "download", "-u", url.c_str(), "--dir", file_manager_start_dir.c_str() };
        if(no_video) args.push_back("--no-video");
        args.push_back(nullptr);
        return exec_program_async(args.data(), nullptr) == 0;
    }

    // Downloads |url| with curl and parses the output as json into |result| while it is being received.
    // |additional_args| are passed on to curl.
    // Returns DownloadResult::NET_ERR if the program can't be started and DownloadResult::ERR if the
    // program output can't be opened as a stream or if the json is invalid. Otherwise DownloadResult::OK.
    // TODO: Add timeout
    DownloadResult download_to_json(const std::string &url, rapidjson::Document &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool fail_on_error) {
        mgl::Clock timer;
        std::vector<const char*> args;
        args.insert(args.end(), { "curl", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-g", "-s", "-L" });
        if(fail_on_error)
            args.push_back("-f");
        append_command_args(args, additional_args);
        if(use_browser_useragent) {
            args.push_back("-H");
            args.push_back(useragent_str);
        }
        args.push_back("--");
        args.push_back(url.c_str());
        args.push_back(nullptr);

        debug_print_args(args);

        ReadProgram read_program;
        if(exec_program_pipe(args.data(), &read_program) != 0)
            return DownloadResult::NET_ERR;

        FILE *file = fdopen(read_program.read_fd, "rb");
        if(!file) {
            program_clear_current_thread();
            // fdopen doesn't close the file descriptor when it fails, so it has to be closed here to not leak it.
            // This is done before waiting for the program, the same order as fclose/wait_program below.
            close(read_program.read_fd);
            wait_program(read_program.pid);
            return DownloadResult::ERR;
        }

        char read_buffer[8192];
        rapidjson::FileReadStream is(file, read_buffer, sizeof(read_buffer));
        rapidjson::ParseResult parse_result = result.ParseStream(is);
        program_clear_current_thread();

        // fclose also closes read_program.read_fd
        fclose(file);
        wait_program(read_program.pid);
        log_download_duration(url, timer);

        return parse_result.IsError() ? DownloadResult::ERR : DownloadResult::OK;
    }
}