#include "../include/DownloadUtils.hpp"
#include "../include/Program.h"
#include "../include/Storage.hpp"
#include "../include/base64_url.hpp"
#include <SFML/System/Clock.hpp>

// When true, the complete curl command line is printed to stderr before it is executed.
static const bool debug_download = false;

// Output callback handed to exec_program.
// |userdata| must point to a std::string; every chunk of |size| bytes at |data| is appended to it.
// Always returns 0 (presumably "continue reading" -- confirm against exec_program in Program.h).
static int accumulate_string(char *data, int size, void *userdata) {
    std::string *str = static_cast<std::string*>(userdata);
    str->append(data, size);
    return 0;
}

// Full header line (name and value) passed to curl with -H when a browser user agent is requested.
static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";

namespace QuickMedia {
    // Downloads |url| by running curl (optionally wrapped in torsocks) and appends the program output to |result|.
    //
    // |additional_args|: extra option/value pairs, passed to curl in the given order.
    // |use_tor|: prefix the command with "torsocks".
    // |use_browser_useragent|: add the browser user-agent header defined by |useragent_str|.
    // |fail_on_error|: pass "-f" to curl.
    //
    // Returns DownloadResult::NET_ERR if exec_program returns non-zero, otherwise DownloadResult::OK.
    // Note that |result| is appended to, not cleared.
    // TODO: Add timeout
    DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent, bool fail_on_error) {
        sf::Clock timer;

        // The argument vector only stores pointers; the strings they point to (url, additional_args)
        // are owned by the caller and outlive the exec_program call below.
        std::vector<const char*> args;
        if(use_tor)
            args.push_back("torsocks");

        args.insert(args.end(), { "curl", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-s", "-L" });

        if(fail_on_error)
            args.push_back("-f");

        for(const CommandArg &arg : additional_args) {
            args.push_back(arg.option.c_str());
            args.push_back(arg.value.c_str());
        }

        if(use_browser_useragent) {
            args.push_back("-H");
            args.push_back(useragent_str);
        }

        // "--" ends option parsing so the url can never be interpreted as a curl option.
        args.push_back("--");
        args.push_back(url.c_str());
        // The argument list is null-terminated.
        args.push_back(nullptr);

        if(debug_download) {
            for(const char *arg : args) {
                if(arg)
                    fprintf(stderr, "'%s' ", arg);
            }
            fprintf(stderr, "\n");
        }

        if(exec_program(args.data(), accumulate_string, &result) != 0)
            return DownloadResult::NET_ERR;

        fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
        return DownloadResult::OK;
    }

    // Same as download_to_string, but backed by a file cache in "<cache dir>/media".
    // The cache file name is the base64-url encoding of |url|.
    //
    // If a regular file already exists for |url|, its content is loaded into |result| and nothing is downloaded.
    // Otherwise the url is downloaded and, on success, stored in the cache.
    //
    // Returns DownloadResult::ERR if the cached file can't be read or if the downloaded data can't be
    // written to the cache; otherwise returns the result of download_to_string.
    DownloadResult download_to_string_cache(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent) {
        Path media_dir = get_cache_dir().join("media");
        Path media_file_path = Path(media_dir).join(base64_url::encode(url));

        if(get_file_type(media_file_path) == FileType::REGULAR) {
            if(file_get_content(media_file_path, result) == 0) {
                fprintf(stderr, "Loaded %s from cache\n", url.c_str());
                return DownloadResult::OK;
            }

            fprintf(stderr, "Failed to get content of cached media file: %s\n", media_file_path.data.c_str());
            return DownloadResult::ERR;
        }

        DownloadResult download_result = download_to_string(url, result, additional_args, use_tor, use_browser_useragent);
        if(download_result != DownloadResult::OK)
            return download_result;

        // Write to a temporary file first and rename it into place afterwards, so the final
        // cache path only ever appears once its content has been written in full.
        Path media_file_path_tmp(media_file_path.data + ".tmp");
        if(create_directory_recursive(media_dir) != 0 || file_overwrite(media_file_path_tmp, result) != 0)
            return DownloadResult::ERR;

        if(rename(media_file_path_tmp.data.c_str(), media_file_path.data.c_str()) != 0) {
            perror("rename");
            return DownloadResult::ERR;
        }

        return DownloadResult::OK;
    }

    // Builds the curl arguments needed to send |form_data| as a multipart/form-data request body:
    // a Content-Type header that declares the boundary, and a --data-binary argument with the encoded body.
    //
    // Each item becomes one part:
    //   --<boundary>\r\n
    //   Content-Disposition: form-data; name="<key>"\r\n
    //   \r\n
    //   <value>\r\n
    // and the body is terminated with "--<boundary>--".
    std::vector<CommandArg> create_command_args_from_form_data(const std::vector<FormData> &form_data) {
        // TODO: This boundary value might need to change, depending on the content. What if the form data contains the boundary value?
        const std::string boundary = "-----------------------------119561554312148213571335532670";
        // RFC 2046 section 5.1.1: a delimiter line in the body is two hyphens followed by the
        // boundary parameter value from the Content-Type header. Without the extra "--" the
        // body doesn't contain the boundary that the header declares.
        const std::string delimiter = "--" + boundary;

        std::string form_data_str;
        for(const FormData &form_data_item : form_data) {
            form_data_str += delimiter;
            form_data_str += "\r\n";
            // TODO: What if the form key contains " or \r\n ?
            form_data_str += "Content-Disposition: form-data; name=\"" + form_data_item.key + "\"";
            form_data_str += "\r\n\r\n";
            // TODO: What if the value contains \r\n ?
            form_data_str += form_data_item.value;
            form_data_str += "\r\n";
        }
        // Closing delimiter: the delimiter followed by two more hyphens.
        // TODO: Verify if this should also be done if the form data is empty
        form_data_str += delimiter + "--";

        return {
            CommandArg{"-H", "Content-Type: multipart/form-data; boundary=" + boundary},
            CommandArg{"--data-binary", std::move(form_data_str)}
        };
    }
}