1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#include "../include/DownloadUtils.hpp"
#include "../include/Program.h"
#include "../include/Storage.hpp"
#include <SFML/System/Clock.hpp>
#include <cppcodec/base64_rfc4648.hpp>
// Compile-time switch for debugging: when set to true, download_to_string prints
// every argument of the command it is about to run to stderr.
static constexpr bool debug_download = false;
// Output callback passed to exec_program.
// |userdata| must point to a std::string; the |size| bytes starting at |data| are appended to it.
// Always returns 0.
static int accumulate_string(char *data, int size, void *userdata) {
    std::string &output = *static_cast<std::string*>(userdata);
    output.append(data, size);
    return 0;
}
// Complete "user-agent" header line (header name included), passed to curl with -H
// when a download is requested with a browser user agent.
static const char *const useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";
namespace QuickMedia {
// TODO: Add timeout
// Downloads |url| by running curl through exec_program. The output of the command is
// appended to |result| by accumulate_string.
//
// |additional_args|: extra option/value pairs, added to the command line in order.
// |use_tor|: when true, the command is run through torsocks.
// |use_browser_useragent|: when true, the browser user-agent header is added.
//
// Returns DownloadResult::NET_ERR when exec_program returns non-zero, otherwise DownloadResult::OK.
// The download duration is logged to stderr on success.
DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent) {
    sf::Clock timer;

    // Options that are always passed to curl
    static const char *const curl_base_args[] = { "curl", "-f", "-H", "Accept-Language: en-US,en;q=0.5", "--compressed", "-s", "-L" };

    std::vector<const char*> args;
    if(use_tor)
        args.push_back("torsocks");

    for(const char *base_arg : curl_base_args)
        args.push_back(base_arg);

    for(const CommandArg &extra_arg : additional_args) {
        args.push_back(extra_arg.option.c_str());
        args.push_back(extra_arg.value.c_str());
    }

    if(use_browser_useragent) {
        args.push_back("-H");
        args.push_back(useragent_str);
    }

    // "--" marks the end of the options, so the url is never parsed as an option
    args.push_back("--");
    args.push_back(url.c_str());
    // The argument list is null-terminated
    args.push_back(nullptr);

    if(debug_download) {
        for(const char *arg : args) {
            if(!arg)
                continue;
            fprintf(stderr, "'%s' ", arg);
        }
        fprintf(stderr, "\n");
    }

    const bool command_failed = exec_program(args.data(), accumulate_string, &result) != 0;
    if(command_failed)
        return DownloadResult::NET_ERR;

    fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
    return DownloadResult::OK;
}
// Like download_to_string, but backed by a file cache in the "media" subdirectory of the cache directory.
//
// If a regular file already exists for |url|, its content is loaded into |result| and nothing is downloaded.
// Otherwise |url| is downloaded with download_to_string and, on success, the downloaded data is written
// to a temporary file that is then renamed to the final cache filename.
//
// Returns DownloadResult::OK when the data was loaded from the cache, or downloaded and stored in the cache.
// Returns DownloadResult::ERR when the cached file can't be read or the cache file can't be written/renamed.
// Otherwise returns the error from download_to_string.
DownloadResult download_to_string_cache(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent) {
    // The cache filename is the base64 encoding of the url. The rfc4648 alphabet contains '/', which is
    // a path separator: a name containing it would refer to a file inside a subdirectory of the media
    // directory that is never created, so the cache file for such urls could presumably never be written
    // and a successful download would be reported as an error.
    // '_' is not part of the rfc4648 alphabet, so replacing '/' with it can't make two different urls
    // share a filename, and filenames that didn't contain '/' are unchanged.
    // TODO: Very long urls can still produce a filename longer than the filesystem allows.
    std::string cache_filename = cppcodec::base64_rfc4648::encode(url);
    for(char &c : cache_filename) {
        if(c == '/')
            c = '_';
    }

    Path media_dir = get_cache_dir().join("media");
    Path media_file_path = Path(media_dir).join(cache_filename);
    Path media_file_path_tmp(media_file_path.data + ".tmp");

    if(get_file_type(media_file_path) == FileType::REGULAR) {
        if(file_get_content(media_file_path, result) == 0) {
            fprintf(stderr, "Loaded %s from cache\n", url.c_str());
            return DownloadResult::OK;
        } else {
            fprintf(stderr, "Failed to get content of cached media file: %s\n", media_file_path.data.c_str());
            return DownloadResult::ERR;
        }
    }

    DownloadResult download_result = download_to_string(url, result, additional_args, use_tor, use_browser_useragent);
    if(download_result != DownloadResult::OK)
        return download_result;

    // Write to a temporary file first and rename it afterwards, so a partially written
    // file never shows up under the final cache filename.
    if(create_directory_recursive(media_dir) != 0 || file_overwrite(media_file_path_tmp, result) != 0)
        return DownloadResult::ERR;

    if(rename(media_file_path_tmp.data.c_str(), media_file_path.data.c_str()) != 0) {
        perror("rename");
        return DownloadResult::ERR;
    }

    return DownloadResult::OK;
}
// Builds the curl arguments needed to send |form_data| as a multipart/form-data request body.
// Returns two arguments: a -H argument with the Content-Type header (which declares the boundary)
// and a --data-binary argument with the encoded body.
// Keys and values are inserted into the body as-is, without any escaping (see the TODOs below).
std::vector<CommandArg> create_command_args_from_form_data(const std::vector<FormData> &form_data) {
    // TODO: This boundary value might need to change, depending on the content. What if the form data contains the boundary value?
    const std::string boundary = "-----------------------------119561554312148213571335532670";
    // In the body, every part is introduced by a delimiter line that is "--" followed by the boundary value
    // declared in the Content-Type header (RFC 2046, section 5.1.1). Using the bare boundary value as the
    // delimiter would not match what the header declares.
    const std::string delimiter = "--" + boundary;

    std::string form_data_str;
    for(const FormData &form_data_item : form_data) {
        form_data_str += delimiter;
        form_data_str += "\r\n";
        // TODO: What if the form key contains " or \r\n ?
        form_data_str += "Content-Disposition: form-data; name=\"" + form_data_item.key + "\"";
        form_data_str += "\r\n\r\n";
        // TODO: What if the value contains \r\n ?
        form_data_str += form_data_item.value;
        form_data_str += "\r\n";
    }

    // The closing delimiter is the regular delimiter followed by "--".
    // TODO: Verify if this should also be done if the form data is empty
    form_data_str += delimiter + "--";

    return {
        CommandArg{"-H", "Content-Type: multipart/form-data; boundary=" + boundary},
        CommandArg{"--data-binary", std::move(form_data_str)}
    };
}
}
|