about summary refs log tree commit diff
path: root/src/DownloadUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/DownloadUtils.cpp')
-rw-r--r-- src/DownloadUtils.cpp 100
1 files changed, 88 insertions, 12 deletions
diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp
index 0977b78..756da10 100644
--- a/src/DownloadUtils.cpp
+++ b/src/DownloadUtils.cpp
@@ -10,19 +10,85 @@
#include <rapidjson/document.h>
#include <rapidjson/filereadstream.h>
-static const bool debug_download = false;
+namespace QuickMedia {
+ struct DownloadUserdata { // State handed to accumulate_string_with_header through its void *userdata argument
+ std::string *header = nullptr; // Receives the response header text; when null, every chunk is appended to body instead
+ std::string *body = nullptr; // Receives the response body; dereferenced unconditionally by the callback, so it must be set
+ int download_limit = 1024 * 1024 * 100; // 100mb; the callback returns 1 once total_downloaded_size reaches this value
+ bool header_finished = false; // Set by the callback once it has accepted the end of the header block
+ int total_downloaded_size = 0; // Running sum of the sizes of all chunks seen so far (header bytes included)
+ };
+
+ static const bool debug_download = false; // Compile-time switch guarding a debug block in download_to_string that walks the command arguments and writes to stderr
-static int accumulate_string(char *data, int size, void *userdata) {
- std::string *str = (std::string*)userdata;
- if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable
- return 1;
- str->append(data, size);
- return 0;
-}
+ static int accumulate_string(char *data, int size, void *userdata) { // Output callback: userdata must point to the std::string that collects the data
+ std::string *str = (std::string*)userdata;
+ if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable
+ return 1; // The chunk is dropped (not appended) when it would push the string past the limit
+ str->append(data, size);
+ return 0; // NOTE(review): 0 presumably tells exec_program to keep reading and non-zero to stop -- confirm against exec_program
+ }
-static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";
+ // Returns the numeric value of the "content-length" field in |header|, or 0 if the field is not found or cannot be parsed as a number
+ static long get_content_length(const std::string &header) {
+ std::string content_length_str = header_extract_value(header, "content-length"); // header_extract_value is defined outside this view; presumably it yields an empty string when the field is absent -- confirm
+ if(content_length_str.empty())
+ return 0;
+
+ errno = 0; // Cleared so a stale errno value cannot be mistaken for a strtol failure
+ char *endptr;
+ const long content_length_num = strtol(content_length_str.c_str(), &endptr, 10);
+ if(endptr != content_length_str.c_str() && errno == 0) // Accept only if strtol consumed some input and did not set errno
+ return content_length_num; // Note: a field whose value is literally 0 is indistinguishable from "not found" for the caller
+
+ return 0;
+ }
+
+    // Output callback for exec_program. |userdata| must point to a DownloadUserdata. While the
+    // header is unfinished, data is buffered in |header|; once a header block ending in "\r\n\r\n"
+    // is accepted, the bytes after it are moved to |body| and later chunks go straight to |body|.
+    // Returns 1 once the total number of received bytes reaches download_limit, otherwise 0.
+    static int accumulate_string_with_header(char *data, int size, void *userdata) {
+        DownloadUserdata *download_userdata = (DownloadUserdata*)userdata;
+
+        if(download_userdata->header_finished || !download_userdata->header) {
+            download_userdata->body->append(data, size);
+        } else {
+            download_userdata->header->append(data, size);
+            bool end_of_header_found = false;
+            size_t end_of_headers_index = download_userdata->header->find("\r\n\r\n");
+            if(end_of_headers_index != std::string::npos) {
+                while(true) {
+                    const long content_length = get_content_length(download_userdata->header->substr(0, end_of_headers_index)); // TODO: Do not create a copy of the header string
+                    end_of_headers_index += 4; // Step over the "\r\n\r\n" terminator
+                    if(content_length == 0 && download_userdata->header->size() > end_of_headers_index) { // No body announced but more data follows: treat this as an earlier header block and drop it
+                        download_userdata->header->erase(0, end_of_headers_index);
+                        end_of_headers_index = download_userdata->header->find("\r\n\r\n");
+                        if(end_of_headers_index == std::string::npos)
+                            break; // The next header block is still incomplete; wait for more data
+                    } else {
+                        end_of_header_found = true;
+                        break;
+                    }
+                }
+            }
+
+            if(end_of_header_found) { // The body is not inspected: it is arbitrary data and may legitimately contain header-like text such as "Content-Type"
+                download_userdata->body->append(*download_userdata->header, end_of_headers_index, std::string::npos);
+                download_userdata->header->erase(end_of_headers_index);
+                download_userdata->header_finished = true;
+            }
+        }
+
+        download_userdata->total_downloaded_size += size;
+        if(download_userdata->total_downloaded_size >= download_userdata->download_limit)
+            return 1;
+
+        return 0;
+    }
+
+ static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"; // Full header line pushed right after "-H" in download_to_string; presumably only when use_browser_useragent is set -- confirm
-namespace QuickMedia {
DownloadResult download_head_to_string(const std::string &url, std::string &result, bool use_browser_useragent, bool fail_on_error) {
result.clear();
sf::Clock timer;
@@ -132,7 +198,7 @@ namespace QuickMedia {
}
// TODO: Add timeout
- DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool fail_on_error) {
+ DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool fail_on_error, std::string *header, int download_limit) {
result.clear();
sf::Clock timer;
std::vector<const char*> args;
@@ -148,9 +214,12 @@ namespace QuickMedia {
args.push_back("-H");
args.push_back(useragent_str);
}
+ if(header)
+ args.push_back("-i");
args.push_back("--");
args.push_back(url.c_str());
args.push_back(nullptr);
+
if(debug_download) {
for(const char *arg : args) {
if(arg)
@@ -158,8 +227,15 @@ namespace QuickMedia {
}
fprintf(stderr, "\n");
}
- if(exec_program(args.data(), accumulate_string, &result) != 0)
+
+ DownloadUserdata download_userdata;
+ download_userdata.header = header;
+ download_userdata.body = &result;
+ download_userdata.download_limit = download_limit;
+
+ if(exec_program(args.data(), accumulate_string_with_header, &download_userdata) != 0)
return DownloadResult::NET_ERR;
+
fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
return DownloadResult::OK;
}