diff options
Diffstat (limited to 'src/DownloadUtils.cpp')
-rw-r--r-- | src/DownloadUtils.cpp | 100 |
1 files changed, 88 insertions, 12 deletions
diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp index 0977b78..756da10 100644 --- a/src/DownloadUtils.cpp +++ b/src/DownloadUtils.cpp @@ -10,19 +10,85 @@ #include <rapidjson/document.h> #include <rapidjson/filereadstream.h> -static const bool debug_download = false; +namespace QuickMedia { + struct DownloadUserdata { + std::string *header = nullptr; + std::string *body = nullptr; + int download_limit = 1024 * 1024 * 100; // 100mb + bool header_finished = false; + int total_downloaded_size = 0; + }; + + static const bool debug_download = false; -static int accumulate_string(char *data, int size, void *userdata) { - std::string *str = (std::string*)userdata; - if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable - return 1; - str->append(data, size); - return 0; -} + static int accumulate_string(char *data, int size, void *userdata) { + std::string *str = (std::string*)userdata; + if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable + return 1; + str->append(data, size); + return 0; + } -static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"; + // Returns 0 if content length is not found + static long get_content_length(const std::string &header) { + std::string content_length_str = header_extract_value(header, "content-length"); + if(content_length_str.empty()) + return 0; + + errno = 0; + char *endptr; + const long content_length_num = strtol(content_length_str.c_str(), &endptr, 10); + if(endptr != content_length_str.c_str() && errno == 0) + return content_length_num; + + return 0; + } + + static int accumulate_string_with_header(char *data, int size, void *userdata) { + DownloadUserdata *download_userdata = (DownloadUserdata*)userdata; + + if(download_userdata->header_finished || !download_userdata->header) { + download_userdata->body->append(data, size); + } else { + download_userdata->header->append(data, size); + bool end_of_header_found = false; + size_t end_of_headers_index = download_userdata->header->find("\r\n\r\n"); + if(end_of_headers_index != std::string::npos) { + while(true) { + const long content_length = get_content_length(download_userdata->header->substr(0, end_of_headers_index)); // TODO: Do not create a copy of the header string + end_of_headers_index += 4; + if(content_length == 0 && download_userdata->header->size() - end_of_headers_index > 0) { + download_userdata->header->erase(download_userdata->header->begin(), download_userdata->header->begin() + end_of_headers_index); + end_of_headers_index = download_userdata->header->find("\r\n\r\n"); + if(end_of_headers_index == std::string::npos) + break; + } else { + end_of_header_found = true; + break; + } + } + } + + if(end_of_header_found) { + download_userdata->body->append(download_userdata->header->begin() + end_of_headers_index, download_userdata->header->end()); + if(download_userdata->body->find("Content-Type") != std::string::npos) { + fprintf(stderr, "Found header in body!!!!, header: |%s|, body: |%s|\n", download_userdata->header->c_str(), download_userdata->body->c_str()); + abort(); + } + download_userdata->header->erase(download_userdata->header->begin() + end_of_headers_index, download_userdata->header->end()); + download_userdata->header_finished = true; + } + } + + download_userdata->total_downloaded_size += size; + if(download_userdata->total_downloaded_size >= download_userdata->download_limit) + return 1; + + return 0; + } + + static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"; -namespace QuickMedia { DownloadResult download_head_to_string(const std::string &url, std::string &result, bool use_browser_useragent, bool fail_on_error) { result.clear(); sf::Clock timer; @@ -132,7 +198,7 @@ namespace QuickMedia { } // TODO: Add timeout - DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool fail_on_error) { + DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_browser_useragent, bool fail_on_error, std::string *header, int download_limit) { result.clear(); sf::Clock timer; std::vector<const char*> args; @@ -148,9 +214,12 @@ namespace QuickMedia { args.push_back("-H"); args.push_back(useragent_str); } + if(header) + args.push_back("-i"); args.push_back("--"); args.push_back(url.c_str()); args.push_back(nullptr); + if(debug_download) { for(const char *arg : args) { if(arg) @@ -158,8 +227,15 @@ namespace QuickMedia { } fprintf(stderr, "\n"); } - if(exec_program(args.data(), accumulate_string, &result) != 0) + + DownloadUserdata download_userdata; + download_userdata.header = header; + download_userdata.body = &result; + download_userdata.download_limit = download_limit; + + if(exec_program(args.data(), accumulate_string_with_header, &download_userdata) != 0) return DownloadResult::NET_ERR; + fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds()); return DownloadResult::OK; } |