aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2021-05-11 16:04:02 +0200
committerdec05eba <dec05eba@protonmail.com>2021-05-11 16:04:52 +0200
commitd1920de838b17847b2e8a1520af0d82e670d5558 (patch)
tree520220da38f258f788a42ee0b13fac74d980acbc
parent2be5a938f906fa1ec95b424a0efd5fab3a1acdc7 (diff)
Parse content disposition better (when it contains encoding)
-rw-r--r--include/NetUtils.hpp1
-rw-r--r--src/DownloadUtils.cpp27
-rw-r--r--src/NetUtils.cpp40
3 files changed, 58 insertions, 10 deletions
diff --git a/include/NetUtils.hpp b/include/NetUtils.hpp
index bacafc7..a142884 100644
--- a/include/NetUtils.hpp
+++ b/include/NetUtils.hpp
@@ -12,6 +12,7 @@ namespace QuickMedia {
void html_escape_sequences(std::string &str);
void html_unescape_sequences(std::string &str);
std::string url_param_encode(const std::string &param);
+ std::string url_param_decode(const std::string &param); // Decodes "%XX" hex escapes into the bytes they encode ("%%" yields '%')
std::vector<Range> extract_urls(const std::string &str);
std::vector<std::string> ranges_get_strings(const std::string &str, const std::vector<Range> &ranges);
void convert_utf8_to_utf32_ranges(const std::string &str, std::vector<Range> &ranges);
diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp
index a054454..5b5246a 100644
--- a/src/DownloadUtils.cpp
+++ b/src/DownloadUtils.cpp
@@ -73,7 +73,21 @@ namespace QuickMedia {
fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
std::string content_disposition = header_extract_value(header, "content-disposition");
- if(content_disposition.empty()) {
+ size_t filename_start = content_disposition.find("filename=");
+ if(filename_start == std::string::npos) {
+ // TODO: after filename*= the encoding type will follow. We need to support other formats than utf-8 as well
+ filename_start = content_disposition.find("filename*=");
+ if(filename_start != std::string::npos) {
+ filename_start += 10;
+ filename_start = content_disposition.find("''", filename_start);
+ if(filename_start != std::string::npos)
+ filename_start += 2;
+ }
+ } else {
+ filename_start += 9;
+ }
+
+ if(filename_start == std::string::npos) {
size_t filename_start = url.rfind('/');
if(filename_start == std::string::npos) {
result = "";
@@ -90,16 +104,9 @@ namespace QuickMedia {
}
}
- result = url.substr(filename_start, filename_end - filename_start);
+ result = url_param_decode(url.substr(filename_start, filename_end - filename_start));
return DownloadResult::OK;
} else {
- size_t filename_start = content_disposition.find("filename=");
- if(filename_start == std::string::npos) {
- result = "";
- return DownloadResult::OK;
- }
-
- filename_start += 9;
for(size_t i = filename_start; i < content_disposition.size(); ++i) {
char c = content_disposition[i];
if(c != '"' && c != ' ') {
@@ -117,7 +124,7 @@ namespace QuickMedia {
}
}
- result = content_disposition.substr(filename_start, filename_end - filename_start);
+ result = url_param_decode(content_disposition.substr(filename_start, filename_end - filename_start));
return DownloadResult::OK;
}
}
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
index dc7c2d2..de908b1 100644
--- a/src/NetUtils.cpp
+++ b/src/NetUtils.cpp
@@ -1563,6 +1563,18 @@ namespace QuickMedia {
return c >= '0' && c <= '9';
}
+    // Maps one hexadecimal digit (0-9, A-F, a-f) to its value 0..15; returns -1 for any other character.
+    static int get_hex_value(char c) {
+        const bool is_decimal = c >= '0' && c <= '9';
+        const bool is_upper_hex = c >= 'A' && c <= 'F';
+        const bool is_lower_hex = c >= 'a' && c <= 'f';
+        if(is_decimal) return c - '0';
+        if(is_upper_hex) return 10 + (c - 'A');
+        if(is_lower_hex) return 10 + (c - 'a');
+        // Anything outside the three ranges above is not a hex digit.
+        return -1;
+    }
+
static bool is_whitespace(char c) {
return c == ' ' || c == '\t' || c == '\n';
}
@@ -1584,6 +1596,34 @@ namespace QuickMedia {
return result.str();
}
+    // Percent-decodes param: "%XX" (two hex digits) becomes the byte 0xXX and "%%" becomes a single '%'.
+    // A '%' that starts neither form is kept as-is (not dropped), so a name like "100%.txt" survives intact.
+    std::string url_param_decode(const std::string &param) {
+        const size_t size = param.size();
+        std::string result;
+        result.reserve(size); // the decoded text is never longer than the input
+        for(size_t i = 0; i < size;) {
+            const char c = param[i];
+            if(c == '%' && i + 1 < size && param[i + 1] == '%') {
+                result += '%';
+                i += 2;
+                continue;
+            }
+            if(c == '%' && i + 2 < size) {
+                const int high = get_hex_value(param[i + 1]);
+                const int low = get_hex_value(param[i + 2]);
+                if(high != -1 && low != -1) {
+                    result += static_cast<char>((high << 4) | low);
+                    i += 3;
+                    continue;
+                }
+            }
+            result += c; // ordinary character, or a '%' that does not begin a valid escape
+            i += 1;
+        }
+        return result;
+    }
+
static bool is_url_start_char(char c) {
return is_alpha(c) || is_digit(c) || c == '-' || c == '.' || c == '_' || c == '~';
}