From d1920de838b17847b2e8a1520af0d82e670d5558 Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Tue, 11 May 2021 16:04:02 +0200
Subject: Parse content disposition better (when it contains encoding)

---
 src/DownloadUtils.cpp | 27 +++++++++++++++++----------
 src/NetUtils.cpp      | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp
index a054454..5b5246a 100644
--- a/src/DownloadUtils.cpp
+++ b/src/DownloadUtils.cpp
@@ -73,7 +73,21 @@ namespace QuickMedia {
         fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
 
         std::string content_disposition = header_extract_value(header, "content-disposition");
-        if(content_disposition.empty()) {
+        size_t filename_start = content_disposition.find("filename=");
+        if(filename_start == std::string::npos) {
+            // TODO: after filename*= the encoding type will follow. We need to support other formats than utf-8 as well
+            filename_start = content_disposition.find("filename*=");
+            if(filename_start != std::string::npos) {
+                filename_start += 10;
+                filename_start = content_disposition.find("''", filename_start);
+                if(filename_start != std::string::npos)
+                    filename_start += 2;
+            }
+        } else {
+            filename_start += 9;
+        }
+
+        if(filename_start == std::string::npos) {
             size_t filename_start = url.rfind('/');
             if(filename_start == std::string::npos) {
                 result = "";
@@ -90,16 +104,9 @@ namespace QuickMedia {
                 }
             }
 
-            result = url.substr(filename_start, filename_end - filename_start);
+            result = url_param_decode(url.substr(filename_start, filename_end - filename_start));
             return DownloadResult::OK;
         } else {
-            size_t filename_start = content_disposition.find("filename=");
-            if(filename_start == std::string::npos) {
-                result = "";
-                return DownloadResult::OK;
-            }
-
-            filename_start += 9;
             for(size_t i = filename_start; i < content_disposition.size(); ++i) {
                 char c = content_disposition[i];
                 if(c != '"' && c != ' ') {
@@ -117,7 +124,7 @@ namespace QuickMedia {
                 }
             }
 
-            result = content_disposition.substr(filename_start, filename_end - filename_start);
+            result = url_param_decode(content_disposition.substr(filename_start, filename_end - filename_start));
             return DownloadResult::OK;
         }
     }
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
index dc7c2d2..de908b1 100644
--- a/src/NetUtils.cpp
+++ b/src/NetUtils.cpp
@@ -1563,6 +1563,18 @@ namespace QuickMedia {
         return c >= '0' && c <= '9';
     }
 
+    // Returns the numeric value (0-15) of the hexadecimal digit |c|, or -1 if |c| is not a hex digit
+    static int get_hex_value(char c) {
+        if(c >= '0' && c <= '9')
+            return c - '0';
+        else if(c >= 'A' && c <= 'F')
+            return 10 + (c - 'A'); // uppercase 'A'..'F' map to 10..15
+        else if(c >= 'a' && c <= 'f')
+            return 10 + (c - 'a'); // lowercase 'a'..'f' map to 10..15
+        else
+            return -1;
+    }
+
     static bool is_whitespace(char c) {
         return c == ' ' || c == '\t' || c == '\n';
     }
@@ -1584,6 +1596,34 @@ namespace QuickMedia {
         return result.str();
     }
 
+    std::string url_param_decode(const std::string &param) { // Decodes "%XY" escapes in |param| and returns the resulting bytes
+        std::string result;
+        for(int i = 0; i < (int)param.size();) { // |i| is advanced inside the body by 1, 2 or 3 depending on what was consumed
+            char c = param[i];
+            if(c == '%') {
+                if(i < (int)param.size() - 1 && param[i + 1] == '%') { // "%%" produces a single literal '%'
+                    result += c;
+                    i += 2;
+                } else if(i < (int)param.size() - 2) { // at least two more characters follow the '%'
+                    int first_c = get_hex_value(param[i + 1]);
+                    int second_c = get_hex_value(param[i + 2]);
+                    if(first_c != -1 && second_c != -1) {
+                        result += (char)((first_c << 4) | second_c); // high nibble from the first digit, low nibble from the second
+                        i += 3;
+                    } else {
+                        i += 1; // NOTE(review): invalid escape; the '%' is dropped from the output and scanning resumes at the next character - confirm this is intended
+                    }
+                } else {
+                    i += 1; // '%' with fewer than two characters after it: the '%' is dropped as well
+                }
+            } else {
+                result += c; // every other character (including '+') is copied unchanged
+                i += 1;
+            }
+        }
+        return result;
+    }
+
     static bool is_url_start_char(char c) {
         return is_alpha(c) || is_digit(c) || c == '-' || c == '.' || c == '_' || c == '~';
     }
-- 
cgit v1.2.3-70-g09d2