aboutsummaryrefslogtreecommitdiff
path: root/src/DownloadUtils.cpp
blob: 8782020f34ca63626a145eddf70b00b60fcd8e7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include "../include/DownloadUtils.hpp"
#include "../include/Program.h"
#include "../include/Storage.hpp"
#include "../include/base64_url.hpp"
#include <SFML/System/Clock.hpp>
#include <rapidjson/document.h>
#include <rapidjson/filereadstream.h>
#include <stdio.h>
#include <string>
#include <unistd.h>
#include <vector>

// Compile-time switch: when true, the download functions echo the full
// command line to stderr before launching it.
static constexpr bool debug_download{false};

// Output callback that appends a chunk of program output to a std::string.
//
// data:     pointer to the chunk (|size| bytes).
// size:     number of bytes in |data|. A negative value is rejected.
// userdata: must point to the std::string that receives the data.
//
// Returns 0 when the chunk was appended, and 1 when it was rejected because
// |size| is negative or because appending it would make the string larger
// than the 100 MiB limit. The string is left untouched when 1 is returned.
static int accumulate_string(char *data, int size, void *userdata) {
    // 100mb sane limit, TODO: make configurable
    constexpr std::size_t max_total_size = static_cast<std::size_t>(1024) * 1024 * 100;

    // A negative size would be converted to a huge unsigned value below,
    // slip past the limit check and make std::string::append throw.
    if(size < 0)
        return 1;

    std::string *str = static_cast<std::string*>(userdata);
    const std::size_t chunk_size = static_cast<std::size_t>(size);
    // Compare against the remaining room instead of adding the two sizes,
    // so the check cannot wrap around.
    if(str->size() > max_total_size || chunk_size > max_total_size - str->size())
        return 1;

    str->append(data, chunk_size);
    return 0;
}

// Complete "user-agent" header line (name and value). It is handed to curl
// via -H when a browser user agent is requested.
static const char *useragent_str =
    "user-agent: Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/81.0.4044.138 Safari/537.36";

namespace QuickMedia {
    // TODO: Add timeout
    DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent, bool fail_on_error) {
        sf::Clock timer;
        std::vector<const char*> args;
        if(use_tor)
            args.push_back("torsocks");
        args.insert(args.end(), { "curl", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-s", "-L" });
        if(fail_on_error)
            args.push_back("-f");
        for(const CommandArg &arg : additional_args) {
            args.push_back(arg.option.c_str());
            args.push_back(arg.value.c_str());
        }
        if(use_browser_useragent) {
            args.push_back("-H");
            args.push_back(useragent_str);
        }
        args.push_back("--");
        args.push_back(url.c_str());
        args.push_back(nullptr);
        if(debug_download) {
            for(const char *arg : args) {
                if(arg)
                    fprintf(stderr, "'%s' ", arg);
            }
            fprintf(stderr, "\n");
        }
        if(exec_program(args.data(), accumulate_string, &result) != 0)
            return DownloadResult::NET_ERR;
        fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
        return DownloadResult::OK;
    }

    DownloadResult download_to_string_cache(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent) {
        Path media_dir = get_cache_dir().join("media");
        Path media_file_path = Path(media_dir).join(base64_url::encode(url));
        if(get_file_type(media_file_path) == FileType::REGULAR) {
            if(file_get_content(media_file_path, result) == 0) {
                fprintf(stderr, "Loaded %s from cache\n", url.c_str());
                return DownloadResult::OK;
            } else {
                fprintf(stderr, "Failed to get content of cached media file: %s\n", media_file_path.data.c_str());
                return DownloadResult::ERR;
            }
        } else {
            DownloadResult download_result = download_to_string(url, result, additional_args, use_tor, use_browser_useragent);
            if(download_result == DownloadResult::OK) {
                Path media_file_path_tmp(media_file_path.data + ".tmp");
                if(create_directory_recursive(media_dir) == 0 && file_overwrite(media_file_path_tmp, result) == 0) {
                    if(rename(media_file_path_tmp.data.c_str(), media_file_path.data.c_str()) != 0) {
                        perror("rename");
                        download_result = DownloadResult::ERR;
                    }
                } else {
                    download_result = DownloadResult::ERR;
                }
            }
            return download_result;
        }
    }

    // TODO: Add timeout
    DownloadResult download_to_json(const std::string &url, rapidjson::Document &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent, bool fail_on_error) {
        sf::Clock timer;
        std::vector<const char*> args;
        if(use_tor)
            args.push_back("torsocks");
        args.insert(args.end(), { "curl", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-s", "-L" });
        if(fail_on_error)
            args.push_back("-f");
        for(const CommandArg &arg : additional_args) {
            args.push_back(arg.option.c_str());
            args.push_back(arg.value.c_str());
        }
        if(use_browser_useragent) {
            args.push_back("-H");
            args.push_back(useragent_str);
        }
        args.push_back("--");
        args.push_back(url.c_str());
        args.push_back(nullptr);
        if(debug_download) {
            for(const char *arg : args) {
                if(arg)
                    fprintf(stderr, "'%s' ", arg);
            }
            fprintf(stderr, "\n");
        }

        ReadProgram read_program;
        if(exec_program_pipe(args.data(), &read_program) != 0)
            return DownloadResult::NET_ERR;
        
        FILE *file = fdopen(read_program.read_fd, "rb");
        char read_buffer[8192];
        rapidjson::FileReadStream is(file, read_buffer, sizeof(read_buffer));
        rapidjson::ParseResult parse_result = result.ParseStream(is);

        fclose(file);
        wait_program(read_program.pid);
        fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds());
        return parse_result.IsError() ? DownloadResult::ERR : DownloadResult::OK;
    }
}