diff options
author | dec05eba <dec05eba@protonmail.com> | 2020-10-16 04:49:14 +0200 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2020-10-16 04:49:14 +0200 |
commit | 221522cf995cbcd39c956f66423a26bbccae8f72 (patch) | |
tree | b09dd14964465e4d69082725d4ad950141692d8f | |
parent | 66a97007eb36a112f31e923c20e434ba8b39c4ba (diff) |
Matrix: stream download to rapidjson parser
mode | file | lines changed |
---|---|---|
-rw-r--r-- | TODO | 2 |
-rw-r--r-- | include/DownloadUtils.hpp | 2 |
-rw-r--r-- | include/Program.h | 15 |
-rw-r--r-- | src/DownloadUtils.cpp | 45 |
-rw-r--r-- | src/Program.c | 136 |
-rw-r--r-- | src/plugins/Matrix.cpp | 16 |
6 files changed, 111 insertions, 105 deletions
@@ -106,4 +106,4 @@ Retry download if it fails, at least 3 times (observed to be needed for mangadex Readd autocomplete, but make it better with a proper list. Also readd 4chan login page and manganelo creators page. Fix logout/login in matrix. Currently it doesn't work because data is cleared while sync is in progress, leading to the first sync sometimes being with previous data... Modify sfml to use GL_COMPRESSED_LUMINANCE and other texture compression modes (in sf::Texture). This reduces memory usage by half. -Decrease memory usage even further (mostly in matrix /sync when part of large rooms) by using rapidjson SAX style API to stream downloaded json into the json parser, instead of using download_to_string with accumulate_string.
\ No newline at end of file +Decrease memory usage even further (mostly in matrix /sync when part of large rooms) by using rapidjson SAX style API to stream json string into SAX style parsing.
\ No newline at end of file diff --git a/include/DownloadUtils.hpp b/include/DownloadUtils.hpp index 7ce4b15..2d2b813 100644 --- a/include/DownloadUtils.hpp +++ b/include/DownloadUtils.hpp @@ -2,6 +2,7 @@ #include <string> #include <vector> +#include <rapidjson/document.h> namespace QuickMedia { enum class DownloadResult { @@ -17,4 +18,5 @@ namespace QuickMedia { DownloadResult download_to_string(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent = false, bool fail_on_error = true); DownloadResult download_to_string_cache(const std::string &url, std::string &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent = false); + DownloadResult download_to_json(const std::string &url, rapidjson::Document &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent = false, bool fail_on_error = true); }
\ No newline at end of file diff --git a/include/Program.h b/include/Program.h index 3cbf09e..cab8d26 100644 --- a/include/Program.h +++ b/include/Program.h @@ -10,8 +10,7 @@ extern "C" { typedef struct { pid_t pid; int read_fd; - int write_fd; -} ProgramPipe; +} ReadProgram; /* Return 0 if you want to continue reading. @data is null-terminated */ typedef int (*ProgramOutputCallback)(char *data, int size, void *userdata); @@ -20,6 +19,12 @@ typedef int (*ProgramOutputCallback)(char *data, int size, void *userdata); @args need to have at least 2 arguments. The first which is the program name and the last which is NULL, which indicates end of args */ +int exec_program_pipe(const char **args, ReadProgram *read_program); + +/* + @args need to have at least 2 arguments. The first which is the program name + and the last which is NULL, which indicates end of args +*/ int exec_program(const char **args, ProgramOutputCallback output_callback, void *userdata); // Return the exit status, or a negative value if waiting failed @@ -35,12 +40,6 @@ int wait_program_non_blocking(pid_t process_id, int *status); and you want the child process to be cleaned up automatically when it dies. 
*/ int exec_program_async(const char **args, pid_t *result_process_id); -#if 0 - -int program_pipe_write(ProgramPipe *self, const char *data, size_t size); -int program_pipe_read(ProgramPipe *self, ProgramOutputCallback output_callback, void *userdata); -void program_pipe_close(ProgramPipe *self); -#endif #ifdef __cplusplus } diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp index c44bed5..4f8b5b9 100644 --- a/src/DownloadUtils.cpp +++ b/src/DownloadUtils.cpp @@ -3,6 +3,8 @@ #include "../include/Storage.hpp" #include "../include/base64_url.hpp" #include <SFML/System/Clock.hpp> +#include <rapidjson/filereadstream.h> +#include <unistd.h> static const bool debug_download = false; @@ -77,4 +79,47 @@ namespace QuickMedia { return download_result; } } + + // TODO: Add timeout + DownloadResult download_to_json(const std::string &url, rapidjson::Document &result, const std::vector<CommandArg> &additional_args, bool use_tor, bool use_browser_useragent, bool fail_on_error) { + sf::Clock timer; + std::vector<const char*> args; + if(use_tor) + args.push_back("torsocks"); + args.insert(args.end(), { "curl", "-H", "Accept-Language: en-US,en;q=0.5", "-H", "Connection: keep-alive", "--compressed", "-s", "-L" }); + if(fail_on_error) + args.push_back("-f"); + for(const CommandArg &arg : additional_args) { + args.push_back(arg.option.c_str()); + args.push_back(arg.value.c_str()); + } + if(use_browser_useragent) { + args.push_back("-H"); + args.push_back(useragent_str); + } + args.push_back("--"); + args.push_back(url.c_str()); + args.push_back(nullptr); + if(debug_download) { + for(const char *arg : args) { + if(arg) + fprintf(stderr, "'%s' ", arg); + } + fprintf(stderr, "\n"); + } + ReadProgram read_program; + if(exec_program_pipe(args.data(), &read_program) != 0) + return DownloadResult::NET_ERR; + + FILE *file = fdopen(read_program.read_fd, "rb"); + char read_buffer[8192]; + rapidjson::FileReadStream is(file, read_buffer, sizeof(read_buffer)); + rapidjson::ParseResult 
parse_result = result.ParseStream(is); + + wait_program(read_program.pid); + fclose(file); + fprintf(stderr, "Download duration for %s: %d ms\n", url.c_str(), timer.getElapsedTime().asMilliseconds()); + + return parse_result.IsError() ? DownloadResult::ERR : DownloadResult::OK; + } }
\ No newline at end of file diff --git a/src/Program.c b/src/Program.c index 2307798..fc80e5e 100644 --- a/src/Program.c +++ b/src/Program.c @@ -11,7 +11,7 @@ #define READ_END 0 #define WRITE_END 1 -int exec_program(const char **args, ProgramOutputCallback output_callback, void *userdata) { +int exec_program_pipe(const char **args, ReadProgram *read_program) { /* 1 arguments */ if(args[0] == NULL) return -1; @@ -49,58 +49,68 @@ int exec_program(const char **args, ProgramOutputCallback output_callback, void _exit(127); } else { /* parent */ close(fd[WRITE_END]); + read_program->pid = pid; + read_program->read_fd = fd[READ_END]; + return 0; + } +} - int result = 0; - int status; - - char buffer[4097]; +int exec_program(const char **args, ProgramOutputCallback output_callback, void *userdata) { + ReadProgram read_program; + int res = exec_program_pipe(args, &read_program); + if(res != 0) + return res; - for(;;) { - ssize_t bytes_read = read(fd[READ_END], buffer, sizeof(buffer) - 1); - if(bytes_read == 0) { - break; - } else if(bytes_read == -1) { - int err = errno; - fprintf(stderr, "Failed to read from pipe to program %s, error: %s\n", args[0], strerror(err)); - result = -err; - goto cleanup; - } + int result = 0; + int status; - buffer[bytes_read] = '\0'; - if(output_callback && output_callback(buffer, bytes_read, userdata) != 0) - break; - } + char buffer[4097]; - if(waitpid(pid, &status, 0) == -1) { - perror("waitpid failed"); - result = -5; + for(;;) { + ssize_t bytes_read = read(read_program.read_fd, buffer, sizeof(buffer) - 1); + if(bytes_read == 0) { + break; + } else if(bytes_read == -1) { + int err = errno; + fprintf(stderr, "Failed to read from pipe to program %s, error: %s\n", args[0], strerror(err)); + result = -err; goto cleanup; } - if(!WIFEXITED(status)) { - result = -4; - goto cleanup; - } + buffer[bytes_read] = '\0'; + if(output_callback && output_callback(buffer, bytes_read, userdata) != 0) + break; + } - int exit_status = WEXITSTATUS(status); - 
if(exit_status != 0) { - fprintf(stderr, "Failed to execute program ("); - const char **arg = args; - while(*arg) { - if(arg != args) - fputc(' ', stderr); - fprintf(stderr, "'%s'", *arg); - ++arg; - } - fprintf(stderr, "), exit status %d\n", exit_status); - result = -exit_status; - goto cleanup; - } + if(waitpid(read_program.pid, &status, 0) == -1) { + perror("waitpid failed"); + result = -5; + goto cleanup; + } - cleanup: - close(fd[READ_END]); - return result; + if(!WIFEXITED(status)) { + result = -4; + goto cleanup; + } + + int exit_status = WEXITSTATUS(status); + if(exit_status != 0) { + fprintf(stderr, "Failed to execute program ("); + const char **arg = args; + while(*arg) { + if(arg != args) + fputc(' ', stderr); + fprintf(stderr, "'%s'", *arg); + ++arg; + } + fprintf(stderr, "), exit status %d\n", exit_status); + result = -exit_status; + goto cleanup; } + + cleanup: + close(read_program.read_fd); + return result; } int wait_program(pid_t process_id) { @@ -186,43 +196,3 @@ int exec_program_async(const char **args, pid_t *result_process_id) { } return 0; } - -#if 0 -int program_pipe_write(ProgramPipe *self, const char *data, size_t size) { - ssize_t bytes_written = write(self->write_fd, data, size); - if(bytes_written == -1) { - int err = errno; - perror("Failed to write to pipe to program"); - return -err; - } - return 0; -} - -int program_pipe_read(ProgramPipe *self, ProgramOutputCallback output_callback, void *userdata) { - char buffer[2048]; - - for(;;) { - ssize_t bytes_read = read(self->read_fd, buffer, sizeof(buffer) - 1); - if(bytes_read == 0) { - break; - } else if(bytes_read == -1) { - int err = errno; - perror("Failed to read from pipe to program"); - return -err; - } - - buffer[bytes_read] = '\0'; - if(output_callback && output_callback(buffer, bytes_read, userdata) != 0) - break; - } - - return 0; -} - -void program_pipe_close(ProgramPipe *self) { - close(self->read_fd); - close(self->write_fd); - self->read_fd = -1; - self->write_fd = -1; -} 
-#endif diff --git a/src/plugins/Matrix.cpp b/src/plugins/Matrix.cpp index 3637d41..a0d8568 100644 --- a/src/plugins/Matrix.cpp +++ b/src/plugins/Matrix.cpp @@ -1730,22 +1730,12 @@ namespace QuickMedia { } DownloadResult Matrix::download_json(rapidjson::Document &result, const std::string &url, std::vector<CommandArg> additional_args, bool use_browser_useragent, std::string *err_msg) const { - std::string server_response; - if(download_to_string(url, server_response, std::move(additional_args), use_tor, use_browser_useragent, err_msg == nullptr) != DownloadResult::OK) { + if(download_to_json(url, result, std::move(additional_args), use_tor, use_browser_useragent, err_msg == nullptr) != DownloadResult::OK) { + // Cant get error since we parse directory to json. TODO: Make this work somehow? if(err_msg) - *err_msg = server_response; + *err_msg = "Failed to download/parse json"; return DownloadResult::NET_ERR; } - - rapidjson::ParseResult parse_result = result.Parse(server_response.c_str(), server_response.size()); - if(parse_result.IsError()) { - std::string error_code_str = std::to_string(parse_result.Code()); - fprintf(stderr, "download_json error: %s\n", error_code_str.c_str()); - if(err_msg) - *err_msg = "Json parse error: " + std::move(error_code_str); - return DownloadResult::ERR; - } - return DownloadResult::OK; } }
\ No newline at end of file |