From f91ff5856e2276825d450610d1b040fcf8bda6cd Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 8 Nov 2022 20:01:27 +0100 Subject: Add syntax highlighting (currently only for matrix codeblocks) --- README.md | 1 + TODO | 3 +- include/Program.hpp | 7 ++ src/Program.cpp | 198 ++++++++++++++++++++++++++++++++++++++++++++++--- src/Text.cpp | 2 +- src/plugins/Matrix.cpp | 49 ++++++++++-- 6 files changed, 242 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 3aef04e..e7a32da 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Installing `lld` (the LLVM linker) can improve compile times. `xdg-utils` which provides `xdg-open` needs to be installed when downloading torrents with `nyaa.si` plugin.\ `ffmpeg (and ffprobe which is included in ffmpeg)` needs to be installed to display webp thumbnails, to upload videos with thumbnails on matrix or to merge video and audio when downloading youtube videos.\ `fc-match` (which is part of `fontconfig`) needs to be installed when `use_system_fonts` config is set to `true`.\ +`source-highlight` needs to be installed for syntax highlighting in matrix codeblocks.\ `wget` needs to be installed for xxx plugins. ## Matrix QuickMedia does currently not support encryption in matrix. If you want encryption then you can use [pantalaimon](https://github.com/matrix-org/pantalaimon/).\ diff --git a/TODO b/TODO index b05bd45..4f5ca5d 100644 --- a/TODO +++ b/TODO @@ -245,4 +245,5 @@ Downloading files should take into account the remove mime type if available. Fa Text images atlas. Do not render invalid unicode. Use matrix "from" with proper cache. -Text editing should take into consideration FORMATTED_TEXT_START/FORMATTED_TEXT_END. \ No newline at end of file +Text editing should take into consideration FORMATTED_TEXT_START/FORMATTED_TEXT_END. +4chan code syntax highlight. 4chan doesn't say what language it is so we have to somehow guess the language. 
\ No newline at end of file diff --git a/include/Program.hpp b/include/Program.hpp index 92defa1..e60a6ae 100644 --- a/include/Program.hpp +++ b/include/Program.hpp @@ -18,6 +18,13 @@ typedef int (*ProgramOutputCallback)(char *data, int size, void *userdata); */ int exec_program_pipe(const char **args, ReadProgram *read_program); +/* + @args need to have at least 2 arguments. The first which is the program name + and the last which is NULL, which indicates end of args. + |buffer_size| has to be between 1 and 65536. +*/ +int exec_program_write_stdin(const char **args, const char *str, size_t size, ProgramOutputCallback output_callback, void *userdata, int buffer_size = 16384); + /* @args need to have at least 2 arguments. The first which is the program name and the last which is NULL, which indicates end of args. diff --git a/src/Program.cpp b/src/Program.cpp index c66611d..ee189f7 100644 --- a/src/Program.cpp +++ b/src/Program.cpp @@ -13,8 +13,14 @@ #define READ_END 0 #define WRITE_END 1 +struct ReadWriteProgram { + pid_t pid = -1; + int read_fd = -1; + int write_fd = -1; +}; + struct ThreadProgram { - ReadProgram read_program; + ReadWriteProgram program; bool killed; }; @@ -26,8 +32,9 @@ public: CurrentThreadProgram() { std::lock_guard lock(thread_current_program_mutex); ThreadProgram thread_program; - thread_program.read_program.pid = -1; - thread_program.read_program.read_fd = -1; + thread_program.program.pid = -1; + thread_program.program.read_fd = -1; + thread_program.program.write_fd = -1; thread_program.killed = false; thread_current_program[std::this_thread::get_id()] = std::move(thread_program); } @@ -37,30 +44,52 @@ public: thread_current_program.erase(std::this_thread::get_id()); } + // TODO: Make sure the thread specific program has been stopped before this is called. 
exec_program_pipe needs to be modified for that void set(ReadProgram read_program) { + std::lock_guard lock(thread_current_program_mutex); + auto it = thread_current_program.find(std::this_thread::get_id()); + if(it != thread_current_program.end()) { + it->second.program.pid = read_program.pid; + it->second.program.read_fd = read_program.read_fd; + it->second.program.write_fd = -1; + } + } + + // TODO: Make sure the thread specific program has been stopped before this is called. exec_program_pipe needs to be modified for that + void set(ReadWriteProgram program) { std::lock_guard lock(thread_current_program_mutex); auto it = thread_current_program.find(std::this_thread::get_id()); if(it != thread_current_program.end()) - it->second.read_program = std::move(read_program); + it->second.program = std::move(program); } void clear() { std::lock_guard lock(thread_current_program_mutex); auto it = thread_current_program.find(std::this_thread::get_id()); if(it != thread_current_program.end()) { - it->second.read_program.pid = -1; - it->second.read_program.read_fd = -1; + it->second.program.pid = -1; + it->second.program.read_fd = -1; + it->second.program.write_fd = -1; } } + // TODO: This same mutex should be used in the exec_... functions when they do kill() etc to make sure we dont accidentally kill another program here if another process gets the killed process id! 
void kill_in_thread(const std::thread::id &thread_id) { std::lock_guard lock(thread_current_program_mutex); auto it = thread_current_program.find(thread_id); if(it != thread_current_program.end()) { - if(it->second.read_program.read_fd != -1) - close(it->second.read_program.read_fd); - if(it->second.read_program.pid != -1) - kill(it->second.read_program.pid, SIGTERM); + if(it->second.program.read_fd != -1) { + close(it->second.program.read_fd); + it->second.program.read_fd = -1; + } + if(it->second.program.write_fd != -1) { + close(it->second.program.write_fd); + it->second.program.write_fd = -1; + } + if(it->second.program.pid != -1) { + kill(it->second.program.pid, SIGTERM); + it->second.program.pid = -1; + } it->second.killed = true; } } @@ -116,6 +145,155 @@ int exec_program_pipe(const char **args, ReadProgram *read_program) { } } +static int exec_program_pipe2(const char **args, ReadWriteProgram *program) { + program->pid = -1; + program->read_fd = -1; + program->write_fd = -1; + + /* 1 arguments */ + if(args[0] == NULL) + return -1; + + if(current_thread_program.is_killed()) + return -1; + + int read_fd[2]; + if(pipe(read_fd) == -1) { + perror("Failed to open pipe"); + return -2; + } + + int write_fd[2]; + if(pipe(write_fd) == -1) { + close(read_fd[0]); + close(read_fd[1]); + perror("Failed to open pipe"); + return -2; + } + + pid_t pid = vfork(); + if(pid == -1) { + perror("Failed to vfork"); + close(read_fd[READ_END]); + close(read_fd[WRITE_END]); + close(write_fd[READ_END]); + close(write_fd[WRITE_END]); + return -3; + } else if(pid == 0) { /* child */ + dup2(read_fd[WRITE_END], STDOUT_FILENO); + close(read_fd[READ_END]); + close(read_fd[WRITE_END]); + + dup2(write_fd[READ_END], STDIN_FILENO); + close(write_fd[READ_END]); + close(write_fd[WRITE_END]); + + execvp(args[0], (char* const*)args); + perror("execvp"); + _exit(127); + } else { /* parent */ + close(read_fd[WRITE_END]); + close(write_fd[READ_END]); + program->pid = pid; + program->read_fd = 
read_fd[READ_END]; + program->write_fd = write_fd[WRITE_END]; + current_thread_program.set(*program); + return 0; + } +} + +int exec_program_write_stdin(const char **args, const char *str, size_t size, ProgramOutputCallback output_callback, void *userdata, int buffer_size) { + ReadWriteProgram program; + int res = exec_program_pipe2(args, &program); + if(res != 0) + return res; + + int result = 0; + int status; + int exit_status; + + assert(buffer_size >= 1 && buffer_size <= 65536); + char *buffer = (char*)alloca(buffer_size + 1); + + const ssize_t write_buffer_size = 8192; + size_t write_offset = 0; + while(write_offset < size) { + ssize_t write_size = (ssize_t)size - (ssize_t)write_offset; + if(write_size > write_buffer_size) + write_size = write_buffer_size; + + ssize_t bytes_written = write(program.write_fd, str + write_offset, write_size); + if(bytes_written == -1) { + int err = errno; + fprintf(stderr, "Failed to write to pipe to program %s, error: %s\n", args[0], strerror(err)); + result = -err; + break; + } + + if(bytes_written < write_size) + write_size = bytes_written; + + write_offset += write_size; + } + + close(program.write_fd); + + if(result == 0) { + for(;;) { + ssize_t bytes_read = read(program.read_fd, buffer, buffer_size); + if(bytes_read == 0) { + break; + } else if(bytes_read == -1) { + int err = errno; + fprintf(stderr, "Failed to read from pipe to program %s, error: %s\n", args[0], strerror(err)); + result = -err; + break; + } + + buffer[bytes_read] = '\0'; + if(output_callback) { + result = output_callback(buffer, bytes_read, userdata); + if(result != 0) + break; + } + } + } + + // TODO: Set program.pid to -1 and with currenthreadprogram mutex. 
Same in other places + if(result != 0) + kill(program.pid, SIGTERM); + + if(waitpid(program.pid, &status, 0) == -1) { + perror("waitpid failed"); + result = -5; + goto cleanup; + } + + if(!WIFEXITED(status)) { + result = -4; + goto cleanup; + } + + exit_status = WEXITSTATUS(status); + if(exit_status != 0) { + fprintf(stderr, "Failed to execute program ("); + const char **arg = args; + while(*arg) { + if(arg != args) + fputc(' ', stderr); + fprintf(stderr, "'%s'", *arg); + ++arg; + } + fprintf(stderr, "), exit status %d\n", exit_status); + result = -exit_status; + } + + cleanup: + program_clear_current_thread(); + close(program.read_fd); + return result; +} + int exec_program(const char **args, ProgramOutputCallback output_callback, void *userdata, int buffer_size) { ReadProgram read_program; int res = exec_program_pipe(args, &read_program); diff --git a/src/Text.cpp b/src/Text.cpp index 44bf73c..836651c 100644 --- a/src/Text.cpp +++ b/src/Text.cpp @@ -507,7 +507,7 @@ namespace QuickMedia text_element.text_type = TextElement::TextType::TEXT; if(text_element.type == TextElement::Type::TEXT) { const std::string_view inside_text = text_element.text; - text_element.text = std::string_view("", 0); + text_element.text = std::string_view(); text_element.text_num_bytes = 0; text_element.type = TextElement::Type::FORMAT_START; text_elements.push_back(text_element); diff --git a/src/plugins/Matrix.cpp b/src/plugins/Matrix.cpp index a9284f5..d1a060d 100644 --- a/src/plugins/Matrix.cpp +++ b/src/plugins/Matrix.cpp @@ -2289,10 +2289,21 @@ namespace QuickMedia { bool inside_font_tag = false; bool font_tag_has_custom_color = false; bool inside_code_tag = false; + std::string_view code_tag_language; bool allow_formatted_text = false; + bool inside_source_highlight = false; + bool supports_syntax_highlight = false; mgl::Color font_color = mgl::Color(255, 255, 255, 255); }; + static int accumulate_string(char *data, int size, void *userdata) { + std::string *str = 
(std::string*)userdata; + if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable + return 1; + str->append(data, size); + return 0; + } + // TODO: Full proper parsing with tag depth static int formattext_text_parser_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) { FormattedTextParseUserdata &parse_userdata = *(FormattedTextParseUserdata*)userdata; @@ -2304,8 +2315,10 @@ namespace QuickMedia { parse_userdata.inside_font_tag = true; else if(html_parser->tag_name.size == 8 && memcmp(html_parser->tag_name.data, "mx-reply", 8) == 0) ++parse_userdata.mx_reply_depth; - else if(html_parser->tag_name.size == 4 && memcmp(html_parser->tag_name.data, "code", 4) == 0) + else if(html_parser->tag_name.size == 4 && memcmp(html_parser->tag_name.data, "code", 4) == 0) { parse_userdata.inside_code_tag = true; + parse_userdata.code_tag_language = std::string_view(); + } break; } case HTML_PARSE_TAG_END: { @@ -2325,6 +2338,9 @@ namespace QuickMedia { if(parse_userdata.inside_font_tag && html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "color", 5) == 0) { if(parse_hex_set_color(html_parser->attribute_value.data, html_parser->attribute_value.size, parse_userdata.font_color)) parse_userdata.font_tag_has_custom_color = true; + } else if(parse_userdata.inside_code_tag && html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "class", 5) == 0) { + if(html_parser->attribute_value.size > 9 && memcmp(html_parser->attribute_value.data, "language-", 9) == 0) + parse_userdata.code_tag_language = std::string_view(html_parser->attribute_value.data + 9, html_parser->attribute_value.size - 9); } break; } @@ -2335,12 +2351,32 @@ namespace QuickMedia { html_unescape_sequences(text_to_add); uint8_t formatted_text_flags = FORMATTED_TEXT_FLAG_NONE; - if(parse_userdata.font_tag_has_custom_color) - formatted_text_flags |= FORMATTED_TEXT_FLAG_COLOR; - if(parse_userdata.inside_code_tag) - 
formatted_text_flags |= FORMATTED_TEXT_FLAG_MONOSPACE; + if(parse_userdata.allow_formatted_text) { + if(parse_userdata.font_tag_has_custom_color) + formatted_text_flags |= FORMATTED_TEXT_FLAG_COLOR; + + if(parse_userdata.inside_source_highlight || !parse_userdata.supports_syntax_highlight || (parse_userdata.inside_code_tag && parse_userdata.code_tag_language.size() == 0)) { + formatted_text_flags |= FORMATTED_TEXT_FLAG_MONOSPACE; + } else if(parse_userdata.inside_code_tag) { + formatted_text_flags |= FORMATTED_TEXT_FLAG_MONOSPACE; + // TODO: guess language from code if no language is set. + // TODO: Allow the user to choose style in config file. + + const std::string code_language(parse_userdata.code_tag_language); + const char *args[] = { "source-highlight", "-f", "html", "-s", code_language.c_str(), "--style-file=esc256.style", "-o", "STDOUT", nullptr }; + std::string output; + if(exec_program_write_stdin(args, text_to_add.c_str(), text_to_add.size(), accumulate_string, &output) == 0) { + FormattedTextParseUserdata code_parse_userdata; + code_parse_userdata.allow_formatted_text = true; + code_parse_userdata.inside_source_highlight = true; + html_parser_parse(output.c_str(), output.size(), formattext_text_parser_callback, &code_parse_userdata); + text_to_add = std::move(code_parse_userdata.result); + formatted_text_flags = FORMATTED_TEXT_FLAG_NONE; + } + } + } - if(formatted_text_flags != FORMATTED_TEXT_FLAG_NONE && parse_userdata.allow_formatted_text) + if(formatted_text_flags != FORMATTED_TEXT_FLAG_NONE) parse_userdata.result += Text::formatted_text(text_to_add, parse_userdata.font_color, formatted_text_flags); else parse_userdata.result += std::move(text_to_add); @@ -2354,6 +2390,7 @@ namespace QuickMedia { std::string formatted_text_to_qm_text(const char *str, size_t size, bool allow_formatted_text) { FormattedTextParseUserdata parse_userdata; parse_userdata.allow_formatted_text = allow_formatted_text; + parse_userdata.supports_syntax_highlight = 
is_program_executable_by_name("source-highlight"); html_parser_parse(str, size, formattext_text_parser_callback, &parse_userdata); return std::move(parse_userdata.result); } -- cgit v1.2.3