aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2022-11-08 20:01:27 +0100
committerdec05eba <dec05eba@protonmail.com>2022-11-08 20:01:27 +0100
commitf91ff5856e2276825d450610d1b040fcf8bda6cd (patch)
treed9f5bb5428fe87e9c59d08ba12b854d926f846b9
parent840b87c42bb55ae6f47acc7576b3b40af4c6a68c (diff)
Add syntax highlighting (currently only for matrix codeblocks)
-rw-r--r--README.md1
-rw-r--r--TODO3
-rw-r--r--include/Program.hpp7
-rw-r--r--src/Program.cpp198
-rw-r--r--src/Text.cpp2
-rw-r--r--src/plugins/Matrix.cpp49
6 files changed, 242 insertions, 18 deletions
diff --git a/README.md b/README.md
index 3aef04e..e7a32da 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ Installing `lld` (the LLVM linker) can improve compile times.
`xdg-utils` which provides `xdg-open` needs to be installed when downloading torrents with `nyaa.si` plugin.\
`ffmpeg (and ffprobe which is included in ffmpeg)` needs to be installed to display webp thumbnails, to upload videos with thumbnails on matrix or to merge video and audio when downloading youtube videos.\
`fc-match` (which is part of `fontconfig`) needs to be installed when `use_system_fonts` config is set to `true`.\
+`source-highlight` needs to be installed for syntax highlighting in matrix codeblocks.\
`wget` needs to be installed for xxx plugins.
## Matrix
QuickMedia does currently not support encryption in matrix. If you want encryption then you can use [pantalaimon](https://github.com/matrix-org/pantalaimon/).\
diff --git a/TODO b/TODO
index b05bd45..4f5ca5d 100644
--- a/TODO
+++ b/TODO
@@ -245,4 +245,5 @@ Downloading files should take into account the remove mime type if available. Fa
Text images atlas.
Do not render invalid unicode.
Use matrix "from" with proper cache.
-Text editing should take into consideration FORMATTED_TEXT_START/FORMATTED_TEXT_END. \ No newline at end of file
+Text editing should take into consideration FORMATTED_TEXT_START/FORMATTED_TEXT_END.
+4chan code syntax highlight. 4chan doesn't say what language it is so we have to somehow guess the language. \ No newline at end of file
diff --git a/include/Program.hpp b/include/Program.hpp
index 92defa1..e60a6ae 100644
--- a/include/Program.hpp
+++ b/include/Program.hpp
@@ -23,6 +23,13 @@ int exec_program_pipe(const char **args, ReadProgram *read_program);
and the last which is NULL, which indicates end of args.
|buffer_size| has to be between 1 and 65536.
*/
+int exec_program_write_stdin(const char **args, const char *str, size_t size, ProgramOutputCallback output_callback, void *userdata, int buffer_size = 16384);
+
+/*
+ @args need to have at least 2 arguments. The first which is the program name
+ and the last which is NULL, which indicates end of args.
+ |buffer_size| has to be between 1 and 65536.
+*/
int exec_program(const char **args, ProgramOutputCallback output_callback, void *userdata, int buffer_size = 16384);
// Return the exit status, or a negative value if waiting failed
diff --git a/src/Program.cpp b/src/Program.cpp
index c66611d..ee189f7 100644
--- a/src/Program.cpp
+++ b/src/Program.cpp
@@ -13,8 +13,14 @@
#define READ_END 0
#define WRITE_END 1
+struct ReadWriteProgram { // A spawned child process plus the parent's ends of the pipes connected to it
+ pid_t pid = -1; // Process id of the child, -1 when no process is set
+ int read_fd = -1; // Read end of the pipe the child's stdout is redirected to, -1 when not open
+ int write_fd = -1; // Write end of the pipe the child's stdin is redirected from, -1 when not open
+};
+
struct ThreadProgram {
- ReadProgram read_program;
+ ReadWriteProgram program;
bool killed;
};
@@ -26,8 +32,9 @@ public:
CurrentThreadProgram() {
std::lock_guard<std::mutex> lock(thread_current_program_mutex);
ThreadProgram thread_program;
- thread_program.read_program.pid = -1;
- thread_program.read_program.read_fd = -1;
+ thread_program.program.pid = -1;
+ thread_program.program.read_fd = -1;
+ thread_program.program.write_fd = -1;
thread_program.killed = false;
thread_current_program[std::this_thread::get_id()] = std::move(thread_program);
}
@@ -37,30 +44,52 @@ public:
thread_current_program.erase(std::this_thread::get_id());
}
+ // TODO: Make sure the thread specific program has been stopped before this is called. exec_program_pipe needs to be modified for that
void set(ReadProgram read_program) {
std::lock_guard<std::mutex> lock(thread_current_program_mutex);
auto it = thread_current_program.find(std::this_thread::get_id());
+ if(it != thread_current_program.end()) {
+ it->second.program.pid = read_program.pid;
+ it->second.program.read_fd = read_program.read_fd;
+ it->second.program.write_fd = -1;
+ }
+ }
+
+ // TODO: Make sure the thread specific program has been stopped before this is called. exec_program_pipe needs to be modified for that
+ void set(ReadWriteProgram program) {
+ std::lock_guard<std::mutex> lock(thread_current_program_mutex);
+ auto it = thread_current_program.find(std::this_thread::get_id());
if(it != thread_current_program.end())
- it->second.read_program = std::move(read_program);
+ it->second.program = std::move(program);
}
void clear() {
std::lock_guard<std::mutex> lock(thread_current_program_mutex);
auto it = thread_current_program.find(std::this_thread::get_id());
if(it != thread_current_program.end()) {
- it->second.read_program.pid = -1;
- it->second.read_program.read_fd = -1;
+ it->second.program.pid = -1;
+ it->second.program.read_fd = -1;
+ it->second.program.write_fd = -1;
}
}
+ // TODO: This same mutex should be used in the exec_... functions when they call kill() etc., to make sure we don't accidentally kill another program here if another process has been given the killed process's id!
void kill_in_thread(const std::thread::id &thread_id) {
std::lock_guard<std::mutex> lock(thread_current_program_mutex);
auto it = thread_current_program.find(thread_id);
if(it != thread_current_program.end()) {
- if(it->second.read_program.read_fd != -1)
- close(it->second.read_program.read_fd);
- if(it->second.read_program.pid != -1)
- kill(it->second.read_program.pid, SIGTERM);
+ if(it->second.program.read_fd != -1) {
+ close(it->second.program.read_fd);
+ it->second.program.read_fd = -1;
+ }
+ if(it->second.program.write_fd != -1) {
+ close(it->second.program.write_fd);
+ it->second.program.write_fd = -1;
+ }
+ if(it->second.program.pid != -1) {
+ kill(it->second.program.pid, SIGTERM);
+ it->second.program.pid = -1;
+ }
it->second.killed = true;
}
}
@@ -116,6 +145,155 @@ int exec_program_pipe(const char **args, ReadProgram *read_program) {
}
}
+/*
+    Spawns |args[0]| (looked up by execvp) with its stdin and stdout redirected to two new pipes.
+    On success 0 is returned, |program| holds the child pid, the read end of the stdout pipe (|read_fd|)
+    and the write end of the stdin pipe (|write_fd|), and the same values are registered for the calling thread
+    in |current_thread_program|. The caller has to close both file descriptors and wait for the child.
+    Returns -1 if |args| is empty or current_thread_program.is_killed() reports true,
+    -2 if a pipe could not be created and -3 if vfork failed.
+    On failure every field of |program| is left at -1.
+*/
+static int exec_program_pipe2(const char **args, ReadWriteProgram *program) {
+    program->pid = -1;
+    program->read_fd = -1;
+    program->write_fd = -1;
+
+    /* At least the program name is required */
+    if(args[0] == NULL)
+        return -1;
+
+    if(current_thread_program.is_killed())
+        return -1;
+
+    /* Carries the child's stdout to the parent */
+    int read_fd[2];
+    if(pipe(read_fd) == -1) {
+        perror("Failed to open pipe");
+        return -2;
+    }
+
+    /* Carries the parent's data to the child's stdin */
+    int write_fd[2];
+    if(pipe(write_fd) == -1) {
+        /* Report the error before cleaning up, close() is allowed to overwrite errno */
+        perror("Failed to open pipe");
+        close(read_fd[READ_END]);
+        close(read_fd[WRITE_END]);
+        return -2;
+    }
+
+    pid_t pid = vfork();
+    if(pid == -1) {
+        perror("Failed to vfork");
+        close(read_fd[READ_END]);
+        close(read_fd[WRITE_END]);
+        close(write_fd[READ_END]);
+        close(write_fd[WRITE_END]);
+        return -3;
+    } else if(pid == 0) { /* child */
+        /* Without the redirections the program would use the parent's own stdin/stdout, so treat a failed dup2 like a failed exec */
+        if(dup2(read_fd[WRITE_END], STDOUT_FILENO) == -1 || dup2(write_fd[READ_END], STDIN_FILENO) == -1) {
+            perror("dup2");
+            _exit(127);
+        }
+
+        /* The original pipe descriptors are not needed after they have been duplicated */
+        close(read_fd[READ_END]);
+        close(read_fd[WRITE_END]);
+        close(write_fd[READ_END]);
+        close(write_fd[WRITE_END]);
+
+        execvp(args[0], (char* const*)args);
+        perror("execvp");
+        _exit(127);
+    } else { /* parent */
+        /* Close the ends that belong to the child, otherwise end-of-file is never seen on the pipes */
+        close(read_fd[WRITE_END]);
+        close(write_fd[READ_END]);
+        program->pid = pid;
+        program->read_fd = read_fd[READ_END];
+        program->write_fd = write_fd[WRITE_END];
+        current_thread_program.set(*program);
+        return 0;
+    }
+}
+
+/*
+    Runs the program |args|, writes |size| bytes of |str| to its stdin, closes stdin and then passes
+    everything the program writes to stdout to |output_callback| (if set) in null-terminated chunks of at most |buffer_size| bytes.
+    A non-zero return value from |output_callback| stops reading and the program is sent SIGTERM.
+    Returns 0 if the program exited with status 0 and no error occurred. Otherwise returns:
+    -1 if |buffer_size| is outside 1..65536, the non-zero result of exec_program_pipe2 if the program could not be started,
+    -5 if waitpid failed, -4 if the program did not exit normally (for example after being terminated because of an earlier error),
+    the negated exit status if it was non-zero, or the earlier read/write/callback error.
+    NOTE: All input is written before any output is read. A program that fills its stdout pipe before it has
+    consumed all of its input blocks forever in that case. TODO: Interleave writing and reading with poll().
+*/
+int exec_program_write_stdin(const char **args, const char *str, size_t size, ProgramOutputCallback output_callback, void *userdata, int buffer_size) {
+    /* Validate before spawning anything. The assert is compiled out in release builds and alloca must never get an unchecked size */
+    assert(buffer_size >= 1 && buffer_size <= 65536);
+    if(buffer_size < 1 || buffer_size > 65536)
+        return -1;
+
+    ReadWriteProgram program;
+    int res = exec_program_pipe2(args, &program);
+    if(res != 0)
+        return res;
+
+    int result = 0;
+    int status;
+    int exit_status;
+
+    /* One extra byte for the null terminator that is added to every chunk */
+    char *buffer = (char*)alloca(buffer_size + 1);
+
+    const size_t write_buffer_size = 8192;
+    size_t write_offset = 0;
+    while(write_offset < size) {
+        size_t write_size = size - write_offset;
+        if(write_size > write_buffer_size)
+            write_size = write_buffer_size;
+
+        ssize_t bytes_written = write(program.write_fd, str + write_offset, write_size);
+        if(bytes_written == -1) {
+            int err = errno;
+            /* Interrupted by a signal before anything was written, not a real failure */
+            if(err == EINTR)
+                continue;
+            fprintf(stderr, "Failed to write to pipe to program %s, error: %s\n", args[0], strerror(err));
+            result = -err;
+            break;
+        }
+
+        /* write may accept less than requested, continue after the part that was written */
+        write_offset += (size_t)bytes_written;
+    }
+
+    /* Closing stdin is what tells the program that there is no more input */
+    close(program.write_fd);
+
+    if(result == 0) {
+        for(;;) {
+            ssize_t bytes_read = read(program.read_fd, buffer, buffer_size);
+            if(bytes_read == 0) {
+                break;
+            } else if(bytes_read == -1) {
+                int err = errno;
+                /* Interrupted by a signal before anything was read, not a real failure */
+                if(err == EINTR)
+                    continue;
+                fprintf(stderr, "Failed to read from pipe to program %s, error: %s\n", args[0], strerror(err));
+                result = -err;
+                break;
+            }
+
+            buffer[bytes_read] = '\0';
+            if(output_callback) {
+                result = output_callback(buffer, bytes_read, userdata);
+                if(result != 0)
+                    break;
+            }
+        }
+    }
+
+    // TODO: Set program.pid to -1 while holding the CurrentThreadProgram mutex. Same in other places
+    if(result != 0)
+        kill(program.pid, SIGTERM);
+
+    if(waitpid(program.pid, &status, 0) == -1) {
+        perror("waitpid failed");
+        result = -5;
+        goto cleanup;
+    }
+
+    if(!WIFEXITED(status)) {
+        result = -4;
+        goto cleanup;
+    }
+
+    exit_status = WEXITSTATUS(status);
+    if(exit_status != 0) {
+        fprintf(stderr, "Failed to execute program (");
+        const char **arg = args;
+        while(*arg) {
+            if(arg != args)
+                fputc(' ', stderr);
+            fprintf(stderr, "'%s'", *arg);
+            ++arg;
+        }
+        fprintf(stderr, "), exit status %d\n", exit_status);
+        result = -exit_status;
+    }
+
+    cleanup:
+    program_clear_current_thread();
+    close(program.read_fd);
+    return result;
+}
+
int exec_program(const char **args, ProgramOutputCallback output_callback, void *userdata, int buffer_size) {
ReadProgram read_program;
int res = exec_program_pipe(args, &read_program);
diff --git a/src/Text.cpp b/src/Text.cpp
index 44bf73c..836651c 100644
--- a/src/Text.cpp
+++ b/src/Text.cpp
@@ -507,7 +507,7 @@ namespace QuickMedia
text_element.text_type = TextElement::TextType::TEXT;
if(text_element.type == TextElement::Type::TEXT) {
const std::string_view inside_text = text_element.text;
- text_element.text = std::string_view("", 0);
+ text_element.text = std::string_view();
text_element.text_num_bytes = 0;
text_element.type = TextElement::Type::FORMAT_START;
text_elements.push_back(text_element);
diff --git a/src/plugins/Matrix.cpp b/src/plugins/Matrix.cpp
index a9284f5..d1a060d 100644
--- a/src/plugins/Matrix.cpp
+++ b/src/plugins/Matrix.cpp
@@ -2289,10 +2289,21 @@ namespace QuickMedia {
bool inside_font_tag = false;
bool font_tag_has_custom_color = false;
bool inside_code_tag = false;
+ std::string_view code_tag_language;
bool allow_formatted_text = false;
+ bool inside_source_highlight = false;
+ bool supports_syntax_highlight = false;
mgl::Color font_color = mgl::Color(255, 255, 255, 255);
};
+ // Program output callback that appends |size| bytes of |data| to the std::string |userdata| points to.
+ // Returns 0 after appending, or 1 without modifying the string if the result would be larger than the limit.
+ static int accumulate_string(char *data, int size, void *userdata) {
+     std::string &output = *(std::string*)userdata;
+     // 100mb sane limit, TODO: make configurable
+     const bool exceeds_limit = output.size() + size > 1024 * 1024 * 100;
+     if(!exceeds_limit) {
+         output.append(data, size);
+         return 0;
+     }
+     return 1;
+ }
+
// TODO: Full proper parsing with tag depth
static int formattext_text_parser_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
FormattedTextParseUserdata &parse_userdata = *(FormattedTextParseUserdata*)userdata;
@@ -2304,8 +2315,10 @@ namespace QuickMedia {
parse_userdata.inside_font_tag = true;
else if(html_parser->tag_name.size == 8 && memcmp(html_parser->tag_name.data, "mx-reply", 8) == 0)
++parse_userdata.mx_reply_depth;
- else if(html_parser->tag_name.size == 4 && memcmp(html_parser->tag_name.data, "code", 4) == 0)
+ else if(html_parser->tag_name.size == 4 && memcmp(html_parser->tag_name.data, "code", 4) == 0) {
parse_userdata.inside_code_tag = true;
+ parse_userdata.code_tag_language = std::string_view();
+ }
break;
}
case HTML_PARSE_TAG_END: {
@@ -2325,6 +2338,9 @@ namespace QuickMedia {
if(parse_userdata.inside_font_tag && html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "color", 5) == 0) {
if(parse_hex_set_color(html_parser->attribute_value.data, html_parser->attribute_value.size, parse_userdata.font_color))
parse_userdata.font_tag_has_custom_color = true;
+ } else if(parse_userdata.inside_code_tag && html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "class", 5) == 0) {
+ if(html_parser->attribute_value.size > 9 && memcmp(html_parser->attribute_value.data, "language-", 9) == 0)
+ parse_userdata.code_tag_language = std::string_view(html_parser->attribute_value.data + 9, html_parser->attribute_value.size - 9);
}
break;
}
@@ -2335,12 +2351,32 @@ namespace QuickMedia {
html_unescape_sequences(text_to_add);
uint8_t formatted_text_flags = FORMATTED_TEXT_FLAG_NONE;
- if(parse_userdata.font_tag_has_custom_color)
- formatted_text_flags |= FORMATTED_TEXT_FLAG_COLOR;
- if(parse_userdata.inside_code_tag)
- formatted_text_flags |= FORMATTED_TEXT_FLAG_MONOSPACE;
+ if(parse_userdata.allow_formatted_text) {
+ if(parse_userdata.font_tag_has_custom_color)
+ formatted_text_flags |= FORMATTED_TEXT_FLAG_COLOR;
+
+ if(parse_userdata.inside_source_highlight || !parse_userdata.supports_syntax_highlight || (parse_userdata.inside_code_tag && parse_userdata.code_tag_language.size() == 0)) {
+ formatted_text_flags |= FORMATTED_TEXT_FLAG_MONOSPACE;
+ } else if(parse_userdata.inside_code_tag) {
+ formatted_text_flags |= FORMATTED_TEXT_FLAG_MONOSPACE;
+ // TODO: guess language from code if no language is set.
+ // TODO: Allow the user to choose style in config file.
+
+ const std::string code_language(parse_userdata.code_tag_language);
+ const char *args[] = { "source-highlight", "-f", "html", "-s", code_language.c_str(), "--style-file=esc256.style", "-o", "STDOUT", nullptr };
+ std::string output;
+ if(exec_program_write_stdin(args, text_to_add.c_str(), text_to_add.size(), accumulate_string, &output) == 0) {
+ FormattedTextParseUserdata code_parse_userdata;
+ code_parse_userdata.allow_formatted_text = true;
+ code_parse_userdata.inside_source_highlight = true;
+ html_parser_parse(output.c_str(), output.size(), formattext_text_parser_callback, &code_parse_userdata);
+ text_to_add = std::move(code_parse_userdata.result);
+ formatted_text_flags = FORMATTED_TEXT_FLAG_NONE;
+ }
+ }
+ }
- if(formatted_text_flags != FORMATTED_TEXT_FLAG_NONE && parse_userdata.allow_formatted_text)
+ if(formatted_text_flags != FORMATTED_TEXT_FLAG_NONE)
parse_userdata.result += Text::formatted_text(text_to_add, parse_userdata.font_color, formatted_text_flags);
else
parse_userdata.result += std::move(text_to_add);
@@ -2354,6 +2390,7 @@ namespace QuickMedia {
std::string formatted_text_to_qm_text(const char *str, size_t size, bool allow_formatted_text) {
FormattedTextParseUserdata parse_userdata;
parse_userdata.allow_formatted_text = allow_formatted_text;
+ parse_userdata.supports_syntax_highlight = is_program_executable_by_name("source-highlight");
html_parser_parse(str, size, formattext_text_parser_callback, &parse_userdata);
return std::move(parse_userdata.result);
}