diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | TODO | 7 | ||||
-rw-r--r-- | include/color_conversion.h | 4 | ||||
-rw-r--r-- | kms/server/kms_server.c | 82 | ||||
-rw-r--r-- | meson.build | 5 | ||||
-rw-r--r-- | project.conf | 2 | ||||
-rw-r--r-- | protocol/meson.build | 25 | ||||
-rw-r--r-- | protocol/xdg-output-unstable-v1.xml | 222 | ||||
-rw-r--r-- | src/capture/kms.c | 6 | ||||
-rw-r--r-- | src/capture/nvfbc.c | 2 | ||||
-rw-r--r-- | src/capture/portal.c | 4 | ||||
-rw-r--r-- | src/capture/xcomposite.c | 4 | ||||
-rw-r--r-- | src/capture/ximage.c | 4 | ||||
-rw-r--r-- | src/color_conversion.c | 152 | ||||
-rw-r--r-- | src/main.cpp | 64 | ||||
-rw-r--r-- | src/window/wayland.c | 92 |
17 files changed, 514 insertions, 168 deletions
@@ -4,8 +4,8 @@ compile_commands.json tests/sibs-build/ tests/compile_commands.json -external/wlr-export-dmabuf-unstable-v1-client-protocol.h -external/wlr-export-dmabuf-unstable-v1-protocol.c +**/xdg-output-unstable-v1-client-protocol.h +**/xdg-output-unstable-v1-protocol.c .clangd/ .cache/ @@ -42,7 +42,7 @@ When recording GTA V at 4k on highest settings, fps drops from 60 to 23 when usi GPU Screen Recorder also produces much smoother videos than OBS when GPU utilization is close to 100%, see comparison here: [https://www.youtube.com/watch?v=zfj4sNVLLLg](https://www.youtube.com/watch?v=zfj4sNVLLLg).\ GPU Screen Recorder has much better performance than OBS Studio even with version 30.2 that does "zero-copy" recording and encoding, see: [https://www.youtube.com/watch?v=jdroRjibsDw](https://www.youtube.com/watch?v=jdroRjibsDw).\ It is recommended to save the video to a SSD because of the large file size, which a slow HDD might not be fast enough to handle. Using variable framerate mode (-fm vfr) which is the default is also recommended as this reduces encoding load. Ultra quality is also overkill most of the time, very high (the default) or lower quality is usually enough.\ -Note that recording on AMD can have some performance issues on Wayland in the recording itself when recording without desktop portal unless your mesa version is 25.0.0 or greater. +Note that for best performance you should close other screen recorders such as OBS Studio when using GPU Screen Recorder even if they are not recording, since they can affect performance even when idle. This is the case with OBS Studio. ## Note about optimal performance on NVIDIA NVIDIA driver has a "feature" (read: bug) where it will downclock memory transfer rate when a program uses cuda (or nvenc, which uses cuda), such as GPU Screen Recorder. 
To work around this bug, GPU Screen Recorder can overclock your GPU memory transfer rate to its normal optimal level.\ To enable overclocking for optimal performance use the `-oc` option when running GPU Screen Recorder. You also need to have "Coolbits" NVIDIA X setting set to "12" to enable overclocking. You can automatically add this option if you run `sudo nvidia-xconfig --cool-bits=12` and then reboot your computer.\ @@ -87,6 +87,7 @@ These are the dependencies needed to build GPU Screen Recorder: * libcap * wayland-client * wayland-egl +* wayland-scanner ## Runtime dependencies There are also additional dependencies needed at runtime depending on your GPU vendor: @@ -4,7 +4,6 @@ See https://trac.ffmpeg.org/wiki/EncodingForStreamingSites for optimizing stream Look at VK_EXT_external_memory_dma_buf. Use mov+faststart. Allow recording all monitors/selected monitor without nvfbc by recording the compositor proxy window and only recording the part that matches the monitor(s). -Allow recording a region by recording the compositor proxy window / nvfbc window and copying part of it. Support amf and qsv. Disable flipping on nvidia? this might fix some stuttering issues on some setups. See NvCtrlGetAttribute/NvCtrlSetAttributeAndGetStatus NV_CTRL_SYNC_TO_VBLANK https://github.com/NVIDIA/nvidia-settings/blob/d5f022976368cbceb2f20b838ddb0bf992f0cfb9/src/gtk%2B-2.x/ctkopengl.c. Replays seem to have some issues with audio/video. Why? @@ -256,3 +255,9 @@ Do proper exit, to call gsr_capture_destroy which will properly stop gsr-kms-ser Replace all scissors with clearing textures if the cursor hits the outside of the frame image. Cursor position might be slightly wrong on rotated monitor. + +External texture doesn't work on nvidia x11, probably because of glx context (requires gles es). External texture is not used on nvidia x11 right now so it's not an issue. + +Add option to save replay buffer on disk instead of ram. 
+ +nvfbc capture cursor with cursor.h instead and composite that on top. This allows us to also always get a cursor in direct capture mode. This could possibly give better performance as well. diff --git a/include/color_conversion.h b/include/color_conversion.h index 1c067e2..4c3b615 100644 --- a/include/color_conversion.h +++ b/include/color_conversion.h @@ -6,7 +6,7 @@ #include "vec2.h" #include <stdbool.h> -#define GSR_COLOR_CONVERSION_MAX_SHADERS 6 +#define GSR_COLOR_CONVERSION_MAX_SHADERS 12 #define GSR_COLOR_CONVERSION_MAX_FRAMEBUFFERS 2 typedef enum { @@ -72,7 +72,7 @@ typedef struct { int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params); void gsr_color_conversion_deinit(gsr_color_conversion *self); -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color); +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, gsr_source_color source_color, bool external_texture, bool alpha_blending); void gsr_color_conversion_clear(gsr_color_conversion *self); gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation); diff --git a/kms/server/kms_server.c b/kms/server/kms_server.c index 6d46f8a..c585f24 100644 --- a/kms/server/kms_server.c +++ b/kms/server/kms_server.c @@ -207,7 +207,7 @@ static uint32_t plane_get_properties(int drmfd, uint32_t plane_id, int *x, int * return property_mask; } -/* Returns 0 if not found */ +/* Returns NULL if not found */ static const connector_crtc_pair* get_connector_pair_by_crtc_id(const connector_to_crtc_map *c2crtc_map, uint32_t crtc_id) { for(int i = 0; i < c2crtc_map->num_maps; ++i) { 
if(c2crtc_map->maps[i].crtc_id == crtc_id) @@ -433,81 +433,6 @@ static double clock_get_monotonic_seconds(void) { return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001; } -// static bool readlink_realpath(const char *filepath, char *buffer) { -// char symlinked_path[PATH_MAX]; -// ssize_t bytes_written = readlink(filepath, symlinked_path, sizeof(symlinked_path) - 1); -// if(bytes_written == -1 && errno == EINVAL) { -// /* Not a symlink */ -// snprintf(symlinked_path, sizeof(symlinked_path), "%s", filepath); -// } else if(bytes_written == -1) { -// return false; -// } else { -// symlinked_path[bytes_written] = '\0'; -// } - -// if(!realpath(symlinked_path, buffer)) -// return false; - -// return true; -// } - -// static void file_get_directory(char *filepath) { -// char *end = strrchr(filepath, '/'); -// if(end == NULL) -// filepath[0] = '\0'; -// else -// *end = '\0'; -// } - -// static bool string_ends_with(const char *str, const char *ends_with) { -// const int len = strlen(str); -// const int ends_with_len = strlen(ends_with); -// return len >= ends_with_len && memcmp(str + len - ends_with_len, ends_with, ends_with_len) == 0; -// } - -// This is not foolproof, but the assumption is that gsr-kms-server and gpu-screen-recorder are installed in the same directory -// in a location that only the root user can write to (usually /usr/bin or /usr/local/bin) and if the client runs from that location -// and is called gpu-screen-recorder then gsr-kms-server can only be used by a malicious program if the malicious program -// had root access, to modify that program install directory. 
-// static bool is_remote_peer_program_gpu_screen_recorder(int socket_fd) { -// // TODO: Use SO_PEERPIDFD on kernel >= 6.5 to avoid a race condition in the /proc/<pid> check -// struct ucred cred; -// socklen_t ucred_len = sizeof(cred); -// if(getsockopt(socket_fd, SOL_SOCKET, SO_PEERCRED, &cred, &ucred_len) == -1) { -// fprintf(stderr, "kms server error: failed to get peer credentials, error: %s\n", strerror(errno)); -// return false; -// } - -// char self_directory[PATH_MAX]; -// if(!readlink_realpath("/proc/self/exe", self_directory)) { -// fprintf(stderr, "kms server error: failed to resolve /proc/self/exe\n"); -// return false; -// } -// file_get_directory(self_directory); - -// char peer_directory[PATH_MAX]; -// char peer_exe_path[PATH_MAX]; -// snprintf(peer_exe_path, sizeof(peer_exe_path), "/proc/%d/exe", (int)cred.pid); -// if(!readlink_realpath(peer_exe_path, peer_directory)) { -// fprintf(stderr, "kms server error: failed to resolve /proc/self/exe\n"); -// return false; -// } - -// if(!string_ends_with(peer_directory, "/gpu-screen-recorder")) { -// fprintf(stderr, "kms server error: only gpu-screen-recorder can use gsr-kms-server. 
client program location is %s\n", peer_directory); -// return false; -// } - -// file_get_directory(peer_directory); - -// if(strcmp(self_directory, peer_directory) != 0) { -// fprintf(stderr, "kms server error: the client program is in directory %s but only programs in %s can run gsr-kms-server\n", peer_directory, self_directory); -// return false; -// } - -// return true; -// } - int main(int argc, char **argv) { int res = 0; int socket_fd = 0; @@ -579,11 +504,6 @@ int main(int argc, char **argv) { goto done; } - // if(!is_remote_peer_program_gpu_screen_recorder(socket_fd)) { - // res = 3; - // goto done; - // } - for(;;) { gsr_kms_request request; request.version = 0; diff --git a/meson.build b/meson.build index 76053c3..0a3534a 100644 --- a/meson.build +++ b/meson.build @@ -1,4 +1,4 @@ -project('gpu-screen-recorder', ['c', 'cpp'], version : '5.3.5', default_options : ['warning_level=2']) +project('gpu-screen-recorder', ['c', 'cpp'], version : '5.3.7', default_options : ['warning_level=2']) add_project_arguments('-Wshadow', language : ['c', 'cpp']) if get_option('buildtype') == 'debug' @@ -41,6 +41,9 @@ src = [ 'src/main.cpp', ] +subdir('protocol') +src += protocol_src + dep = [ dependency('threads'), dependency('libavcodec'), diff --git a/project.conf b/project.conf index eb1b845..368219b 100644 --- a/project.conf +++ b/project.conf @@ -1,7 +1,7 @@ [package] name = "gpu-screen-recorder" type = "executable" -version = "5.3.5" +version = "5.3.7" platforms = ["posix"] [config] diff --git a/protocol/meson.build b/protocol/meson.build new file mode 100644 index 0000000..bbdccba --- /dev/null +++ b/protocol/meson.build @@ -0,0 +1,25 @@ +wayland_scanner = dependency('wayland-scanner', native: true) +wayland_scanner_path = wayland_scanner.get_variable(pkgconfig: 'wayland_scanner') +wayland_scanner_prog = find_program(wayland_scanner_path, native: true) + +wayland_scanner_code = generator( + wayland_scanner_prog, + output: '@BASENAME@-protocol.c', + arguments: 
['private-code', '@INPUT@', '@OUTPUT@'], +) + +wayland_scanner_client = generator( + wayland_scanner_prog, + output: '@BASENAME@-client-protocol.h', + arguments: ['client-header', '@INPUT@', '@OUTPUT@'], +) + +protocols = [ + 'xdg-output-unstable-v1.xml', +] + +protocol_src = [] +foreach xml : protocols + protocol_src += wayland_scanner_code.process(xml) + protocol_src += wayland_scanner_client.process(xml) +endforeach diff --git a/protocol/xdg-output-unstable-v1.xml b/protocol/xdg-output-unstable-v1.xml new file mode 100644 index 0000000..a7306e4 --- /dev/null +++ b/protocol/xdg-output-unstable-v1.xml @@ -0,0 +1,222 @@ +<?xml version="1.0" encoding="UTF-8"?> +<protocol name="xdg_output_unstable_v1"> + + <copyright> + Copyright © 2017 Red Hat Inc. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice (including the next + paragraph) shall be included in all copies or substantial portions of the + Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ </copyright> + + <description summary="Protocol to describe output regions"> + This protocol aims at describing outputs in a way which is more in line + with the concept of an output on desktop oriented systems. + + Some information are more specific to the concept of an output for + a desktop oriented system and may not make sense in other applications, + such as IVI systems for example. + + Typically, the global compositor space on a desktop system is made of + a contiguous or overlapping set of rectangular regions. + + The logical_position and logical_size events defined in this protocol + might provide information identical to their counterparts already + available from wl_output, in which case the information provided by this + protocol should be preferred to their equivalent in wl_output. The goal is + to move the desktop specific concepts (such as output location within the + global compositor space, etc.) out of the core wl_output protocol. + + Warning! The protocol described in this file is experimental and + backward incompatible changes may be made. Backward compatible + changes may be added together with the corresponding interface + version bump. + Backward incompatible changes are done by bumping the version + number in the protocol and interface names and resetting the + interface version. Once the protocol is to be declared stable, + the 'z' prefix and the version number in the protocol and + interface names are removed and the interface version number is + reset. + </description> + + <interface name="zxdg_output_manager_v1" version="3"> + <description summary="manage xdg_output objects"> + A global factory interface for xdg_output objects. + </description> + + <request name="destroy" type="destructor"> + <description summary="destroy the xdg_output_manager object"> + Using this request a client can tell the server that it is not + going to use the xdg_output_manager object anymore. 
+ + Any objects already created through this instance are not affected. + </description> + </request> + + <request name="get_xdg_output"> + <description summary="create an xdg output from a wl_output"> + This creates a new xdg_output object for the given wl_output. + </description> + <arg name="id" type="new_id" interface="zxdg_output_v1"/> + <arg name="output" type="object" interface="wl_output"/> + </request> + </interface> + + <interface name="zxdg_output_v1" version="3"> + <description summary="compositor logical output region"> + An xdg_output describes part of the compositor geometry. + + This typically corresponds to a monitor that displays part of the + compositor space. + + For objects version 3 onwards, after all xdg_output properties have been + sent (when the object is created and when properties are updated), a + wl_output.done event is sent. This allows changes to the output + properties to be seen as atomic, even if they happen via multiple events. + </description> + + <request name="destroy" type="destructor"> + <description summary="destroy the xdg_output object"> + Using this request a client can tell the server that it is not + going to use the xdg_output object anymore. + </description> + </request> + + <event name="logical_position"> + <description summary="position of the output within the global compositor space"> + The position event describes the location of the wl_output within + the global compositor space. + + The logical_position event is sent after creating an xdg_output + (see xdg_output_manager.get_xdg_output) and whenever the location + of the output changes within the global compositor space. 
+ </description> + <arg name="x" type="int" + summary="x position within the global compositor space"/> + <arg name="y" type="int" + summary="y position within the global compositor space"/> + </event> + + <event name="logical_size"> + <description summary="size of the output in the global compositor space"> + The logical_size event describes the size of the output in the + global compositor space. + + Most regular Wayland clients should not pay attention to the + logical size and would rather rely on xdg_shell interfaces. + + Some clients such as Xwayland, however, need this to configure + their surfaces in the global compositor space as the compositor + may apply a different scale from what is advertised by the output + scaling property (to achieve fractional scaling, for example). + + For example, for a wl_output mode 3840×2160 and a scale factor 2: + + - A compositor not scaling the monitor viewport in its compositing space + will advertise a logical size of 3840×2160, + + - A compositor scaling the monitor viewport with scale factor 2 will + advertise a logical size of 1920×1080, + + - A compositor scaling the monitor viewport using a fractional scale of + 1.5 will advertise a logical size of 2560×1440. + + For example, for a wl_output mode 1920×1080 and a 90 degree rotation, + the compositor will advertise a logical size of 1080x1920. + + The logical_size event is sent after creating an xdg_output + (see xdg_output_manager.get_xdg_output) and whenever the logical + size of the output changes, either as a result of a change in the + applied scale or because of a change in the corresponding output + mode(see wl_output.mode) or transform (see wl_output.transform). 
+ </description> + <arg name="width" type="int" + summary="width in global compositor space"/> + <arg name="height" type="int" + summary="height in global compositor space"/> + </event> + + <event name="done" deprecated-since="3"> + <description summary="all information about the output have been sent"> + This event is sent after all other properties of an xdg_output + have been sent. + + This allows changes to the xdg_output properties to be seen as + atomic, even if they happen via multiple events. + + For objects version 3 onwards, this event is deprecated. Compositors + are not required to send it anymore and must send wl_output.done + instead. + </description> + </event> + + <!-- Version 2 additions --> + + <event name="name" since="2"> + <description summary="name of this output"> + Many compositors will assign names to their outputs, show them to the + user, allow them to be configured by name, etc. The client may wish to + know this name as well to offer the user similar behaviors. + + The naming convention is compositor defined, but limited to + alphanumeric characters and dashes (-). Each name is unique among all + wl_output globals, but if a wl_output global is destroyed the same name + may be reused later. The names will also remain consistent across + sessions with the same hardware and software configuration. + + Examples of names include 'HDMI-A-1', 'WL-1', 'X11-1', etc. However, do + not assume that the name is a reflection of an underlying DRM + connector, X11 connection, etc. + + The name event is sent after creating an xdg_output (see + xdg_output_manager.get_xdg_output). This event is only sent once per + xdg_output, and the name does not change over the lifetime of the + wl_output global. + + This event is deprecated, instead clients should use wl_output.name. + Compositors must still support this event. 
+ </description> + <arg name="name" type="string" summary="output name"/> + </event> + + <event name="description" since="2"> + <description summary="human-readable description of this output"> + Many compositors can produce human-readable descriptions of their + outputs. The client may wish to know this description as well, to + communicate the user for various purposes. + + The description is a UTF-8 string with no convention defined for its + contents. Examples might include 'Foocorp 11" Display' or 'Virtual X11 + output via :1'. + + The description event is sent after creating an xdg_output (see + xdg_output_manager.get_xdg_output) and whenever the description + changes. The description is optional, and may not be sent at all. + + For objects of version 2 and lower, this event is only sent once per + xdg_output, and the description does not change over the lifetime of + the wl_output global. + + This event is deprecated, instead clients should use + wl_output.description. Compositors must still support this event. 
+ </description> + <arg name="description" type="string" summary="output description"/> + </event> + + </interface> +</protocol> diff --git a/src/capture/kms.c b/src/capture/kms.c index 8bb09a0..18858f2 100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -503,7 +503,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->cursor_texture_id, cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y}, (vec2i){0, 0}, cursor_size, cursor_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), GSR_SOURCE_COLOR_RGB, cursor_texture_id_is_external, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -531,7 +531,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id, cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y}, (vec2i){0, 0}, self->x11_cursor.size, self->x11_cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -647,7 +647,7 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? 
self->external_input_texture_id : self->input_texture_id, target_pos, output_size, capture_pos, self->capture_size, original_frame_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), GSR_SOURCE_COLOR_RGB, self->external_texture_fallback, false); if(self->params.record_cursor) { gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane); diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index 4ed19b3..b92bd41 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -397,7 +397,7 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex], target_pos, (vec2i){output_size.x, output_size.y}, self->params.region_position, frame_size, original_frame_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR); + GSR_ROT_0, GSR_SOURCE_COLOR_BGR, false, false); //self->params.egl->glFlush(); //self->params.egl->glFinish(); diff --git a/src/capture/portal.c b/src/capture/portal.c index ec87ab6..27f514f 100644 --- a/src/capture/portal.c +++ b/src/capture/portal.c @@ -348,7 +348,7 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, using_external_image ? 
self->texture_map.external_texture_id : self->texture_map.texture_id, target_pos, output_size, (vec2i){region.x, region.y}, self->capture_size, self->capture_size, - GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, using_external_image, false); if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) { const vec2d scale = { @@ -366,7 +366,7 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id, (vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y}, (vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height}, (vec2i){cursor_region.width, cursor_region.height}, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c index 2d0574c..db41f63 100644 --- a/src/capture/xcomposite.c +++ b/src/capture/xcomposite.c @@ -259,7 +259,7 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), target_pos, output_size, (vec2i){0, 0}, self->texture_size, self->texture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, false); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -280,7 +280,7 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); } 
//self->params.egl->glFlush(); diff --git a/src/capture/ximage.c b/src/capture/ximage.c index 1f86d93..9b02907 100644 --- a/src/capture/ximage.c +++ b/src/capture/ximage.c @@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_id, target_pos, output_size, (vec2i){0, 0}, self->capture_size, self->capture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, false); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/color_conversion.c b/src/color_conversion.c index 27ef488..1f95fd3 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -5,15 +5,18 @@ #include <string.h> #include <assert.h> -#define SHADER_INDEX_Y 0 -#define SHADER_INDEX_UV 1 -#define SHADER_INDEX_Y_EXTERNAL 2 -#define SHADER_INDEX_UV_EXTERNAL 3 -#define SHADER_INDEX_RGB 4 -#define SHADER_INDEX_RGB_EXTERNAL 5 - -// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result -// with a call to mix to choose source/output color. 
+#define SHADER_INDEX_Y 0 +#define SHADER_INDEX_UV 1 +#define SHADER_INDEX_Y_EXTERNAL 2 +#define SHADER_INDEX_UV_EXTERNAL 3 +#define SHADER_INDEX_RGB 4 +#define SHADER_INDEX_RGB_EXTERNAL 5 +#define SHADER_INDEX_Y_BLEND 6 +#define SHADER_INDEX_UV_BLEND 7 +#define SHADER_INDEX_Y_EXTERNAL_BLEND 8 +#define SHADER_INDEX_UV_EXTERNAL_BLEND 9 +#define SHADER_INDEX_RGB_BLEND 10 +#define SHADER_INDEX_RGB_EXTERNAL_BLEND 11 /* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */ @@ -75,9 +78,7 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color return NULL; } -// TODO: Make alpha blending optional -// TODO: Optimize these shaders. -static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture, bool alpha_blending) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); char compute_shader[2048]; @@ -98,16 +99,17 @@ static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uni "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 1;\n" // size/2 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" - " vec2 tex_coord = 
vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord);\n" + " vec4 source_color = texelFetch(img_input, ivec2(rotated_texel_coord), 0);\n" " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 output_color_yuv = %s;\n" " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n" " imageStore(img_output, texel_coord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix); + "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix, + alpha_blending ? "texelFetch(img_background, ivec2(output_texel_coord), 0)" : "source_color_yuv"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -119,7 +121,7 @@ static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uni return 0; } -static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture, bool alpha_blending) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); char compute_shader[2048]; @@ -140,16 +142,17 @@ static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_un "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 2;\n" // size/4 " ivec2 output_size = 
textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n" - " vec2 output_texel_coord = vec2(texel_coord - source_position/2 + target_position/2) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord * 2.0);\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" + " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" + " vec4 source_color = texelFetch(img_input, ivec2(rotated_texel_coord) << 1, 0);\n" " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 output_color_yuv = %s;\n" " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n" - " imageStore(img_output, texel_coord + target_position/2, vec4(uv_color, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix); + " imageStore(img_output, texel_coord + target_position, vec4(uv_color, 1.0, 1.0));\n" + "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix, + alpha_blending ? 
"texelFetch(img_background, ivec2(output_texel_coord), 0)" : "source_color_yuv"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -161,7 +164,7 @@ static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_un return 0; } -static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, bool external_texture) { +static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, bool external_texture, bool alpha_blending) { char compute_shader[2048]; snprintf(compute_shader, sizeof(compute_shader), "#version 310 es\n" @@ -179,15 +182,16 @@ static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_u "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 1;\n" // size/2 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord);\n" - " vec4 output_color = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 source_color = texelFetch(img_input, ivec2(rotated_texel_coord), 0);\n" + " vec4 output_color = %s;\n" " vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n" " imageStore(img_output, texel_coord + target_position, vec4(color, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? 
"samplerExternalOES" : "sampler2D"); + "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", + alpha_blending ? "texelFetch(img_background, ivec2(output_texel_coord), 0)" : "source_color"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -267,23 +271,43 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y], self->params.egl, &self->uniforms[SHADER_INDEX_Y], self->max_local_size_dim, params->destination_color, params->color_range, false) != 0) { + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y], self->params.egl, &self->uniforms[SHADER_INDEX_Y], self->max_local_size_dim, params->destination_color, params->color_range, false, false) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV], self->params.egl, &self->uniforms[SHADER_INDEX_UV], self->max_local_size_dim, params->destination_color, params->color_range, false) != 0) { + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV], self->params.egl, &self->uniforms[SHADER_INDEX_UV], self->max_local_size_dim, params->destination_color, params->color_range, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + goto err; + } + + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_Y_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, false, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_UV_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, false, 
true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); goto err; } if(self->params.load_external_image_shader) { - if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true) != 0) { + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true, false) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true) != 0) { + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + goto err; + } + + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, true, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, true, true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); goto err; } @@ -296,13 +320,23 @@ int 
gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB], self->params.egl, &self->uniforms[SHADER_INDEX_RGB], self->max_local_size_dim, false) != 0) { + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB], self->params.egl, &self->uniforms[SHADER_INDEX_RGB], self->max_local_size_dim, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_BLEND], self->max_local_size_dim, false, true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } if(self->params.load_external_image_shader) { - if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL], self->max_local_size_dim, true) != 0) { + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL], self->max_local_size_dim, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL_BLEND], self->max_local_size_dim, true, true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } @@ -411,7 +445,7 @@ typedef enum { GSR_COLOR_COMP_RGB } gsr_color_component; -static int color_component_get_texture_index(gsr_color_component color_component) { +static int color_component_get_destination_texture_index(gsr_color_component color_component) { switch(color_component) { case GSR_COLOR_COMP_Y: return 0; case GSR_COLOR_COMP_UV: return 1; @@ -431,23 +465,38 @@ static 
unsigned int color_component_get_color_format(gsr_color_component color_c return GL_RGBA8; } -static int color_component_get_shader_index(gsr_color_component color_component, bool external_texture) { +static int color_component_get_shader_index(gsr_color_component color_component, bool external_texture, bool alpha_blending) { switch(color_component) { - case GSR_COLOR_COMP_Y: return external_texture ? SHADER_INDEX_Y_EXTERNAL : SHADER_INDEX_Y; - case GSR_COLOR_COMP_UV: return external_texture ? SHADER_INDEX_UV_EXTERNAL : SHADER_INDEX_UV; - case GSR_COLOR_COMP_RGB: return external_texture ? SHADER_INDEX_RGB_EXTERNAL : SHADER_INDEX_RGB; + case GSR_COLOR_COMP_Y: { + if(external_texture) + return alpha_blending ? SHADER_INDEX_Y_EXTERNAL_BLEND : SHADER_INDEX_Y_EXTERNAL; + else + return alpha_blending ? SHADER_INDEX_Y_BLEND : SHADER_INDEX_Y; + } + case GSR_COLOR_COMP_UV: { + if(external_texture) + return alpha_blending ? SHADER_INDEX_UV_EXTERNAL_BLEND : SHADER_INDEX_UV_EXTERNAL; + else + return alpha_blending ? SHADER_INDEX_UV_BLEND : SHADER_INDEX_UV; + } + case GSR_COLOR_COMP_RGB: { + if(external_texture) + return alpha_blending ? SHADER_INDEX_RGB_EXTERNAL_BLEND : SHADER_INDEX_RGB_EXTERNAL; + else + return alpha_blending ? 
SHADER_INDEX_RGB_BLEND : SHADER_INDEX_RGB; + } } assert(false); return SHADER_INDEX_RGB; } -static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *self, bool external_texture, float rotation_matrix[2][2], vec2i source_position, vec2i destination_pos, vec2i destination_size, vec2f scale, bool use_16bit_colors, gsr_color_component color_component) { - const int shader_index = color_component_get_shader_index(color_component, external_texture); - const int texture_index = color_component_get_texture_index(color_component); +static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *self, bool external_texture, bool alpha_blending, float rotation_matrix[2][2], vec2i source_position, vec2i destination_pos, vec2i destination_size, vec2f scale, bool use_16bit_colors, gsr_color_component color_component) { + const int shader_index = color_component_get_shader_index(color_component, external_texture, alpha_blending); + const int destination_texture_index = color_component_get_destination_texture_index(color_component); const unsigned int color_format = color_component_get_color_format(color_component, use_16bit_colors); self->params.egl->glActiveTexture(GL_TEXTURE1); - self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[texture_index]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[destination_texture_index]); self->params.egl->glActiveTexture(GL_TEXTURE0); gsr_color_uniforms *uniform = &self->uniforms[shader_index]; @@ -456,13 +505,13 @@ static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *s self->params.egl->glUniform2i(uniform->source_position, source_position.x, source_position.y); self->params.egl->glUniform2i(uniform->target_position, destination_pos.x, destination_pos.y); self->params.egl->glUniform2f(uniform->scale, scale.x, scale.y); - self->params.egl->glBindImageTexture(0, self->params.destination_textures[texture_index], 0, 
GL_FALSE, 0, GL_WRITE_ONLY, color_format); + self->params.egl->glBindImageTexture(0, self->params.destination_textures[destination_texture_index], 0, GL_FALSE, 0, GL_WRITE_ONLY, color_format); const double num_groups_x = ceil((double)destination_size.x/(double)self->max_local_size_dim); const double num_groups_y = ceil((double)destination_size.y/(double)self->max_local_size_dim); self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); } -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) { +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, gsr_source_color source_color, bool external_texture, bool alpha_blending) { vec2f scale = {0.0f, 0.0f}; if(source_size.x > 0 && source_size.y > 0) scale = (vec2f){ (double)destination_size.x/(double)source_size.x, (double)destination_size.y/(double)source_size.y }; @@ -482,12 +531,13 @@ void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_ case GSR_DESTINATION_COLOR_NV12: case GSR_DESTINATION_COLOR_P010: { const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010; - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, destination_size, scale, use_16bit_colors, GSR_COLOR_COMP_Y); - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, (vec2i){destination_size.x/2, destination_size.y/2}, scale, use_16bit_colors, GSR_COLOR_COMP_UV); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, 
source_position, destination_pos, destination_size, scale, use_16bit_colors, GSR_COLOR_COMP_Y); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, (vec2i){source_position.x/2, source_position.y/2}, + (vec2i){destination_pos.x/2, destination_pos.y/2}, (vec2i){destination_size.x/2, destination_size.y/2}, scale, use_16bit_colors, GSR_COLOR_COMP_UV); break; } case GSR_DESTINATION_COLOR_RGB8: { - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, destination_size, scale, false, GSR_COLOR_COMP_RGB); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, source_position, destination_pos, destination_size, scale, false, GSR_COLOR_COMP_RGB); break; } } diff --git a/src/main.cpp b/src/main.cpp index b82aeee..6b3683a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -181,6 +181,11 @@ enum class BitrateMode { CBR }; +enum class Tune { + PERFORMANCE, + QUALITY +}; + static int x11_error_handler(Display*, XErrorEvent*) { return 0; } @@ -1027,7 +1032,7 @@ static void video_hardware_set_qp(AVCodecContext *codec_context, VideoQuality vi } } -static void open_video_hardware(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gsr_gpu_vendor vendor, PixelFormat pixel_format, bool hdr, gsr_color_depth color_depth, BitrateMode bitrate_mode, VideoCodec video_codec, bool low_power) { +static void open_video_hardware(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gsr_gpu_vendor vendor, PixelFormat pixel_format, bool hdr, gsr_color_depth color_depth, BitrateMode bitrate_mode, VideoCodec video_codec, bool low_power, Tune tune) { (void)very_old_gpu; AVDictionary *options = nullptr; @@ -1051,6 +1056,17 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide // } av_dict_set(&options, "tune", "hq", 0); + switch(tune) { + case Tune::PERFORMANCE: 
+ //av_dict_set(&options, "multipass", "qres", 0); + break; + case Tune::QUALITY: + av_dict_set(&options, "multipass", "fullres", 0); + av_dict_set(&options, "preset", "p6", 0); + av_dict_set_int(&options, "rc-lookahead", 0, 0); + break; + } + dict_set_profile(codec_context, vendor, color_depth, &options); if(codec_context->codec_id == AV_CODEC_ID_H264) { @@ -1113,7 +1129,7 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide static void usage_header() { const bool inside_flatpak = getenv("FLATPAK_ID") != NULL; const char *program_name = inside_flatpak ? "flatpak run --command=gpu-screen-recorder com.dec05eba.gpu_screen_recorder" : "gpu-screen-recorder"; - printf("usage: %s -w <window_id|monitor|focused|portal|region> [-c <container_format>] [-s WxH] [-region WxH+X+Y] [-f <fps>] [-a <audio_input>] [-q <quality>] [-r <replay_buffer_size_sec>] [-restart-replay-on-save yes|no] [-k h264|hevc|av1|vp8|vp9|hevc_hdr|av1_hdr|hevc_10bit|av1_10bit] [-ac aac|opus|flac] [-ab <bitrate>] [-oc yes|no] [-fm cfr|vfr|content] [-bm auto|qp|vbr|cbr] [-cr limited|full] [-df yes|no] [-sc <script_path>] [-cursor yes|no] [-keyint <value>] [-restore-portal-session yes|no] [-portal-session-token-filepath filepath] [-encoder gpu|cpu] [-o <output_file>] [--list-capture-options [card_path] [vendor]] [--list-audio-devices] [--list-application-audio] [-v yes|no] [-gl-debug yes|no] [--version] [-h|--help]\n", program_name); + printf("usage: %s -w <window_id|monitor|focused|portal|region> [-c <container_format>] [-s WxH] [-region WxH+X+Y] [-f <fps>] [-a <audio_input>] [-q <quality>] [-r <replay_buffer_size_sec>] [-restart-replay-on-save yes|no] [-k h264|hevc|av1|vp8|vp9|hevc_hdr|av1_hdr|hevc_10bit|av1_10bit] [-ac aac|opus|flac] [-ab <bitrate>] [-oc yes|no] [-fm cfr|vfr|content] [-bm auto|qp|vbr|cbr] [-cr limited|full] [-tune performance|quality] [-df yes|no] [-sc <script_path>] [-cursor yes|no] [-keyint <value>] [-restore-portal-session yes|no] 
[-portal-session-token-filepath filepath] [-encoder gpu|cpu] [-o <output_file>] [--list-capture-options [card_path] [vendor]] [--list-audio-devices] [--list-application-audio] [-v yes|no] [-gl-debug yes|no] [--version] [-h|--help]\n", program_name); fflush(stdout); } @@ -1218,6 +1234,10 @@ static void usage_full() { printf(" Note that some buggy video players (such as vlc) are unable to correctly display videos in full color range and when upload the video to websites the website\n"); printf(" might re-encoder the video to make the video limited color range.\n"); printf("\n"); + printf(" -tune\n"); + printf(" Tune for performance or quality. Should be either 'performance' or 'quality'. At the moment this option only has an effect on Nvidia where setting this to quality\n"); + printf(" sets options such as preset, multipass and b frames. Optional, set to 'performance' by default.\n"); + printf("\n"); printf(" -df Organise replays in folders based on the current date.\n"); printf("\n"); printf(" -sc Run a script on the saved video file (asynchronously). The first argument to the script is the filepath to the saved video file and the second argument is the recording type (either \"regular\" or \"replay\").\n"); @@ -1296,7 +1316,7 @@ static void usage_full() { printf("NOTES:\n"); printf(" Send signal SIGINT to gpu-screen-recorder (Ctrl+C, or killall -SIGINT gpu-screen-recorder) to stop and save the recording. When in replay mode this stops recording without saving.\n"); printf(" Send signal SIGUSR1 to gpu-screen-recorder (killall -SIGUSR1 gpu-screen-recorder) to save a replay (when in replay mode).\n"); - printf(" Send signal SIGUSR2 to gpu-screen-recorder (killall -SIGUSR2 gpu-screen-recorder) to pause/unpause recording. Only applicable and useful when recording (not streaming nor replay).\n"); + printf(" Send signal SIGUSR2 to gpu-screen-recorder (killall -SIGUSR2 gpu-screen-recorder) to pause/unpause recording. 
Only applicable when recording (not streaming nor replay).\n"); printf("\n"); printf("EXAMPLES:\n"); printf(" %s -w screen -f 60 -a default_output -o video.mp4\n", program_name); @@ -3374,6 +3394,7 @@ int main(int argc, char **argv) { { "-df", Arg { {}, is_optional, !is_list, ArgType::BOOLEAN, {false} } }, { "-sc", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, { "-cr", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, + { "-tune", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, { "-cursor", Arg { {}, is_optional, !is_list, ArgType::BOOLEAN, {false} } }, { "-keyint", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, { "-restore-portal-session", Arg { {}, is_optional, !is_list, ArgType::BOOLEAN, {false} } }, @@ -3449,7 +3470,7 @@ int main(int argc, char **argv) { //} else if(strcmp(video_codec_to_use, "hevc_vulkan") == 0) { // video_codec = VideoCodec::HEVC_VULKAN; } else if(strcmp(video_codec_to_use, "auto") != 0) { - fprintf(stderr, "Error: -k should either be either 'auto', 'h264', 'hevc', 'av1', 'vp8', 'vp9', 'hevc_hdr', 'av1_hdr', 'hevc_10bit' or 'av1_10bit', got: '%s'\n", video_codec_to_use); + fprintf(stderr, "Error: -k should either be 'auto', 'h264', 'hevc', 'av1', 'vp8', 'vp9', 'hevc_hdr', 'av1_hdr', 'hevc_10bit' or 'av1_10bit', got: '%s'\n", video_codec_to_use); usage(); } @@ -3465,7 +3486,7 @@ int main(int argc, char **argv) { } else if(strcmp(audio_codec_to_use, "flac") == 0) { audio_codec = AudioCodec::FLAC; } else { - fprintf(stderr, "Error: -ac should either be either 'aac', 'opus' or 'flac', got: '%s'\n", audio_codec_to_use); + fprintf(stderr, "Error: -ac should either be 'aac', 'opus' or 'flac', got: '%s'\n", audio_codec_to_use); usage(); } @@ -3564,7 +3585,7 @@ int main(int argc, char **argv) { } else if(strcmp(pixfmt, "yuv444") == 0) { pixel_format = PixelFormat::YUV444; } else { - fprintf(stderr, "Error: -pixfmt should either be either 'yuv420', or 'yuv444', got: '%s'\n", pixfmt); + 
fprintf(stderr, "Error: -pixfmt should either be 'yuv420', or 'yuv444', got: '%s'\n", pixfmt); usage(); } @@ -3736,7 +3757,7 @@ int main(int argc, char **argv) { } else if(strcmp(framerate_mode_str, "content") == 0) { framerate_mode = FramerateMode::CONTENT; } else { - fprintf(stderr, "Error: -fm should either be either 'cfr', 'vfr' or 'content', got: '%s'\n", framerate_mode_str); + fprintf(stderr, "Error: -fm should either be 'cfr', 'vfr' or 'content', got: '%s'\n", framerate_mode_str); usage(); } @@ -3757,7 +3778,7 @@ int main(int argc, char **argv) { } else if(strcmp(bitrate_mode_str, "cbr") == 0) { bitrate_mode = BitrateMode::CBR; } else if(strcmp(bitrate_mode_str, "auto") != 0) { - fprintf(stderr, "Error: -bm should either be either 'auto', 'qp', 'vbr' or 'cbr', got: '%s'\n", bitrate_mode_str); + fprintf(stderr, "Error: -bm should either be 'auto', 'qp', 'vbr' or 'cbr', got: '%s'\n", bitrate_mode_str); usage(); } @@ -3810,7 +3831,7 @@ int main(int argc, char **argv) { } else if(strcmp(quality_str, "ultra") == 0) { quality = VideoQuality::ULTRA; } else { - fprintf(stderr, "Error: -q should either be either 'medium', 'high', 'very_high' or 'ultra', got: '%s'\n", quality_str); + fprintf(stderr, "Error: -q should either be 'medium', 'high', 'very_high' or 'ultra', got: '%s'\n", quality_str); usage(); } } @@ -3825,7 +3846,21 @@ int main(int argc, char **argv) { } else if(strcmp(color_range_str, "full") == 0) { color_range = GSR_COLOR_RANGE_FULL; } else { - fprintf(stderr, "Error: -cr should either be either 'limited' or 'full', got: '%s'\n", color_range_str); + fprintf(stderr, "Error: -cr should either be 'limited' or 'full', got: '%s'\n", color_range_str); + usage(); + } + + Tune tune = Tune::PERFORMANCE; + const char *tune_str = args["-tune"].value(); + if(!tune_str) + tune_str = "performance"; + + if(strcmp(tune_str, "performance") == 0) { + tune = Tune::PERFORMANCE; + } else if(strcmp(tune_str, "quality") == 0) { + tune = Tune::QUALITY; + } else { + 
fprintf(stderr, "Error: -tune should either be 'performance' or 'quality', got: '%s'\n", tune_str); usage(); } @@ -3843,7 +3878,7 @@ int main(int argc, char **argv) { } if(output_resolution.x < 0 || output_resolution.y < 0) { - fprintf(stderr, "Error: invalud value for option -s '%s', expected width and height to be greater or equal to 0\n", output_resolution_str); + fprintf(stderr, "Error: invalid value for option -s '%s', expected width and height to be greater or equal to 0\n", output_resolution_str); usage(); } } @@ -3863,7 +3898,7 @@ int main(int argc, char **argv) { } if(region_size.x < 0 || region_size.y < 0 || region_position.x < 0 || region_position.y < 0) { - fprintf(stderr, "Error: invalud value for option -region '%s', expected width, height, x and y to be greater or equal to 0\n", region_str); + fprintf(stderr, "Error: invalid value for option -region '%s', expected width, height, x and y to be greater or equal to 0\n", region_str); usage(); } } else { @@ -3990,6 +4025,9 @@ int main(int argc, char **argv) { if(replay_buffer_size_secs == -1) video_stream = create_stream(av_format_context, video_codec_context); + if(tune == Tune::QUALITY) + video_codec_context->max_b_frames = 2; + AVFrame *video_frame = av_frame_alloc(); if(!video_frame) { fprintf(stderr, "Error: Failed to allocate video frame\n"); @@ -4054,7 +4092,7 @@ int main(int argc, char **argv) { if(use_software_video_encoder) { open_video_software(video_codec_context, quality, pixel_format, hdr, color_depth, bitrate_mode); } else { - open_video_hardware(video_codec_context, quality, very_old_gpu, egl.gpu_info.vendor, pixel_format, hdr, color_depth, bitrate_mode, video_codec, low_power); + open_video_hardware(video_codec_context, quality, very_old_gpu, egl.gpu_info.vendor, pixel_format, hdr, color_depth, bitrate_mode, video_codec, low_power, tune); } if(video_stream) avcodec_parameters_from_context(video_stream->codecpar, video_codec_context); diff --git a/src/window/wayland.c 
b/src/window/wayland.c index 03ad47a..7e319fb 100644 --- a/src/window/wayland.c +++ b/src/window/wayland.c @@ -9,19 +9,23 @@ #include <stdint.h> #include <wayland-client.h> #include <wayland-egl.h> +#include "xdg-output-unstable-v1-client-protocol.h" #define GSR_MAX_OUTPUTS 32 +typedef struct gsr_window_wayland gsr_window_wayland; + typedef struct { uint32_t wl_name; - void *output; + struct wl_output *output; + struct zxdg_output_v1 *xdg_output; vec2i pos; vec2i size; int32_t transform; char *name; } gsr_wayland_output; -typedef struct { +struct gsr_window_wayland { void *display; void *window; void *registry; @@ -29,7 +33,8 @@ typedef struct { void *compositor; gsr_wayland_output outputs[GSR_MAX_OUTPUTS]; int num_outputs; -} gsr_window_wayland; + struct zxdg_output_manager_v1 *xdg_output_manager; +}; static void output_handle_geometry(void *data, struct wl_output *wl_output, int32_t x, int32_t y, int32_t phys_width, int32_t phys_height, @@ -95,7 +100,7 @@ static const struct wl_output_listener output_listener = { static void registry_add_object(void *data, struct wl_registry *registry, uint32_t name, const char *interface, uint32_t version) { (void)version; gsr_window_wayland *window_wayland = data; - if (strcmp(interface, "wl_compositor") == 0) { + if(strcmp(interface, "wl_compositor") == 0) { if(window_wayland->compositor) { wl_compositor_destroy(window_wayland->compositor); window_wayland->compositor = NULL; @@ -103,7 +108,7 @@ static void registry_add_object(void *data, struct wl_registry *registry, uint32 window_wayland->compositor = wl_registry_bind(registry, name, &wl_compositor_interface, 1); } else if(strcmp(interface, wl_output_interface.name) == 0) { if(version < 4) { - fprintf(stderr, "gsr warning: wl output interface version is < 4, expected >= 4 to capture a monitor. 
Using KMS capture instead\n"); + fprintf(stderr, "gsr warning: wl output interface version is < 4, expected >= 4 to capture a monitor\n"); return; } @@ -123,6 +128,17 @@ static void registry_add_object(void *data, struct wl_registry *registry, uint32 .name = NULL, }; wl_output_add_listener(gsr_output->output, &output_listener, gsr_output); + } else if(strcmp(interface, zxdg_output_manager_v1_interface.name) == 0) { + if(version < 1) { + fprintf(stderr, "gsr warning: xdg output interface version is < 1, expected >= 1 to capture a monitor\n"); + return; + } + + if(window_wayland->xdg_output_manager) { + zxdg_output_manager_v1_destroy(window_wayland->xdg_output_manager); + window_wayland->xdg_output_manager = NULL; + } + window_wayland->xdg_output_manager = wl_registry_bind(registry, name, &zxdg_output_manager_v1_interface, 1); } } @@ -138,6 +154,60 @@ static struct wl_registry_listener registry_listener = { .global_remove = registry_remove_object, }; +static void xdg_output_logical_position(void *data, struct zxdg_output_v1 *zxdg_output_v1, int32_t x, int32_t y) { + (void)zxdg_output_v1; + gsr_wayland_output *gsr_xdg_output = data; + gsr_xdg_output->pos.x = x; + gsr_xdg_output->pos.y = y; +} + +static void xdg_output_handle_logical_size(void *data, struct zxdg_output_v1 *xdg_output, int32_t width, int32_t height) { + (void)data; + (void)xdg_output; + (void)width; + (void)height; +} + +static void xdg_output_handle_done(void *data, struct zxdg_output_v1 *xdg_output) { + (void)data; + (void)xdg_output; +} + +static void xdg_output_handle_name(void *data, struct zxdg_output_v1 *xdg_output, const char *name) { + (void)data; + (void)xdg_output; + (void)name; +} + +static void xdg_output_handle_description(void *data, struct zxdg_output_v1 *xdg_output, const char *description) { + (void)data; + (void)xdg_output; + (void)description; +} + +static const struct zxdg_output_v1_listener xdg_output_listener = { + .logical_position = xdg_output_logical_position, + .logical_size = 
xdg_output_handle_logical_size, + .done = xdg_output_handle_done, + .name = xdg_output_handle_name, + .description = xdg_output_handle_description, +}; + +static void gsr_window_wayland_set_monitor_outputs_from_xdg_output(gsr_window_wayland *self) { + if(!self->xdg_output_manager) { + fprintf(stderr, "gsr warning: zxdg_output_manager not found. registered monitor positions might be incorrect\n"); + return; + } + + for(int i = 0; i < self->num_outputs; ++i) { + self->outputs[i].xdg_output = zxdg_output_manager_v1_get_xdg_output(self->xdg_output_manager, self->outputs[i].output); + zxdg_output_v1_add_listener(self->outputs[i].xdg_output, &xdg_output_listener, &self->outputs[i]); + } + + // Fetch xdg_output + wl_display_roundtrip(self->display); +} + static void gsr_window_wayland_deinit(gsr_window_wayland *self) { if(self->window) { wl_egl_window_destroy(self->window); @@ -159,9 +229,19 @@ static void gsr_window_wayland_deinit(gsr_window_wayland *self) { free(self->outputs[i].name); self->outputs[i].name = NULL; } + + if(self->outputs[i].xdg_output) { + zxdg_output_v1_destroy(self->outputs[i].xdg_output); + self->outputs[i].output = NULL; + } } self->num_outputs = 0; + if(self->xdg_output_manager) { + zxdg_output_manager_v1_destroy(self->xdg_output_manager); + self->xdg_output_manager = NULL; + } + if(self->compositor) { wl_compositor_destroy(self->compositor); self->compositor = NULL; @@ -194,6 +274,8 @@ static bool gsr_window_wayland_init(gsr_window_wayland *self) { // Fetch wl_output wl_display_roundtrip(self->display); + gsr_window_wayland_set_monitor_outputs_from_xdg_output(self); + if(!self->compositor) { fprintf(stderr, "gsr error: gsr_window_wayland_init failed: failed to find compositor\n"); goto fail; |