diff options
author | dec05eba <dec05eba@protonmail.com> | 2025-04-04 01:15:07 +0200 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2025-04-04 01:15:07 +0200 |
commit | 12dd2cd3e101e2336a4db96ef381dfcc88bcde73 (patch) | |
tree | 298c2b3692f94106fe7450a7907f7d1c03295ac7 | |
parent | 33467cb7f4bb4b81cff247e4d28c88f00c8bbdf6 (diff) |
Optimize compute shaders
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | TODO | 7 | ||||
-rw-r--r-- | include/color_conversion.h | 4 | ||||
-rw-r--r-- | src/capture/kms.c | 6 | ||||
-rw-r--r-- | src/capture/nvfbc.c | 2 | ||||
-rw-r--r-- | src/capture/portal.c | 4 | ||||
-rw-r--r-- | src/capture/xcomposite.c | 4 | ||||
-rw-r--r-- | src/capture/ximage.c | 4 | ||||
-rw-r--r-- | src/color_conversion.c | 152 |
9 files changed, 120 insertions, 65 deletions
@@ -42,7 +42,7 @@ When recording GTA V at 4k on highest settings, fps drops from 60 to 23 when usi GPU Screen Recorder also produces much smoother videos than OBS when GPU utilization is close to 100%, see comparison here: [https://www.youtube.com/watch?v=zfj4sNVLLLg](https://www.youtube.com/watch?v=zfj4sNVLLLg).\ GPU Screen Recorder has much better performance than OBS Studio even with version 30.2 that does "zero-copy" recording and encoding, see: [https://www.youtube.com/watch?v=jdroRjibsDw](https://www.youtube.com/watch?v=jdroRjibsDw).\ It is recommended to save the video to an SSD because of the large file size, which a slow HDD might not be fast enough to handle. Using variable framerate mode (-fm vfr) which is the default is also recommended as this reduces encoding load. Ultra quality is also overkill most of the time, very high (the default) or lower quality is usually enough.\ -Note that recording on AMD can have some performance issues on Wayland in the recording itself when recording without desktop portal unless your mesa version is 25.0.0 or greater. +Note that for best performance you should close other screen recorders such as OBS Studio when using GPU Screen Recorder even if they are not recording, since they can affect performance even when idle. This is the case with OBS Studio. ## Note about optimal performance on NVIDIA NVIDIA driver has a "feature" (read: bug) where it will downclock memory transfer rate when a program uses cuda (or nvenc, which uses cuda), such as GPU Screen Recorder. To work around this bug, GPU Screen Recorder can overclock your GPU memory transfer rate to its normal optimal level.\ To enable overclocking for optimal performance use the `-oc` option when running GPU Screen Recorder. You also need to have "Coolbits" NVIDIA X setting set to "12" to enable overclocking. 
You can automatically add this option if you run `sudo nvidia-xconfig --cool-bits=12` and then reboot your computer.\ @@ -4,7 +4,6 @@ See https://trac.ffmpeg.org/wiki/EncodingForStreamingSites for optimizing stream Look at VK_EXT_external_memory_dma_buf. Use mov+faststart. Allow recording all monitors/selected monitor without nvfbc by recording the compositor proxy window and only recording the part that matches the monitor(s). -Allow recording a region by recording the compositor proxy window / nvfbc window and copying part of it. Support amf and qsv. Disable flipping on nvidia? this might fix some stuttering issues on some setups. See NvCtrlGetAttribute/NvCtrlSetAttributeAndGetStatus NV_CTRL_SYNC_TO_VBLANK https://github.com/NVIDIA/nvidia-settings/blob/d5f022976368cbceb2f20b838ddb0bf992f0cfb9/src/gtk%2B-2.x/ctkopengl.c. Replays seem to have some issues with audio/video. Why? @@ -256,3 +255,9 @@ Do proper exit, to call gsr_capture_destroy which will properly stop gsr-kms-ser Replace all scissors with clearing textures if the cursor hits the outside of the frame image. Cursor position might be slightly wrong on rotated monitor. + +External texture doesn't work on nvidia x11, probably because of glx context (requires gles es). External texture is not used on nvidia x11 right now so it's not an issue. + +Add option to save replay buffer on disk instead of ram. + +nvfbc capture cursor with cursor.h instead and composite that on top. This allows us to also always get a cursor in direct capture mode. This could possibly give better performance as well.
\ No newline at end of file diff --git a/include/color_conversion.h b/include/color_conversion.h index 1c067e2..4c3b615 100644 --- a/include/color_conversion.h +++ b/include/color_conversion.h @@ -6,7 +6,7 @@ #include "vec2.h" #include <stdbool.h> -#define GSR_COLOR_CONVERSION_MAX_SHADERS 6 +#define GSR_COLOR_CONVERSION_MAX_SHADERS 12 #define GSR_COLOR_CONVERSION_MAX_FRAMEBUFFERS 2 typedef enum { @@ -72,7 +72,7 @@ typedef struct { int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params); void gsr_color_conversion_deinit(gsr_color_conversion *self); -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color); +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, gsr_source_color source_color, bool external_texture, bool alpha_blending); void gsr_color_conversion_clear(gsr_color_conversion *self); gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation); diff --git a/src/capture/kms.c b/src/capture/kms.c index 8bb09a0..18858f2 100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -503,7 +503,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->cursor_texture_id, cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y}, (vec2i){0, 0}, cursor_size, cursor_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), GSR_SOURCE_COLOR_RGB, cursor_texture_id_is_external, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ 
-531,7 +531,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id, cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y}, (vec2i){0, 0}, self->x11_cursor.size, self->x11_cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -647,7 +647,7 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id, target_pos, output_size, capture_pos, self->capture_size, original_frame_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), GSR_SOURCE_COLOR_RGB, self->external_texture_fallback, false); if(self->params.record_cursor) { gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane); diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index 4ed19b3..b92bd41 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -397,7 +397,7 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex], target_pos, (vec2i){output_size.x, output_size.y}, self->params.region_position, frame_size, original_frame_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR); + GSR_ROT_0, GSR_SOURCE_COLOR_BGR, false, false); //self->params.egl->glFlush(); //self->params.egl->glFinish(); diff --git a/src/capture/portal.c b/src/capture/portal.c index ec87ab6..27f514f 100644 --- a/src/capture/portal.c +++ b/src/capture/portal.c @@ -348,7 +348,7 @@ static int gsr_capture_portal_capture(gsr_capture 
*cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id, target_pos, output_size, (vec2i){region.x, region.y}, self->capture_size, self->capture_size, - GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, using_external_image, false); if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) { const vec2d scale = { @@ -366,7 +366,7 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id, (vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y}, (vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height}, (vec2i){cursor_region.width, cursor_region.height}, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c index 2d0574c..db41f63 100644 --- a/src/capture/xcomposite.c +++ b/src/capture/xcomposite.c @@ -259,7 +259,7 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), target_pos, output_size, (vec2i){0, 0}, self->texture_size, self->texture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, false); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -280,7 +280,7 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, self->cursor.size, - GSR_ROT_0, 
false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); } //self->params.egl->glFlush(); diff --git a/src/capture/ximage.c b/src/capture/ximage.c index 1f86d93..9b02907 100644 --- a/src/capture/ximage.c +++ b/src/capture/ximage.c @@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_id, target_pos, output_size, (vec2i){0, 0}, self->capture_size, self->capture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, false); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/color_conversion.c b/src/color_conversion.c index 27ef488..1f95fd3 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -5,15 +5,18 @@ #include <string.h> #include <assert.h> -#define SHADER_INDEX_Y 0 -#define SHADER_INDEX_UV 1 -#define SHADER_INDEX_Y_EXTERNAL 2 -#define SHADER_INDEX_UV_EXTERNAL 3 -#define SHADER_INDEX_RGB 4 -#define SHADER_INDEX_RGB_EXTERNAL 5 - -// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result -// with a call to mix to choose source/output color. 
+#define SHADER_INDEX_Y 0 +#define SHADER_INDEX_UV 1 +#define SHADER_INDEX_Y_EXTERNAL 2 +#define SHADER_INDEX_UV_EXTERNAL 3 +#define SHADER_INDEX_RGB 4 +#define SHADER_INDEX_RGB_EXTERNAL 5 +#define SHADER_INDEX_Y_BLEND 6 +#define SHADER_INDEX_UV_BLEND 7 +#define SHADER_INDEX_Y_EXTERNAL_BLEND 8 +#define SHADER_INDEX_UV_EXTERNAL_BLEND 9 +#define SHADER_INDEX_RGB_BLEND 10 +#define SHADER_INDEX_RGB_EXTERNAL_BLEND 11 /* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */ @@ -75,9 +78,7 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color return NULL; } -// TODO: Make alpha blending optional -// TODO: Optimize these shaders. -static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture, bool alpha_blending) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); char compute_shader[2048]; @@ -98,16 +99,17 @@ static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uni "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 1;\n" // size/2 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" - " vec2 tex_coord = 
vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord);\n" + " vec4 source_color = texelFetch(img_input, ivec2(rotated_texel_coord), 0);\n" " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 output_color_yuv = %s;\n" " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n" " imageStore(img_output, texel_coord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix); + "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix, + alpha_blending ? "texelFetch(img_background, ivec2(output_texel_coord), 0)" : "source_color_yuv"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -119,7 +121,7 @@ static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uni return 0; } -static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture, bool alpha_blending) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); char compute_shader[2048]; @@ -140,16 +142,17 @@ static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_un "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 2;\n" // size/4 " ivec2 output_size = 
textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n" - " vec2 output_texel_coord = vec2(texel_coord - source_position/2 + target_position/2) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord * 2.0);\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" + " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" + " vec4 source_color = texelFetch(img_input, ivec2(rotated_texel_coord) << 1, 0);\n" " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 output_color_yuv = %s;\n" " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n" - " imageStore(img_output, texel_coord + target_position/2, vec4(uv_color, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix); + " imageStore(img_output, texel_coord + target_position, vec4(uv_color, 1.0, 1.0));\n" + "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix, + alpha_blending ? 
"texelFetch(img_background, ivec2(output_texel_coord), 0)" : "source_color_yuv"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -161,7 +164,7 @@ static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_un return 0; } -static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, bool external_texture) { +static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, bool external_texture, bool alpha_blending) { char compute_shader[2048]; snprintf(compute_shader, sizeof(compute_shader), "#version 310 es\n" @@ -179,15 +182,16 @@ static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_u "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 1;\n" // size/2 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord);\n" - " vec4 output_color = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 source_color = texelFetch(img_input, ivec2(rotated_texel_coord), 0);\n" + " vec4 output_color = %s;\n" " vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n" " imageStore(img_output, texel_coord + target_position, vec4(color, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? 
"samplerExternalOES" : "sampler2D"); + "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", + alpha_blending ? "texelFetch(img_background, ivec2(output_texel_coord), 0)" : "source_color"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -267,23 +271,43 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y], self->params.egl, &self->uniforms[SHADER_INDEX_Y], self->max_local_size_dim, params->destination_color, params->color_range, false) != 0) { + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y], self->params.egl, &self->uniforms[SHADER_INDEX_Y], self->max_local_size_dim, params->destination_color, params->color_range, false, false) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV], self->params.egl, &self->uniforms[SHADER_INDEX_UV], self->max_local_size_dim, params->destination_color, params->color_range, false) != 0) { + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV], self->params.egl, &self->uniforms[SHADER_INDEX_UV], self->max_local_size_dim, params->destination_color, params->color_range, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + goto err; + } + + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_Y_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, false, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_UV_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, false, 
true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); goto err; } if(self->params.load_external_image_shader) { - if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true) != 0) { + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true, false) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true) != 0) { + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + goto err; + } + + if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, true, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL_BLEND], self->max_local_size_dim, params->destination_color, params->color_range, true, true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); goto err; } @@ -296,13 +320,23 @@ int 
gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB], self->params.egl, &self->uniforms[SHADER_INDEX_RGB], self->max_local_size_dim, false) != 0) { + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB], self->params.egl, &self->uniforms[SHADER_INDEX_RGB], self->max_local_size_dim, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_BLEND], self->max_local_size_dim, false, true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } if(self->params.load_external_image_shader) { - if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL], self->max_local_size_dim, true) != 0) { + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL], self->max_local_size_dim, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + goto err; + } + + if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL_BLEND], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL_BLEND], self->max_local_size_dim, true, true) != 0) { fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } @@ -411,7 +445,7 @@ typedef enum { GSR_COLOR_COMP_RGB } gsr_color_component; -static int color_component_get_texture_index(gsr_color_component color_component) { +static int color_component_get_destination_texture_index(gsr_color_component color_component) { switch(color_component) { case GSR_COLOR_COMP_Y: return 0; case GSR_COLOR_COMP_UV: return 1; @@ -431,23 +465,38 @@ static 
unsigned int color_component_get_color_format(gsr_color_component color_c return GL_RGBA8; } -static int color_component_get_shader_index(gsr_color_component color_component, bool external_texture) { +static int color_component_get_shader_index(gsr_color_component color_component, bool external_texture, bool alpha_blending) { switch(color_component) { - case GSR_COLOR_COMP_Y: return external_texture ? SHADER_INDEX_Y_EXTERNAL : SHADER_INDEX_Y; - case GSR_COLOR_COMP_UV: return external_texture ? SHADER_INDEX_UV_EXTERNAL : SHADER_INDEX_UV; - case GSR_COLOR_COMP_RGB: return external_texture ? SHADER_INDEX_RGB_EXTERNAL : SHADER_INDEX_RGB; + case GSR_COLOR_COMP_Y: { + if(external_texture) + return alpha_blending ? SHADER_INDEX_Y_EXTERNAL_BLEND : SHADER_INDEX_Y_EXTERNAL; + else + return alpha_blending ? SHADER_INDEX_Y_BLEND : SHADER_INDEX_Y; + } + case GSR_COLOR_COMP_UV: { + if(external_texture) + return alpha_blending ? SHADER_INDEX_UV_EXTERNAL_BLEND : SHADER_INDEX_UV_EXTERNAL; + else + return alpha_blending ? SHADER_INDEX_UV_BLEND : SHADER_INDEX_UV; + } + case GSR_COLOR_COMP_RGB: { + if(external_texture) + return alpha_blending ? SHADER_INDEX_RGB_EXTERNAL_BLEND : SHADER_INDEX_RGB_EXTERNAL; + else + return alpha_blending ? 
SHADER_INDEX_RGB_BLEND : SHADER_INDEX_RGB; + } } assert(false); return SHADER_INDEX_RGB; } -static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *self, bool external_texture, float rotation_matrix[2][2], vec2i source_position, vec2i destination_pos, vec2i destination_size, vec2f scale, bool use_16bit_colors, gsr_color_component color_component) { - const int shader_index = color_component_get_shader_index(color_component, external_texture); - const int texture_index = color_component_get_texture_index(color_component); +static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *self, bool external_texture, bool alpha_blending, float rotation_matrix[2][2], vec2i source_position, vec2i destination_pos, vec2i destination_size, vec2f scale, bool use_16bit_colors, gsr_color_component color_component) { + const int shader_index = color_component_get_shader_index(color_component, external_texture, alpha_blending); + const int destination_texture_index = color_component_get_destination_texture_index(color_component); const unsigned int color_format = color_component_get_color_format(color_component, use_16bit_colors); self->params.egl->glActiveTexture(GL_TEXTURE1); - self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[texture_index]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[destination_texture_index]); self->params.egl->glActiveTexture(GL_TEXTURE0); gsr_color_uniforms *uniform = &self->uniforms[shader_index]; @@ -456,13 +505,13 @@ static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *s self->params.egl->glUniform2i(uniform->source_position, source_position.x, source_position.y); self->params.egl->glUniform2i(uniform->target_position, destination_pos.x, destination_pos.y); self->params.egl->glUniform2f(uniform->scale, scale.x, scale.y); - self->params.egl->glBindImageTexture(0, self->params.destination_textures[texture_index], 0, 
GL_FALSE, 0, GL_WRITE_ONLY, color_format); + self->params.egl->glBindImageTexture(0, self->params.destination_textures[destination_texture_index], 0, GL_FALSE, 0, GL_WRITE_ONLY, color_format); const double num_groups_x = ceil((double)destination_size.x/(double)self->max_local_size_dim); const double num_groups_y = ceil((double)destination_size.y/(double)self->max_local_size_dim); self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); } -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) { +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, gsr_source_color source_color, bool external_texture, bool alpha_blending) { vec2f scale = {0.0f, 0.0f}; if(source_size.x > 0 && source_size.y > 0) scale = (vec2f){ (double)destination_size.x/(double)source_size.x, (double)destination_size.y/(double)source_size.y }; @@ -482,12 +531,13 @@ void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_ case GSR_DESTINATION_COLOR_NV12: case GSR_DESTINATION_COLOR_P010: { const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010; - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, destination_size, scale, use_16bit_colors, GSR_COLOR_COMP_Y); - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, (vec2i){destination_size.x/2, destination_size.y/2}, scale, use_16bit_colors, GSR_COLOR_COMP_UV); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, 
source_position, destination_pos, destination_size, scale, use_16bit_colors, GSR_COLOR_COMP_Y); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, (vec2i){source_position.x/2, source_position.y/2}, + (vec2i){destination_pos.x/2, destination_pos.y/2}, (vec2i){destination_size.x/2, destination_size.y/2}, scale, use_16bit_colors, GSR_COLOR_COMP_UV); break; } case GSR_DESTINATION_COLOR_RGB8: { - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, destination_size, scale, false, GSR_COLOR_COMP_RGB); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, source_position, destination_pos, destination_size, scale, false, GSR_COLOR_COMP_RGB); break; } } |