diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/capture/kms.c | 7 | ||||
-rw-r--r-- | src/capture/nvfbc.c | 2 | ||||
-rw-r--r-- | src/capture/portal.c | 4 | ||||
-rw-r--r-- | src/capture/xcomposite.c | 4 | ||||
-rw-r--r-- | src/capture/ximage.c | 4 | ||||
-rw-r--r-- | src/color_conversion.c | 701 | ||||
-rw-r--r-- | src/egl.c | 8 | ||||
-rw-r--r-- | src/encoder/video/vulkan.c | 267 | ||||
-rw-r--r-- | src/main.cpp | 105 | ||||
-rw-r--r-- | src/shader.c | 20 | ||||
-rw-r--r-- | src/utils.c | 19 | ||||
-rw-r--r-- | src/window/wayland.c | 103 |
12 files changed, 916 insertions, 328 deletions
diff --git a/src/capture/kms.c b/src/capture/kms.c index 2cbd757..18858f2 100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -503,7 +503,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->cursor_texture_id, cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y}, (vec2i){0, 0}, cursor_size, cursor_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), GSR_SOURCE_COLOR_RGB, cursor_texture_id_is_external, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -531,7 +531,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id, cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y}, (vec2i){0, 0}, self->x11_cursor.size, self->x11_cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -574,6 +574,7 @@ static void gsr_capture_kms_update_connector_ids(gsr_capture_kms *self) { monitor.name = self->params.display_to_capture; vec2i monitor_position = {0, 0}; + // TODO: This is cached. We need it updated. drm_monitor_get_display_server_data(self->params.egl->window, &monitor, &self->monitor_rotation, &monitor_position); self->capture_pos = monitor.pos; @@ -646,7 +647,7 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id, target_pos, output_size, capture_pos, self->capture_size, original_frame_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), GSR_SOURCE_COLOR_RGB, self->external_texture_fallback, false); if(self->params.record_cursor) { gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane); diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index 4ed19b3..b92bd41 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -397,7 +397,7 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex], target_pos, (vec2i){output_size.x, output_size.y}, self->params.region_position, frame_size, original_frame_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR); + GSR_ROT_0, GSR_SOURCE_COLOR_BGR, false, false); //self->params.egl->glFlush(); //self->params.egl->glFinish(); diff --git a/src/capture/portal.c b/src/capture/portal.c index ec87ab6..27f514f 100644 --- a/src/capture/portal.c +++ b/src/capture/portal.c @@ -348,7 +348,7 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id, target_pos, output_size, (vec2i){region.x, region.y}, self->capture_size, self->capture_size, - GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, using_external_image, false); if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) { const vec2d scale = { @@ -366,7 +366,7 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id, (vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y}, (vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height}, (vec2i){cursor_region.width, cursor_region.height}, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c index 2d0574c..db41f63 100644 --- a/src/capture/xcomposite.c +++ b/src/capture/xcomposite.c @@ -259,7 +259,7 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), target_pos, output_size, (vec2i){0, 0}, self->texture_size, self->texture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, false); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -280,7 +280,7 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); } //self->params.egl->glFlush(); diff --git a/src/capture/ximage.c b/src/capture/ximage.c index 1f86d93..9b02907 100644 --- a/src/capture/ximage.c +++ b/src/capture/ximage.c @@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_id, target_pos, output_size, (vec2i){0, 0}, self->capture_size, self->capture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, false); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, GSR_SOURCE_COLOR_RGB, false, true); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/color_conversion.c b/src/color_conversion.c index 27ef488..88dc398 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -5,15 +5,25 @@ #include <string.h> #include <assert.h> -#define SHADER_INDEX_Y 0 -#define SHADER_INDEX_UV 1 -#define SHADER_INDEX_Y_EXTERNAL 2 -#define SHADER_INDEX_UV_EXTERNAL 3 -#define SHADER_INDEX_RGB 4 -#define SHADER_INDEX_RGB_EXTERNAL 5 - -// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result -// with a call to mix to choose source/output color. +#define COMPUTE_SHADER_INDEX_Y 0 +#define COMPUTE_SHADER_INDEX_UV 1 +#define COMPUTE_SHADER_INDEX_Y_EXTERNAL 2 +#define COMPUTE_SHADER_INDEX_UV_EXTERNAL 3 +#define COMPUTE_SHADER_INDEX_RGB 4 +#define COMPUTE_SHADER_INDEX_RGB_EXTERNAL 5 +#define COMPUTE_SHADER_INDEX_Y_BLEND 6 +#define COMPUTE_SHADER_INDEX_UV_BLEND 7 +#define COMPUTE_SHADER_INDEX_Y_EXTERNAL_BLEND 8 +#define COMPUTE_SHADER_INDEX_UV_EXTERNAL_BLEND 9 +#define COMPUTE_SHADER_INDEX_RGB_BLEND 10 +#define COMPUTE_SHADER_INDEX_RGB_EXTERNAL_BLEND 11 + +#define GRAPHICS_SHADER_INDEX_Y 0 +#define GRAPHICS_SHADER_INDEX_UV 1 +#define GRAPHICS_SHADER_INDEX_Y_EXTERNAL 2 +#define GRAPHICS_SHADER_INDEX_UV_EXTERNAL 3 +#define GRAPHICS_SHADER_INDEX_RGB 4 +#define GRAPHICS_SHADER_INDEX_RGB_EXTERNAL 5 /* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */ @@ -75,20 +85,35 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color return NULL; } -// TODO: Make alpha blending optional -// TODO: Optimize these shaders. -static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static void get_compute_shader_header(char *header, size_t header_size, bool external_texture) { + if(external_texture) { + snprintf(header, header_size, + "#version 310 es\n" + "#extension GL_ARB_compute_shader: enable\n" + "#extension GL_OES_EGL_image_external : enable\n" + "#extension GL_OES_EGL_image_external_essl3 : require\n" + "layout(binding = 0) uniform highp samplerExternalOES img_input;\n" + "layout(binding = 1) uniform highp sampler2D img_background;\n"); + } else { + snprintf(header, header_size, + "#version 420\n" + "#extension GL_ARB_compute_shader: enable\n" + "layout(binding = 0) uniform highp sampler2D img_input;\n" + "layout(binding = 1) uniform highp sampler2D img_background;\n"); + } +} + +static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_compute_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture, bool alpha_blending) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); + char header[512]; + get_compute_shader_header(header, sizeof(header), external_texture); + char compute_shader[2048]; snprintf(compute_shader, sizeof(compute_shader), - "#version 310 es\n" - "#extension GL_OES_EGL_image_external : enable\n" - "#extension GL_OES_EGL_image_external_essl3 : require\n" - "precision highp float;\n" + "%s" "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" - "layout(binding = 0) uniform highp %s img_input;\n" - "layout(binding = 1) uniform highp sampler2D img_background;\n" + "precision highp float;\n" "uniform ivec2 source_position;\n" "uniform ivec2 target_position;\n" "uniform vec2 scale;\n" @@ -98,16 +123,17 @@ static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uni "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 1;\n" // size/2 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord);\n" + " vec4 source_color = texture(img_input, rotated_texel_coord/vec2(size));\n" " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 output_color_yuv = %s;\n" " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n" " imageStore(img_output, texel_coord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix); + "}\n", header, max_local_size_dim, max_local_size_dim, color_transform_matrix, + alpha_blending ? "texture(img_background, output_texel_coord/vec2(output_size))" : "source_color_yuv"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -119,18 +145,17 @@ static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uni return 0; } -static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_compute_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture, bool alpha_blending) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); + char header[512]; + get_compute_shader_header(header, sizeof(header), external_texture); + char compute_shader[2048]; snprintf(compute_shader, sizeof(compute_shader), - "#version 310 es\n" - "#extension GL_OES_EGL_image_external : enable\n" - "#extension GL_OES_EGL_image_external_essl3 : require\n" - "precision highp float;\n" + "%s" "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" - "layout(binding = 0) uniform highp %s img_input;\n" - "layout(binding = 1) uniform highp sampler2D img_background;\n" + "precision highp float;\n" "uniform ivec2 source_position;\n" "uniform ivec2 target_position;\n" "uniform vec2 scale;\n" @@ -140,16 +165,17 @@ static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_un "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 2;\n" // size/4 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n" - " vec2 output_texel_coord = vec2(texel_coord - source_position/2 + target_position/2) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord * 2.0);\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" + " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" + " vec4 source_color = texture(img_input, rotated_texel_coord/vec2(size>>1));\n" // size/2 " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 output_color_yuv = %s;\n" " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n" - " imageStore(img_output, texel_coord + target_position/2, vec4(uv_color, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D", color_transform_matrix); + " imageStore(img_output, texel_coord + target_position, vec4(uv_color, 1.0, 1.0));\n" + "}\n", header, max_local_size_dim, max_local_size_dim, color_transform_matrix, + alpha_blending ? "texture(img_background, output_texel_coord/vec2(output_size))" : "source_color_yuv"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -161,16 +187,14 @@ static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_un return 0; } -static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, bool external_texture) { +static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_compute_uniforms *uniforms, int max_local_size_dim, bool external_texture, bool alpha_blending) { + char header[512]; + get_compute_shader_header(header, sizeof(header), external_texture); + char compute_shader[2048]; snprintf(compute_shader, sizeof(compute_shader), - "#version 310 es\n" - "#extension GL_OES_EGL_image_external : enable\n" - "#extension GL_OES_EGL_image_external_essl3 : require\n" - "precision highp float;\n" + "%s" "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" - "layout(binding = 0) uniform highp %s img_input;\n" - "layout(binding = 1) uniform highp sampler2D img_background;\n" "uniform ivec2 source_position;\n" "uniform ivec2 target_position;\n" "uniform vec2 scale;\n" @@ -179,15 +203,16 @@ static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_u "void main() {\n" " ivec2 texel_coord = ivec2(gl_GlobalInvocationID.xy);\n" " ivec2 size = ivec2(vec2(textureSize(img_input, 0)) * scale + 0.5);\n" + " ivec2 size_shift = size >> 1;\n" // size/2 " ivec2 output_size = textureSize(img_background, 0);\n" - " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 rotated_texel_coord = vec2(texel_coord - source_position - size_shift) * rotation_matrix + vec2(size_shift) + 0.5;\n" " vec2 output_texel_coord = vec2(texel_coord - source_position + target_position) + 0.5;\n" - " vec2 tex_coord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(img_input, tex_coord);\n" - " vec4 output_color = texture(img_background, output_texel_coord/vec2(output_size));\n" + " vec4 source_color = texture(img_input, rotated_texel_coord/vec2(size));\n" + " vec4 output_color = %s;\n" " vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n" " imageStore(img_output, texel_coord + target_position, vec4(color, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, external_texture ? "samplerExternalOES" : "sampler2D"); + "}\n", header, max_local_size_dim, max_local_size_dim, + alpha_blending ? "texture(img_background, output_texel_coord/vec2(output_size))" : "source_color"); if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; @@ -199,6 +224,190 @@ static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_u return 0; } +static int load_graphics_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_graphics_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { + const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); + + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform vec2 offset; \n" + "uniform float rotation; \n" + "uniform mat2 rotation_matrix; \n" + "void main() \n" + "{ \n" + " texcoords_out = vec2(texcoords.x - 0.5, texcoords.y - 0.5) * rotation_matrix + vec2(0.5, 0.5); \n" + " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n" + "} \n"); + + const char *main_code = + main_code = + " vec4 pixel = texture(tex1, texcoords_out); \n" + " FragColor.x = (RGBtoYUV * vec4(pixel.rgb, 1.0)).x; \n" + " FragColor.w = pixel.a; \n"; + + char fragment_shader[2048]; + if(external_texture) { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "#extension GL_OES_EGL_image_external : enable \n" + "#extension GL_OES_EGL_image_external_essl3 : require \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform samplerExternalOES tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } else { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader, NULL) != 0) + return -1; + + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); + uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); + return 0; +} + +static unsigned int load_graphics_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_graphics_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { + const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); + + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform vec2 offset; \n" + "uniform float rotation; \n" + "uniform mat2 rotation_matrix; \n" + "void main() \n" + "{ \n" + " texcoords_out = vec2(texcoords.x - 0.5, texcoords.y - 0.5) * rotation_matrix + vec2(0.5, 0.5); \n" + " gl_Position = (vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0)) * vec4(0.5, 0.5, 1.0, 1.0) - vec4(0.5, 0.5, 0.0, 0.0); \n" + "} \n"); + + const char *main_code = + main_code = + " vec4 pixel = texture(tex1, texcoords_out); \n" + " FragColor.xy = (RGBtoYUV * vec4(pixel.rgb, 1.0)).yz; \n" + " FragColor.w = pixel.a; \n"; + + char fragment_shader[2048]; + if(external_texture) { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "#extension GL_OES_EGL_image_external : enable \n" + "#extension GL_OES_EGL_image_external_essl3 : require \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform samplerExternalOES tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } else { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader, NULL) != 0) + return -1; + + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); + uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); + return 0; +} + +static unsigned int load_graphics_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_graphics_uniforms *uniforms, bool external_texture) { + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform vec2 offset; \n" + "uniform float rotation; \n" + "uniform mat2 rotation_matrix; \n" + "void main() \n" + "{ \n" + " texcoords_out = vec2(texcoords.x - 0.5, texcoords.y - 0.5) * rotation_matrix + vec2(0.5, 0.5); \n" + " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n" + "} \n"); + + const char *main_code = + main_code = + " vec4 pixel = texture(tex1, texcoords_out); \n" + " FragColor = pixel; \n"; + + char fragment_shader[2048]; + if(external_texture) { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "#extension GL_OES_EGL_image_external : enable \n" + "#extension GL_OES_EGL_image_external_essl3 : require \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform samplerExternalOES tex1; \n" + "out vec4 FragColor; \n" + "void main() \n" + "{ \n" + "%s" + "} \n", main_code); + } else { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + "void main() \n" + "{ \n" + "%s" + "} \n", main_code); + } + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader, NULL) != 0) + return -1; + + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); + uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); + return 0; +} + static int load_framebuffers(gsr_color_conversion *self) { /* TODO: Only generate the necessary amount of framebuffers (self->params.num_destination_textures) */ const unsigned int draw_buffer = GL_COLOR_ATTACHMENT0; @@ -248,6 +457,140 @@ static int create_vertices(gsr_color_conversion *self) { return 0; } +static bool gsr_color_conversion_load_compute_shaders(gsr_color_conversion *self) { + switch(self->params.destination_color) { + case GSR_DESTINATION_COLOR_NV12: + case GSR_DESTINATION_COLOR_P010: { + if(load_compute_shader_y(&self->compute_shaders[COMPUTE_SHADER_INDEX_Y], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_Y], self->max_local_size_dim, self->params.destination_color, self->params.color_range, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + + if(load_compute_shader_uv(&self->compute_shaders[COMPUTE_SHADER_INDEX_UV], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_UV], self->max_local_size_dim, self->params.destination_color, self->params.color_range, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + return false; + } + + if(load_compute_shader_y(&self->compute_shaders[COMPUTE_SHADER_INDEX_Y_BLEND], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_Y_BLEND], self->max_local_size_dim, self->params.destination_color, self->params.color_range, false, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + + if(load_compute_shader_uv(&self->compute_shaders[COMPUTE_SHADER_INDEX_UV_BLEND], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_UV_BLEND], self->max_local_size_dim, self->params.destination_color, self->params.color_range, false, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + return false; + } + break; + } + case GSR_DESTINATION_COLOR_RGB8: { + if(load_compute_shader_rgb(&self->compute_shaders[COMPUTE_SHADER_INDEX_RGB], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_RGB], self->max_local_size_dim, false, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + + if(load_compute_shader_rgb(&self->compute_shaders[COMPUTE_SHADER_INDEX_RGB_BLEND], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_RGB_BLEND], self->max_local_size_dim, false, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + break; + } + } + return true; +} + +static bool gsr_color_conversion_load_external_compute_shaders(gsr_color_conversion *self) { + switch(self->params.destination_color) { + case GSR_DESTINATION_COLOR_NV12: + case GSR_DESTINATION_COLOR_P010: { + if(load_compute_shader_y(&self->compute_shaders[COMPUTE_SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_Y_EXTERNAL], self->max_local_size_dim, self->params.destination_color, self->params.color_range, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + + if(load_compute_shader_uv(&self->compute_shaders[COMPUTE_SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_UV_EXTERNAL], self->max_local_size_dim, self->params.destination_color, self->params.color_range, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + return false; + } + + if(load_compute_shader_y(&self->compute_shaders[COMPUTE_SHADER_INDEX_Y_EXTERNAL_BLEND], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_Y_EXTERNAL_BLEND], self->max_local_size_dim, self->params.destination_color, self->params.color_range, true, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + + if(load_compute_shader_uv(&self->compute_shaders[COMPUTE_SHADER_INDEX_UV_EXTERNAL_BLEND], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_UV_EXTERNAL_BLEND], self->max_local_size_dim, self->params.destination_color, self->params.color_range, true, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + return false; + } + break; + } + case GSR_DESTINATION_COLOR_RGB8: { + if(load_compute_shader_rgb(&self->compute_shaders[COMPUTE_SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_RGB_EXTERNAL], self->max_local_size_dim, true, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + + if(load_compute_shader_rgb(&self->compute_shaders[COMPUTE_SHADER_INDEX_RGB_EXTERNAL_BLEND], self->params.egl, &self->compute_uniforms[COMPUTE_SHADER_INDEX_RGB_EXTERNAL_BLEND], self->max_local_size_dim, true, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + return false; + } + break; + } + } + return true; +} + +static bool gsr_color_conversion_load_graphics_shaders(gsr_color_conversion *self) { + switch(self->params.destination_color) { + case GSR_DESTINATION_COLOR_NV12: + case GSR_DESTINATION_COLOR_P010: { + if(load_graphics_shader_y(&self->graphics_shaders[GRAPHICS_SHADER_INDEX_Y], self->params.egl, &self->graphics_uniforms[GRAPHICS_SHADER_INDEX_Y], self->params.destination_color, self->params.color_range, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y graphics shader\n"); + return false; + } + + if(load_graphics_shader_uv(&self->graphics_shaders[GRAPHICS_SHADER_INDEX_UV], self->params.egl, &self->graphics_uniforms[GRAPHICS_SHADER_INDEX_UV], self->params.destination_color, self->params.color_range, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV graphics shader\n"); + return false; + } + break; + } + case GSR_DESTINATION_COLOR_RGB8: { + if(load_graphics_shader_rgb(&self->graphics_shaders[GRAPHICS_SHADER_INDEX_RGB], self->params.egl, &self->graphics_uniforms[GRAPHICS_SHADER_INDEX_RGB], false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y graphics shader\n"); + return false; + } + break; + } + } + return true; +} + +static bool gsr_color_conversion_load_external_graphics_shaders(gsr_color_conversion *self) { + switch(self->params.destination_color) { + case GSR_DESTINATION_COLOR_NV12: + case GSR_DESTINATION_COLOR_P010: { + if(load_graphics_shader_y(&self->graphics_shaders[GRAPHICS_SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->graphics_uniforms[GRAPHICS_SHADER_INDEX_Y_EXTERNAL], self->params.destination_color, self->params.color_range, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y graphics shader\n"); + return false; + } + + if(load_graphics_shader_uv(&self->graphics_shaders[GRAPHICS_SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->graphics_uniforms[GRAPHICS_SHADER_INDEX_UV_EXTERNAL], self->params.destination_color, self->params.color_range, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV graphics shader\n"); + return false; + } + break; + } + case GSR_DESTINATION_COLOR_RGB8: { + if(load_graphics_shader_rgb(&self->graphics_shaders[GRAPHICS_SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->graphics_uniforms[GRAPHICS_SHADER_INDEX_RGB_EXTERNAL], true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y graphics shader\n"); + return false; + } + break; + } + } + return true; +} + int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params) { assert(params); assert(params->egl); @@ -259,58 +602,40 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver self->params.egl->glGetIntegerv(GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS, &max_compute_work_group_invocations); self->max_local_size_dim = sqrt(max_compute_work_group_invocations); - switch(params->destination_color) { + switch(self->params.destination_color) { case GSR_DESTINATION_COLOR_NV12: case GSR_DESTINATION_COLOR_P010: { if(self->params.num_destination_textures != 2) { fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 2 destination textures for destination color NV12/P010, got %d destination texture(s)\n", self->params.num_destination_textures); - return -1; - } - - if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y], self->params.egl, &self->uniforms[SHADER_INDEX_Y], self->max_local_size_dim, params->destination_color, params->color_range, false) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - - if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV], self->params.egl, &self->uniforms[SHADER_INDEX_UV], self->max_local_size_dim, params->destination_color, params->color_range, false) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); - goto err; - } - - if(self->params.load_external_image_shader) { - if(load_compute_shader_y(&self->shaders[SHADER_INDEX_Y_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_Y_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); - goto err; - } - - if(load_compute_shader_uv(&self->shaders[SHADER_INDEX_UV_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_UV_EXTERNAL], self->max_local_size_dim, params->destination_color, params->color_range, true) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); - goto err; - } - } break; } case GSR_DESTINATION_COLOR_RGB8: { if(self->params.num_destination_textures != 1) { fprintf(stderr, "gsr error: gsr_color_conversion_init: expected 1 destination textures for destination color RGB8, got %d destination texture(s)\n", self->params.num_destination_textures); - return -1; - } - - if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB], self->params.egl, &self->uniforms[SHADER_INDEX_RGB], self->max_local_size_dim, false) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - - if(self->params.load_external_image_shader) { - if(load_compute_shader_rgb(&self->shaders[SHADER_INDEX_RGB_EXTERNAL], self->params.egl, &self->uniforms[SHADER_INDEX_RGB_EXTERNAL], self->max_local_size_dim, true) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); - goto err; - } - } break; } } + if(!gsr_color_conversion_load_compute_shaders(self)) { + self->compute_shaders_failed_to_load = true; + fprintf(stderr, "gsr info: failed to load one or more compute shaders, run gpu-screen-recorder with the '-gl-debug yes' option to see why. Falling back to slower graphics shader instead\n"); + if(!gsr_color_conversion_load_graphics_shaders(self)) + goto err; + } + + if(self->params.load_external_image_shader) { + if(!gsr_color_conversion_load_external_compute_shaders(self)) { + self->external_compute_shaders_failed_to_load = true; + fprintf(stderr, "gsr info: failed to load one or more external compute shaders, run gpu-screen-recorder with the '-gl-debug yes' option to see why. Falling back to slower graphics shader instead\n"); + if(!gsr_color_conversion_load_external_graphics_shaders(self)) + goto err; + } + } + if(load_framebuffers(self) != 0) goto err; @@ -343,14 +668,18 @@ void gsr_color_conversion_deinit(gsr_color_conversion *self) { self->framebuffers[i] = 0; } - for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_SHADERS; ++i) { - gsr_shader_deinit(&self->shaders[i]); + for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_COMPUTE_SHADERS; ++i) { + gsr_shader_deinit(&self->compute_shaders[i]); + } + + for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_GRAPHICS_SHADERS; ++i) { + gsr_shader_deinit(&self->graphics_shaders[i]); } self->params.egl = NULL; } -static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2], vec2i *source_position, vec2i texture_size, vec2f scale) { +static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2]) { /* rotation_matrix[0][0] = cos(angle); rotation_matrix[0][1] = -sin(angle); @@ -371,8 +700,6 @@ static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rot rotation_matrix[0][1] = -1.0f; rotation_matrix[1][0] = 1.0f; rotation_matrix[1][1] = 0.0f; - source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); - source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); break; case GSR_ROT_180: rotation_matrix[0][0] = -1.0f; @@ -385,8 +712,6 @@ static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rot rotation_matrix[0][1] = 1.0f; rotation_matrix[1][0] = -1.0f; rotation_matrix[1][1] = 0.0f; - source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); - source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); break; } } @@ -411,7 +736,7 @@ typedef enum { GSR_COLOR_COMP_RGB } gsr_color_component; -static int color_component_get_texture_index(gsr_color_component color_component) { +static int color_component_get_destination_texture_index(gsr_color_component color_component) { switch(color_component) { case GSR_COLOR_COMP_Y: return 0; case GSR_COLOR_COMP_UV: return 1; @@ -431,67 +756,205 @@ static unsigned int color_component_get_color_format(gsr_color_component color_c return GL_RGBA8; } -static int color_component_get_shader_index(gsr_color_component color_component, bool external_texture) { +static int color_component_get_COMPUTE_SHADER_INDEX(gsr_color_component color_component, bool external_texture, bool alpha_blending) { switch(color_component) { - case GSR_COLOR_COMP_Y: return external_texture ? SHADER_INDEX_Y_EXTERNAL : SHADER_INDEX_Y; - case GSR_COLOR_COMP_UV: return external_texture ? SHADER_INDEX_UV_EXTERNAL : SHADER_INDEX_UV; - case GSR_COLOR_COMP_RGB: return external_texture ? SHADER_INDEX_RGB_EXTERNAL : SHADER_INDEX_RGB; + case GSR_COLOR_COMP_Y: { + if(external_texture) + return alpha_blending ? COMPUTE_SHADER_INDEX_Y_EXTERNAL_BLEND : COMPUTE_SHADER_INDEX_Y_EXTERNAL; + else + return alpha_blending ? COMPUTE_SHADER_INDEX_Y_BLEND : COMPUTE_SHADER_INDEX_Y; + } + case GSR_COLOR_COMP_UV: { + if(external_texture) + return alpha_blending ? COMPUTE_SHADER_INDEX_UV_EXTERNAL_BLEND : COMPUTE_SHADER_INDEX_UV_EXTERNAL; + else + return alpha_blending ? COMPUTE_SHADER_INDEX_UV_BLEND : COMPUTE_SHADER_INDEX_UV; + } + case GSR_COLOR_COMP_RGB: { + if(external_texture) + return alpha_blending ? COMPUTE_SHADER_INDEX_RGB_EXTERNAL_BLEND : COMPUTE_SHADER_INDEX_RGB_EXTERNAL; + else + return alpha_blending ? COMPUTE_SHADER_INDEX_RGB_BLEND : COMPUTE_SHADER_INDEX_RGB; + } } assert(false); - return SHADER_INDEX_RGB; + return COMPUTE_SHADER_INDEX_RGB; } -static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *self, bool external_texture, float rotation_matrix[2][2], vec2i source_position, vec2i destination_pos, vec2i destination_size, vec2f scale, bool use_16bit_colors, gsr_color_component color_component) { - const int shader_index = color_component_get_shader_index(color_component, external_texture); - const int texture_index = color_component_get_texture_index(color_component); +static void gsr_color_conversion_dispatch_compute_shader(gsr_color_conversion *self, bool external_texture, bool alpha_blending, float rotation_matrix[2][2], vec2i source_position, vec2i destination_pos, vec2i destination_size, vec2f scale, bool use_16bit_colors, gsr_color_component color_component) { + const int compute_shader_index = color_component_get_COMPUTE_SHADER_INDEX(color_component, external_texture, alpha_blending); + const int destination_texture_index = color_component_get_destination_texture_index(color_component); const unsigned int color_format = color_component_get_color_format(color_component, use_16bit_colors); self->params.egl->glActiveTexture(GL_TEXTURE1); - self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[texture_index]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[destination_texture_index]); self->params.egl->glActiveTexture(GL_TEXTURE0); - gsr_color_uniforms *uniform = &self->uniforms[shader_index]; - gsr_shader_use(&self->shaders[shader_index]); + gsr_color_compute_uniforms *uniform = &self->compute_uniforms[compute_shader_index]; + gsr_shader_use(&self->compute_shaders[compute_shader_index]); self->params.egl->glUniformMatrix2fv(uniform->rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); self->params.egl->glUniform2i(uniform->source_position, source_position.x, source_position.y); self->params.egl->glUniform2i(uniform->target_position, destination_pos.x, destination_pos.y); self->params.egl->glUniform2f(uniform->scale, scale.x, scale.y); - self->params.egl->glBindImageTexture(0, self->params.destination_textures[texture_index], 0, GL_FALSE, 0, GL_WRITE_ONLY, color_format); + self->params.egl->glBindImageTexture(0, self->params.destination_textures[destination_texture_index], 0, GL_FALSE, 0, GL_WRITE_ONLY, color_format); const double num_groups_x = ceil((double)destination_size.x/(double)self->max_local_size_dim); const double num_groups_y = ceil((double)destination_size.y/(double)self->max_local_size_dim); self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); } -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) { - vec2f scale = {0.0f, 0.0f}; - if(source_size.x > 0 && source_size.y > 0) - scale = (vec2f){ (double)destination_size.x/(double)source_size.x, (double)destination_size.y/(double)source_size.y }; - - vec2i source_position = {0, 0}; - float rotation_matrix[2][2] = {{0, 0}, {0, 0}}; - gsr_color_conversion_apply_rotation(rotation, rotation_matrix, &source_position, texture_size, scale); - - source_position.x -= (source_pos.x * scale.x + 0.5); - source_position.y -= (source_pos.y * scale.y + 0.5); +static void gsr_color_conversion_draw_graphics(gsr_color_conversion *self, unsigned int texture_id, bool external_texture, float rotation_matrix[2][2], vec2i source_position, vec2i source_size, vec2i destination_pos, vec2i texture_size, vec2f scale, gsr_source_color source_color) { + /* TODO: Do not call this every frame? */ + vec2i dest_texture_size = {0, 0}; + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[0]); + self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &dest_texture_size.x); + self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &dest_texture_size.y); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); const int texture_target = external_texture ? GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D; + self->params.egl->glBindTexture(texture_target, texture_id); gsr_color_conversion_swizzle_texture_source(self, source_color); + const vec2f pos_norm = { + ((float)destination_pos.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f, + ((float)destination_pos.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f, + }; + + const vec2f size_norm = { + ((float)source_size.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f * scale.x, + ((float)source_size.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f * scale.y, + }; + + const vec2f texture_pos_norm = { + (float)source_position.x / (texture_size.x == 0 ? 1.0f : (float)texture_size.x), + (float)source_position.y / (texture_size.y == 0 ? 1.0f : (float)texture_size.y), + }; + + const vec2f texture_size_norm = { + (float)source_size.x / (texture_size.x == 0 ? 1.0f : (float)texture_size.x), + (float)source_size.y / (texture_size.y == 0 ? 1.0f : (float)texture_size.y), + }; + + const float vertices[] = { + -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y, + -1.0f + 0.0f, -1.0f + 0.0f, texture_pos_norm.x, texture_pos_norm.y, + -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y, + + -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y, + -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y, + -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y + texture_size_norm.y + }; + + self->params.egl->glBindVertexArray(self->vertex_array_object_id); + self->params.egl->glViewport(0, 0, dest_texture_size.x, dest_texture_size.y); + + /* TODO: this, also cleanup */ + //self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id); + self->params.egl->glBufferSubData(GL_ARRAY_BUFFER, 0, 24 * sizeof(float), vertices); + switch(self->params.destination_color) { case GSR_DESTINATION_COLOR_NV12: case GSR_DESTINATION_COLOR_P010: { - const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010; - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, destination_size, scale, use_16bit_colors, GSR_COLOR_COMP_Y); - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, (vec2i){destination_size.x/2, destination_size.y/2}, scale, use_16bit_colors, GSR_COLOR_COMP_UV); + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]); + //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. We want to do that when using multiple drm to create the final image (multiple monitors for example) + + int shader_index = external_texture ? GRAPHICS_SHADER_INDEX_Y_EXTERNAL : GRAPHICS_SHADER_INDEX_Y; + gsr_shader_use(&self->graphics_shaders[shader_index]); + self->params.egl->glUniformMatrix2fv(self->graphics_uniforms[shader_index].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); + self->params.egl->glUniform2f(self->graphics_uniforms[shader_index].offset, pos_norm.x, pos_norm.y); + self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); + + if(self->params.num_destination_textures > 1) { + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]); + //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); + + shader_index = external_texture ? GRAPHICS_SHADER_INDEX_UV_EXTERNAL : GRAPHICS_SHADER_INDEX_UV; + gsr_shader_use(&self->graphics_shaders[shader_index]); + self->params.egl->glUniformMatrix2fv(self->graphics_uniforms[shader_index].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); + self->params.egl->glUniform2f(self->graphics_uniforms[shader_index].offset, pos_norm.x, pos_norm.y); + self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); + } break; } case GSR_DESTINATION_COLOR_RGB8: { - gsr_color_conversion_dispatch_compute_shader(self, external_texture, rotation_matrix, source_position, destination_pos, destination_size, scale, false, GSR_COLOR_COMP_RGB); + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]); + //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. We want to do that when using multiple drm to create the final image (multiple monitors for example) + + const int shader_index = external_texture ? GRAPHICS_SHADER_INDEX_RGB_EXTERNAL : GRAPHICS_SHADER_INDEX_RGB; + gsr_shader_use(&self->graphics_shaders[shader_index]); + self->params.egl->glUniformMatrix2fv(self->graphics_uniforms[shader_index].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); + self->params.egl->glUniform2f(self->graphics_uniforms[shader_index].offset, pos_norm.x, pos_norm.y); + self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); break; } } + self->params.egl->glBindVertexArray(0); + self->params.egl->glUseProgram(0); + gsr_color_conversion_swizzle_reset(self, source_color); + self->params.egl->glBindTexture(texture_target, 0); + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i source_pos, vec2i source_size, vec2i texture_size, gsr_rotation rotation, gsr_source_color source_color, bool external_texture, bool alpha_blending) { + assert(!external_texture || self->params.load_external_image_shader); + if(external_texture && !self->params.load_external_image_shader) { + fprintf(stderr, "gsr error: gsr_color_conversion_draw: external texture not loaded\n"); + return; + } + + vec2f scale = {0.0f, 0.0f}; + if(source_size.x > 0 && source_size.y > 0) + scale = (vec2f){ (double)destination_size.x/(double)source_size.x, (double)destination_size.y/(double)source_size.y }; + + vec2i source_position = {0, 0}; + float rotation_matrix[2][2] = {{0, 0}, {0, 0}}; + gsr_color_conversion_apply_rotation(rotation, rotation_matrix); + + const int texture_target = external_texture ? GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D; + self->params.egl->glBindTexture(texture_target, texture_id); + gsr_color_conversion_swizzle_texture_source(self, source_color); + + const bool use_graphics_shader = external_texture ? self->external_compute_shaders_failed_to_load : self->compute_shaders_failed_to_load; + if(use_graphics_shader) { + source_position.x += source_pos.x; + source_position.y += source_pos.y; + gsr_color_conversion_draw_graphics(self, texture_id, external_texture, rotation_matrix, source_position, source_size, destination_pos, texture_size, scale, source_color); + // TODO: Is glFlush and glFinish needed here for graphics garbage? + } else { + switch(rotation) { + case GSR_ROT_0: + break; + case GSR_ROT_90: + source_position.x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x); + source_position.y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y); + break; + case GSR_ROT_180: + break; + case GSR_ROT_270: + source_position.x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x); + source_position.y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y); + break; + } + source_position.x -= (source_pos.x * scale.x + 0.5); + source_position.y -= (source_pos.y * scale.y + 0.5); + + switch(self->params.destination_color) { + case GSR_DESTINATION_COLOR_NV12: + case GSR_DESTINATION_COLOR_P010: { + const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010; + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, source_position, destination_pos, destination_size, scale, use_16bit_colors, GSR_COLOR_COMP_Y); + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, (vec2i){source_position.x/2, source_position.y/2}, + (vec2i){destination_pos.x/2, destination_pos.y/2}, (vec2i){destination_size.x/2, destination_size.y/2}, scale, use_16bit_colors, GSR_COLOR_COMP_UV); + break; + } + case GSR_DESTINATION_COLOR_RGB8: { + gsr_color_conversion_dispatch_compute_shader(self, external_texture, alpha_blending, rotation_matrix, source_position, destination_pos, destination_size, scale, false, GSR_COLOR_COMP_RGB); + break; + } + } + } + // TODO: Use the minimal barrier required self->params.egl->glMemoryBarrier(GL_ALL_BARRIER_BITS); // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT self->params.egl->glUseProgram(0); @@ -225,6 +225,14 @@ static bool gsr_egl_proc_load_egl(gsr_egl *self) { self->eglQueryDeviceStringEXT = (FUNC_eglQueryDeviceStringEXT)self->eglGetProcAddress("eglQueryDeviceStringEXT"); self->eglQueryDmaBufModifiersEXT = (FUNC_eglQueryDmaBufModifiersEXT)self->eglGetProcAddress("eglQueryDmaBufModifiersEXT"); + self->glCreateMemoryObjectsEXT = (FUNC_glCreateMemoryObjectsEXT)self->eglGetProcAddress("glCreateMemoryObjectsEXT"); + self->glImportMemoryFdEXT = (FUNC_glImportMemoryFdEXT)self->eglGetProcAddress("glImportMemoryFdEXT"); + self->glIsMemoryObjectEXT = (FUNC_glIsMemoryObjectEXT)self->eglGetProcAddress("glIsMemoryObjectEXT"); + self->glTexStorageMem2DEXT = (FUNC_glTexStorageMem2DEXT)self->eglGetProcAddress("glTexStorageMem2DEXT"); + self->glBufferStorageMemEXT = (FUNC_glBufferStorageMemEXT)self->eglGetProcAddress("glBufferStorageMemEXT"); + self->glNamedBufferStorageMemEXT = (FUNC_glNamedBufferStorageMemEXT)self->eglGetProcAddress("glNamedBufferStorageMemEXT"); + self->glMemoryObjectParameterivEXT = (FUNC_glMemoryObjectParameterivEXT)self->eglGetProcAddress("glMemoryObjectParameterivEXT"); + if(!self->eglExportDMABUFImageQueryMESA) { fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageQueryMESA\n"); return false; diff --git a/src/encoder/video/vulkan.c b/src/encoder/video/vulkan.c index 7210870..7643ada 100644 --- a/src/encoder/video/vulkan.c +++ b/src/encoder/video/vulkan.c @@ -8,26 +8,15 @@ //#include <vulkan/vulkan_core.h> +#define GL_HANDLE_TYPE_OPAQUE_FD_EXT 0x9586 #define GL_TEXTURE_TILING_EXT 0x9580 #define GL_OPTIMAL_TILING_EXT 0x9584 #define GL_LINEAR_TILING_EXT 0x9585 -#define GL_PIXEL_PACK_BUFFER 0x88EB -#define GL_PIXEL_UNPACK_BUFFER 0x88EC -#define GL_STREAM_READ 0x88E1 -#define GL_STREAM_DRAW 0x88E0 -#define GL_READ_ONLY 0x88B8 -#define GL_WRITE_ONLY 0x88B9 -#define GL_READ_FRAMEBUFFER 0x8CA8 - typedef struct { gsr_video_encoder_vulkan_params params; unsigned int target_textures[2]; AVBufferRef *device_ctx; - AVVulkanDeviceContext* vv; - unsigned int pbo_y[2]; - unsigned int pbo_uv[2]; - AVFrame *sw_frame; } gsr_video_encoder_vulkan; static bool gsr_video_encoder_vulkan_setup_context(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context) { @@ -84,6 +73,24 @@ static AVVulkanDeviceContext* video_codec_context_get_vulkan_data(AVCodecContext return (AVVulkanDeviceContext*)device_context->hwctx; } +static uint32_t get_memory_type_idx(VkPhysicalDevice pdev, const VkMemoryRequirements *mem_reqs, VkMemoryPropertyFlagBits prop_flags, PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties) { + VkPhysicalDeviceMemoryProperties pdev_mem_props; + uint32_t i; + + vkGetPhysicalDeviceMemoryProperties(pdev, &pdev_mem_props); + + for (i = 0; i < pdev_mem_props.memoryTypeCount; i++) { + const VkMemoryType *type = &pdev_mem_props.memoryTypes[i]; + + if ((mem_reqs->memoryTypeBits & (1 << i)) && + (type->propertyFlags & prop_flags) == prop_flags) { + return i; + break; + } + } + return UINT32_MAX; +} + static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context, AVFrame *frame) { const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0); if(res < 0) { @@ -91,56 +98,133 @@ static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *se return false; } - //AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0]; - self->vv = video_codec_context_get_vulkan_data(video_codec_context); + while(self->params.egl->glGetError()) {} + + AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0]; + AVVulkanDeviceContext* vv = video_codec_context_get_vulkan_data(video_codec_context); + const size_t luma_size = frame->width * frame->height; + if(vv) { + PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vv->get_proc_addr(vv->inst, "vkGetImageMemoryRequirements"); + PFN_vkAllocateMemory vkAllocateMemory = (PFN_vkAllocateMemory)vv->get_proc_addr(vv->inst, "vkAllocateMemory"); + PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vv->get_proc_addr(vv->inst, "vkGetPhysicalDeviceMemoryProperties"); + PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vv->get_proc_addr(vv->inst, "vkGetMemoryFdKHR"); + + VkMemoryRequirements mem_reqs = {0}; + vkGetImageMemoryRequirements(vv->act_dev, target_surface_id->img[0], &mem_reqs); + + fprintf(stderr, "size: %lu, alignment: %lu, memory bits: 0x%08x\n", mem_reqs.size, mem_reqs.alignment, mem_reqs.memoryTypeBits); + VkDeviceMemory mem; + { + VkExportMemoryAllocateInfo exp_mem_info; + VkMemoryAllocateInfo mem_alloc_info; + VkMemoryDedicatedAllocateInfoKHR ded_info; + + memset(&exp_mem_info, 0, sizeof(exp_mem_info)); + exp_mem_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + exp_mem_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + memset(&ded_info, 0, sizeof(ded_info)); + ded_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + ded_info.image = target_surface_id->img[0]; + + exp_mem_info.pNext = &ded_info; + + memset(&mem_alloc_info, 0, sizeof(mem_alloc_info)); + mem_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + mem_alloc_info.pNext = &exp_mem_info; + mem_alloc_info.allocationSize = target_surface_id->size[0]; + mem_alloc_info.memoryTypeIndex = get_memory_type_idx(vv->phys_dev, &mem_reqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vkGetPhysicalDeviceMemoryProperties); + + if (mem_alloc_info.memoryTypeIndex == UINT32_MAX) { + fprintf(stderr, "No suitable memory type index found.\n"); + return VK_NULL_HANDLE; + } + + if (vkAllocateMemory(vv->act_dev, &mem_alloc_info, 0, &mem) != + VK_SUCCESS) + return VK_NULL_HANDLE; + + fprintf(stderr, "memory: %p\n", (void*)mem); - const unsigned int internal_formats_nv12[2] = { GL_RGBA8, GL_RGBA8 }; // TODO: GL_R8, GL_R16 - const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; - const unsigned int formats[2] = { GL_RED, GL_RG }; - const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + } - for(int i = 0; i < 2; ++i) { - self->target_textures[i] = gl_create_texture(self->params.egl, video_codec_context->width / div[i], video_codec_context->height / div[i], self->params.color_depth == GSR_COLOR_DEPTH_8_BITS ? internal_formats_nv12[i] : internal_formats_p010[i], formats[i], GL_NEAREST); - if(self->target_textures[i] == 0) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_textures: failed to create opengl texture\n"); - return false; + fprintf(stderr, "target surface id: %p, %zu, %zu\n", (void*)target_surface_id->mem[0], target_surface_id->offset[0], target_surface_id->offset[1]); + fprintf(stderr, "vkGetMemoryFdKHR: %p\n", (void*)vkGetMemoryFdKHR); + + int fd = 0; + VkMemoryGetFdInfoKHR fd_info; + memset(&fd_info, 0, sizeof(fd_info)); + fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info.memory = target_surface_id->mem[0]; + fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + if(vkGetMemoryFdKHR(vv->act_dev, &fd_info, &fd) != VK_SUCCESS) { + fprintf(stderr, "failed!\n"); + } else { + fprintf(stderr, "fd: %d\n", fd); } - } - self->params.egl->glGenBuffers(2, self->pbo_y); + fprintf(stderr, "glImportMemoryFdEXT: %p, size: %zu\n", (void*)self->params.egl->glImportMemoryFdEXT, target_surface_id->size[0]); + const int tiling = target_surface_id->tiling == VK_IMAGE_TILING_LINEAR ? GL_LINEAR_TILING_EXT : GL_OPTIMAL_TILING_EXT; - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[0]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); + if(tiling != GL_OPTIMAL_TILING_EXT) { + fprintf(stderr, "tiling %d is not supported, only GL_OPTIMAL_TILING_EXT (%d) is supported\n", tiling, GL_OPTIMAL_TILING_EXT); + } - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[1]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + unsigned int gl_memory_obj = 0; + self->params.egl->glCreateMemoryObjectsEXT(1, &gl_memory_obj); - self->params.egl->glGenBuffers(2, self->pbo_uv); + //const int dedicated = GL_TRUE; + //self->params.egl->glMemoryObjectParameterivEXT(gl_memory_obj, GL_DEDICATED_MEMORY_OBJECT_EXT, &dedicated); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[0]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + self->params.egl->glImportMemoryFdEXT(gl_memory_obj, target_surface_id->size[0], GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd); + if(!self->params.egl->glIsMemoryObjectEXT(gl_memory_obj)) + fprintf(stderr, "failed to create object!\n"); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[1]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + fprintf(stderr, "gl memory obj: %u, error: %d\n", gl_memory_obj, self->params.egl->glGetError()); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + // fprintf(stderr, "0 gl error: %d\n", self->params.egl->glGetError()); + // unsigned int vertex_buffer = 0; + // self->params.egl->glGenBuffers(1, &vertex_buffer); + // self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + // self->params.egl->glBufferStorageMemEXT(GL_ARRAY_BUFFER, target_surface_id->size[0], gl_memory_obj, target_surface_id->offset[0]); + // fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); - self->sw_frame = av_frame_alloc(); - self->sw_frame->format = AV_PIX_FMT_NV12; - self->sw_frame->width = frame->width; - self->sw_frame->height = frame->height; + // fprintf(stderr, "0 gl error: %d\n", self->params.egl->glGetError()); + // unsigned int buffer = 0; + // self->params.egl->glCreateBuffers(1, &buffer); + // self->params.egl->glNamedBufferStorageMemEXT(buffer, target_surface_id->size[0], gl_memory_obj, target_surface_id->offset[0]); + // fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); - // TODO: Remove - if(av_frame_get_buffer(self->sw_frame, 0) < 0) { - fprintf(stderr, "failed to allocate sw frame\n"); - } + self->params.egl->glGenTextures(1, &self->target_textures[0]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[0]); + + fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, tiling); + + fprintf(stderr, "tiling: %d\n", tiling); + + fprintf(stderr, "2 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_R8, frame->width, frame->height, gl_memory_obj, target_surface_id->offset[0]); + + fprintf(stderr, "3 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + self->params.egl->glGenTextures(1, &self->target_textures[1]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[1]); + + fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, tiling); + + fprintf(stderr, "tiling: %d\n", tiling); + + fprintf(stderr, "2 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_RG8, frame->width/2, frame->height/2, gl_memory_obj, target_surface_id->offset[0] + luma_size); + + fprintf(stderr, "3 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + } - // TODO: Remove - if(av_frame_make_writable(self->sw_frame) < 0) { - fprintf(stderr, "failed to make writable\n"); - } return true; } @@ -185,91 +269,6 @@ void gsr_video_encoder_vulkan_stop(gsr_video_encoder_vulkan *self, AVCodecContex av_buffer_unref(&self->device_ctx); } -static void nop_free(void *opaque, uint8_t *data) { - -} - -static void gsr_video_encoder_vulkan_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) { - gsr_video_encoder_vulkan *self = encoder->priv; - - static int counter = 0; - ++counter; - - // AVBufferRef *av_buffer_create(uint8_t *data, size_t size, - // void (*free)(void *opaque, uint8_t *data), - // void *opaque, int flags); - - while(self->params.egl->glGetError()){} - self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[0]); - //fprintf(stderr, "1 gl err: %d\n", self->params.egl->glGetError()); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[counter % 2]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); - self->params.egl->glReadPixels(0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, 0); - //fprintf(stderr, "2 gl err: %d\n", self->params.egl->glGetError()); - - const int next_pbo_y = (counter + 1) % 2; - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_y]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); - //fprintf(stderr, "3 gl err: %d\n", self->params.egl->glGetError()); - uint8_t *ptr_y = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - //fprintf(stderr, "4 gl err: %d\n", self->params.egl->glGetError()); - if(!ptr_y) { - fprintf(stderr, "failed to map buffer y!\n"); - } - - while(self->params.egl->glGetError()){} - self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[1]); - //fprintf(stderr, "5 gl err: %d\n", self->params.egl->glGetError()); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[counter % 2]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); - //fprintf(stderr, "5.5 gl err: %d\n", self->params.egl->glGetError()); - self->params.egl->glReadPixels(0, 0, frame->width/2, frame->height/2, GL_RG, GL_UNSIGNED_BYTE, 0); - //fprintf(stderr, "6 gl err: %d\n", self->params.egl->glGetError()); - - const int next_pbo_uv = (counter + 1) % 2; - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[next_pbo_uv]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); - //fprintf(stderr, "7 gl err: %d\n", self->params.egl->glGetError()); - uint8_t *ptr_uv = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - //fprintf(stderr, "8 gl err: %d\n", self->params.egl->glGetError()); - if(!ptr_uv) { - fprintf(stderr, "failed to map buffer uv!\n"); - } - - //self->sw_frame->buf[0] = av_buffer_create(ptr_y, 3840 * 2160, nop_free, NULL, 0); - //self->sw_frame->buf[1] = av_buffer_create(ptr_uv, 1920 * 1080 * 2, nop_free, NULL, 0); - //self->sw_frame->data[0] = self->sw_frame->buf[0]->data; - //self->sw_frame->data[1] = self->sw_frame->buf[1]->data; - //self->sw_frame->extended_data[0] = self->sw_frame->data[0]; - //self->sw_frame->extended_data[1] = self->sw_frame->data[1]; - - self->sw_frame->data[0] = ptr_y; - self->sw_frame->data[1] = ptr_uv; - - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - - //self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[1]); - //self->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, sw_frame->data[1]); - - //self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); - - int ret = av_hwframe_transfer_data(frame, self->sw_frame, 0); - if(ret < 0) { - fprintf(stderr, "transfer data failed, error: %s\n", av_err2str(ret)); - } - - //av_buffer_unref(&self->sw_frame->buf[0]); - //av_buffer_unref(&self->sw_frame->buf[1]); - - //av_frame_free(&sw_frame); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_y]); - self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_uv]); - self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); -} - static void gsr_video_encoder_vulkan_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { gsr_video_encoder_vulkan *self = encoder->priv; textures[0] = self->target_textures[0]; @@ -299,7 +298,7 @@ gsr_video_encoder* gsr_video_encoder_vulkan_create(const gsr_video_encoder_vulka *encoder = (gsr_video_encoder) { .start = gsr_video_encoder_vulkan_start, - .copy_textures_to_frame = gsr_video_encoder_vulkan_copy_textures_to_frame, + .copy_textures_to_frame = NULL, .get_textures = gsr_video_encoder_vulkan_get_textures, .destroy = gsr_video_encoder_vulkan_destroy, .priv = encoder_vulkan diff --git a/src/main.cpp b/src/main.cpp index 6567551..75ed903 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -181,6 +181,11 @@ enum class BitrateMode { CBR }; +enum class Tune { + PERFORMANCE, + QUALITY +}; + static int x11_error_handler(Display*, XErrorEvent*) { return 0; } @@ -629,16 +634,16 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt, if(codec_context->codec_id == AV_CODEC_ID_AV1 || codec_context->codec_id == AV_CODEC_ID_H264 || codec_context->codec_id == AV_CODEC_ID_HEVC) { switch(video_quality) { case VideoQuality::MEDIUM: - codec_context->global_quality = 150 * quality_multiply; + codec_context->global_quality = 130 * quality_multiply; break; case VideoQuality::HIGH: - codec_context->global_quality = 120 * quality_multiply; + codec_context->global_quality = 110 * quality_multiply; break; case VideoQuality::VERY_HIGH: - codec_context->global_quality = 115 * quality_multiply; + codec_context->global_quality = 95 * quality_multiply; break; case VideoQuality::ULTRA: - codec_context->global_quality = 90 * quality_multiply; + codec_context->global_quality = 85 * quality_multiply; break; } } else if(codec_context->codec_id == AV_CODEC_ID_VP8) { @@ -758,7 +763,7 @@ static AVFrame* create_audio_frame(AVCodecContext *audio_codec_context) { return frame; } -static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendor, gsr_color_depth color_depth, AVDictionary **options) { +static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendor, gsr_color_depth color_depth, VideoCodec video_codec, AVDictionary **options) { #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(61, 17, 100) if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: Only for vaapi @@ -780,14 +785,15 @@ static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendo av_dict_set(options, "profile", "main", 0); } #else + const bool use_nvidia_values = vendor == GSR_GPU_VENDOR_NVIDIA && !video_codec_is_vulkan(video_codec); if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: Only for vaapi //if(color_depth == GSR_COLOR_DEPTH_10_BITS) // av_dict_set_int(options, "profile", AV_PROFILE_H264_HIGH_10, 0); //else - av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 2 : AV_PROFILE_H264_HIGH, 0); + av_dict_set_int(options, "profile", use_nvidia_values ? 2 : AV_PROFILE_H264_HIGH, 0); } else if(codec_context->codec_id == AV_CODEC_ID_AV1) { - if(vendor == GSR_GPU_VENDOR_NVIDIA) { + if(use_nvidia_values) { if(color_depth == GSR_COLOR_DEPTH_10_BITS) av_dict_set_int(options, "highbitdepth", 1, 0); } else { @@ -795,9 +801,9 @@ static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendo } } else if(codec_context->codec_id == AV_CODEC_ID_HEVC) { if(color_depth == GSR_COLOR_DEPTH_10_BITS) - av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 1 : AV_PROFILE_HEVC_MAIN_10, 0); + av_dict_set_int(options, "profile", use_nvidia_values ? 1 : AV_PROFILE_HEVC_MAIN_10, 0); else - av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 0 : AV_PROFILE_HEVC_MAIN, 0); + av_dict_set_int(options, "profile", use_nvidia_values ? 0 : AV_PROFILE_HEVC_MAIN, 0); } #endif } @@ -862,7 +868,7 @@ static void open_video_software(AVCodecContext *codec_context, VideoQuality vide av_dict_set(&options, "preset", "veryfast", 0); av_dict_set(&options, "tune", "film", 0); - dict_set_profile(codec_context, GSR_GPU_VENDOR_INTEL, color_depth, &options); + dict_set_profile(codec_context, GSR_GPU_VENDOR_INTEL, color_depth, VideoCodec::H264, &options); if(codec_context->codec_id == AV_CODEC_ID_H264) { av_dict_set(&options, "coder", "cabac", 0); // TODO: cavlc is faster than cabac but worse compression. Which to use? @@ -1027,7 +1033,7 @@ static void video_hardware_set_qp(AVCodecContext *codec_context, VideoQuality vi } } -static void open_video_hardware(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gsr_gpu_vendor vendor, PixelFormat pixel_format, bool hdr, gsr_color_depth color_depth, BitrateMode bitrate_mode, VideoCodec video_codec, bool low_power) { +static void open_video_hardware(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gsr_gpu_vendor vendor, PixelFormat pixel_format, bool hdr, gsr_color_depth color_depth, BitrateMode bitrate_mode, VideoCodec video_codec, bool low_power, Tune tune) { (void)very_old_gpu; AVDictionary *options = nullptr; @@ -1038,9 +1044,14 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide // TODO: Enable multipass - // TODO: Set "usage" option to "record"/"stream" and "content" option to "rendered" for vulkan encoding + dict_set_profile(codec_context, vendor, color_depth, video_codec, &options); - if(vendor == GSR_GPU_VENDOR_NVIDIA) { + if(video_codec_is_vulkan(video_codec)) { + av_dict_set_int(&options, "async_depth", 3, 0); + av_dict_set(&options, "tune", "hq", 0); + av_dict_set(&options, "usage", "record", 0); // TODO: Set to stream when streaming + av_dict_set(&options, "content", "rendered", 0); + } else if(vendor == GSR_GPU_VENDOR_NVIDIA) { // TODO: These dont seem to be necessary // av_dict_set_int(&options, "zerolatency", 1, 0); // if(codec_context->codec_id == AV_CODEC_ID_AV1) { @@ -1051,7 +1062,16 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide // } av_dict_set(&options, "tune", "hq", 0); - dict_set_profile(codec_context, vendor, color_depth, &options); + switch(tune) { + case Tune::PERFORMANCE: + //av_dict_set(&options, "multipass", "qres", 0); + break; + case Tune::QUALITY: + av_dict_set(&options, "multipass", "fullres", 0); + av_dict_set(&options, "preset", "p6", 0); + av_dict_set_int(&options, "rc-lookahead", 0, 0); + break; + } if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: h264 10bit? @@ -1113,7 +1133,7 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide static void usage_header() { const bool inside_flatpak = getenv("FLATPAK_ID") != NULL; const char *program_name = inside_flatpak ? "flatpak run --command=gpu-screen-recorder com.dec05eba.gpu_screen_recorder" : "gpu-screen-recorder"; - printf("usage: %s -w <window_id|monitor|focused|portal|region> [-c <container_format>] [-s WxH] [-region WxH+X+Y] [-f <fps>] [-a <audio_input>] [-q <quality>] [-r <replay_buffer_size_sec>] [-restart-replay-on-save yes|no] [-k h264|hevc|av1|vp8|vp9|hevc_hdr|av1_hdr|hevc_10bit|av1_10bit] [-ac aac|opus|flac] [-ab <bitrate>] [-oc yes|no] [-fm cfr|vfr|content] [-bm auto|qp|vbr|cbr] [-cr limited|full] [-df yes|no] [-sc <script_path>] [-cursor yes|no] [-keyint <value>] [-restore-portal-session yes|no] [-portal-session-token-filepath filepath] [-encoder gpu|cpu] [-o <output_file>] [--list-capture-options [card_path] [vendor]] [--list-audio-devices] [--list-application-audio] [-v yes|no] [-gl-debug yes|no] [--version] [-h|--help]\n", program_name); + printf("usage: %s -w <window_id|monitor|focused|portal|region> [-c <container_format>] [-s WxH] [-region WxH+X+Y] [-f <fps>] [-a <audio_input>] [-q <quality>] [-r <replay_buffer_size_sec>] [-restart-replay-on-save yes|no] [-k h264|hevc|av1|vp8|vp9|hevc_hdr|av1_hdr|hevc_10bit|av1_10bit] [-ac aac|opus|flac] [-ab <bitrate>] [-oc yes|no] [-fm cfr|vfr|content] [-bm auto|qp|vbr|cbr] [-cr limited|full] [-tune performance|quality] [-df yes|no] [-sc <script_path>] [-cursor yes|no] [-keyint <value>] [-restore-portal-session yes|no] [-portal-session-token-filepath filepath] [-encoder gpu|cpu] [-o <output_file>] [--list-capture-options [card_path] [vendor]] [--list-audio-devices] [--list-application-audio] [-v yes|no] [-gl-debug yes|no] [--version] [-h|--help]\n", program_name); fflush(stdout); } @@ -1218,6 +1238,10 @@ static void usage_full() { printf(" Note that some buggy video players (such as vlc) are unable to correctly display videos in full color range and when upload the video to websites the website\n"); printf(" might re-encoder the video to make the video limited color range.\n"); printf("\n"); + printf(" -tune\n"); + printf(" Tune for performance or quality. Should be either 'performance' or 'quality'. At the moment this option only has an effect on Nvidia where setting this to quality\n"); + printf(" sets options such as preset, multipass and b frames. Optional, set to 'performance' by default.\n"); + printf("\n"); printf(" -df Organise replays in folders based on the current date.\n"); printf("\n"); printf(" -sc Run a script on the saved video file (asynchronously). The first argument to the script is the filepath to the saved video file and the second argument is the recording type (either \"regular\" or \"replay\").\n"); @@ -1296,7 +1320,7 @@ static void usage_full() { printf("NOTES:\n"); printf(" Send signal SIGINT to gpu-screen-recorder (Ctrl+C, or killall -SIGINT gpu-screen-recorder) to stop and save the recording. When in replay mode this stops recording without saving.\n"); printf(" Send signal SIGUSR1 to gpu-screen-recorder (killall -SIGUSR1 gpu-screen-recorder) to save a replay (when in replay mode).\n"); - printf(" Send signal SIGUSR2 to gpu-screen-recorder (killall -SIGUSR2 gpu-screen-recorder) to pause/unpause recording. Only applicable and useful when recording (not streaming nor replay).\n"); + printf(" Send signal SIGUSR2 to gpu-screen-recorder (killall -SIGUSR2 gpu-screen-recorder) to pause/unpause recording. Only applicable when recording (not streaming nor replay).\n"); printf("\n"); printf("EXAMPLES:\n"); printf(" %s -w screen -f 60 -a default_output -o video.mp4\n", program_name); @@ -1308,7 +1332,7 @@ static void usage_full() { printf(" %s -w screen -f 60 -a default_output -bm cbr -q 15000 -o video.mp4\n", program_name); printf(" %s -w screen -f 60 -a \"app:firefox|app:csgo\" -o video.mp4\n", program_name); printf(" %s -w screen -f 60 -a \"app-inverse:firefox|app-inverse:csgo\" -o video.mp4\n", program_name); - printf(" %s -w screen -f 60 -a \"default-input|app-inverse:Brave\" -o video.mp4\n", program_name); + printf(" %s -w screen -f 60 -a \"default_input|app-inverse:Brave\" -o video.mp4\n", program_name); printf(" %s -w screen -o image.jpg\n", program_name); printf(" %s -w screen -q medium -o image.jpg\n", program_name); printf(" %s -w region -region 640x480+100+100 -o video.mp4\n", program_name); @@ -3374,6 +3398,7 @@ int main(int argc, char **argv) { { "-df", Arg { {}, is_optional, !is_list, ArgType::BOOLEAN, {false} } }, { "-sc", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, { "-cr", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, + { "-tune", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, { "-cursor", Arg { {}, is_optional, !is_list, ArgType::BOOLEAN, {false} } }, { "-keyint", Arg { {}, is_optional, !is_list, ArgType::STRING, {false} } }, { "-restore-portal-session", Arg { {}, is_optional, !is_list, ArgType::BOOLEAN, {false} } }, @@ -3444,12 +3469,12 @@ int main(int argc, char **argv) { video_codec = VideoCodec::VP8; } else if(strcmp(video_codec_to_use, "vp9") == 0) { video_codec = VideoCodec::VP9; - //} else if(strcmp(video_codec_to_use, "h264_vulkan") == 0) { + // } else if(strcmp(video_codec_to_use, "h264_vulkan") == 0) { // video_codec = VideoCodec::H264_VULKAN; - //} else if(strcmp(video_codec_to_use, "hevc_vulkan") == 0) { + // } else if(strcmp(video_codec_to_use, "hevc_vulkan") == 0) { // video_codec = VideoCodec::HEVC_VULKAN; } else if(strcmp(video_codec_to_use, "auto") != 0) { - fprintf(stderr, "Error: -k should either be either 'auto', 'h264', 'hevc', 'av1', 'vp8', 'vp9', 'hevc_hdr', 'av1_hdr', 'hevc_10bit' or 'av1_10bit', got: '%s'\n", video_codec_to_use); + fprintf(stderr, "Error: -k should either be 'auto', 'h264', 'hevc', 'av1', 'vp8', 'vp9', 'hevc_hdr', 'av1_hdr', 'hevc_10bit' or 'av1_10bit', got: '%s'\n", video_codec_to_use); usage(); } @@ -3465,7 +3490,7 @@ int main(int argc, char **argv) { } else if(strcmp(audio_codec_to_use, "flac") == 0) { audio_codec = AudioCodec::FLAC; } else { - fprintf(stderr, "Error: -ac should either be either 'aac', 'opus' or 'flac', got: '%s'\n", audio_codec_to_use); + fprintf(stderr, "Error: -ac should either be 'aac', 'opus' or 'flac', got: '%s'\n", audio_codec_to_use); usage(); } @@ -3564,7 +3589,7 @@ int main(int argc, char **argv) { } else if(strcmp(pixfmt, "yuv444") == 0) { pixel_format = PixelFormat::YUV444; } else { - fprintf(stderr, "Error: -pixfmt should either be either 'yuv420', or 'yuv444', got: '%s'\n", pixfmt); + fprintf(stderr, "Error: -pixfmt should either be 'yuv420', or 'yuv444', got: '%s'\n", pixfmt); usage(); } @@ -3685,6 +3710,11 @@ int main(int argc, char **argv) { _exit(1); } + gsr_shader_enable_debug_output(gl_debug); +#ifndef NDEBUG + gsr_shader_enable_debug_output(true); +#endif + if(egl.gpu_info.is_steam_deck) { fprintf(stderr, "gsr warning: steam deck has multiple driver issues. One of them has been reported here: https://github.com/ValveSoftware/SteamOS/issues/1609\n" "If you have issues with GPU Screen Recorder on steam deck that you don't have on a desktop computer then report the issue to Valve and/or AMD.\n"); @@ -3736,7 +3766,7 @@ int main(int argc, char **argv) { } else if(strcmp(framerate_mode_str, "content") == 0) { framerate_mode = FramerateMode::CONTENT; } else { - fprintf(stderr, "Error: -fm should either be either 'cfr', 'vfr' or 'content', got: '%s'\n", framerate_mode_str); + fprintf(stderr, "Error: -fm should either be 'cfr', 'vfr' or 'content', got: '%s'\n", framerate_mode_str); usage(); } @@ -3757,7 +3787,7 @@ int main(int argc, char **argv) { } else if(strcmp(bitrate_mode_str, "cbr") == 0) { bitrate_mode = BitrateMode::CBR; } else if(strcmp(bitrate_mode_str, "auto") != 0) { - fprintf(stderr, "Error: -bm should either be either 'auto', 'qp', 'vbr' or 'cbr', got: '%s'\n", bitrate_mode_str); + fprintf(stderr, "Error: -bm should either be 'auto', 'qp', 'vbr' or 'cbr', got: '%s'\n", bitrate_mode_str); usage(); } @@ -3810,7 +3840,7 @@ int main(int argc, char **argv) { } else if(strcmp(quality_str, "ultra") == 0) { quality = VideoQuality::ULTRA; } else { - fprintf(stderr, "Error: -q should either be either 'medium', 'high', 'very_high' or 'ultra', got: '%s'\n", quality_str); + fprintf(stderr, "Error: -q should either be 'medium', 'high', 'very_high' or 'ultra', got: '%s'\n", quality_str); usage(); } } @@ -3825,7 +3855,21 @@ int main(int argc, char **argv) { } else if(strcmp(color_range_str, "full") == 0) { color_range = GSR_COLOR_RANGE_FULL; } else { - fprintf(stderr, "Error: -cr should either be either 'limited' or 'full', got: '%s'\n", color_range_str); + fprintf(stderr, "Error: -cr should either be 'limited' or 'full', got: '%s'\n", color_range_str); + usage(); + } + + Tune tune = Tune::PERFORMANCE; + const char *tune_str = args["-tune"].value(); + if(!tune_str) + tune_str = "performance"; + + if(strcmp(tune_str, "performance") == 0) { + tune = Tune::PERFORMANCE; + } else if(strcmp(tune_str, "quality") == 0) { + tune = Tune::QUALITY; + } else { + fprintf(stderr, "Error: -tune should either be 'performance' or 'quality', got: '%s'\n", tune_str); usage(); } @@ -3843,7 +3887,7 @@ int main(int argc, char **argv) { } if(output_resolution.x < 0 || output_resolution.y < 0) { - fprintf(stderr, "Error: invalud value for option -s '%s', expected width and height to be greater or equal to 0\n", output_resolution_str); + fprintf(stderr, "Error: invalid value for option -s '%s', expected width and height to be greater or equal to 0\n", output_resolution_str); usage(); } } @@ -3863,7 +3907,7 @@ int main(int argc, char **argv) { } if(region_size.x < 0 || region_size.y < 0 || region_position.x < 0 || region_position.y < 0) { - fprintf(stderr, "Error: invalud value for option -region '%s', expected width, height, x and y to be greater or equal to 0\n", region_str); + fprintf(stderr, "Error: invalid value for option -region '%s', expected width, height, x and y to be greater or equal to 0\n", region_str); usage(); } } else { @@ -3990,6 +4034,9 @@ int main(int argc, char **argv) { if(replay_buffer_size_secs == -1) video_stream = create_stream(av_format_context, video_codec_context); + if(tune == Tune::QUALITY) + video_codec_context->max_b_frames = 2; + AVFrame *video_frame = av_frame_alloc(); if(!video_frame) { fprintf(stderr, "Error: Failed to allocate video frame\n"); @@ -4054,7 +4101,7 @@ int main(int argc, char **argv) { if(use_software_video_encoder) { open_video_software(video_codec_context, quality, pixel_format, hdr, color_depth, bitrate_mode); } else { - open_video_hardware(video_codec_context, quality, very_old_gpu, egl.gpu_info.vendor, pixel_format, hdr, color_depth, bitrate_mode, video_codec, low_power); + open_video_hardware(video_codec_context, quality, very_old_gpu, egl.gpu_info.vendor, pixel_format, hdr, color_depth, bitrate_mode, video_codec, low_power, tune); } if(video_stream) avcodec_parameters_from_context(video_stream->codecpar, video_codec_context); diff --git a/src/shader.c b/src/shader.c index b9fbb62..ba4db80 100644 --- a/src/shader.c +++ b/src/shader.c @@ -3,14 +3,16 @@ #include <stdio.h> #include <assert.h> +static bool print_compile_errors = false; + static int min_int(int a, int b) { return a < b ? a : b; } -static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *source) { +static unsigned int load_shader(gsr_egl *egl, unsigned int type, const char *source) { unsigned int shader_id = egl->glCreateShader(type); if(shader_id == 0) { - fprintf(stderr, "gsr error: loader_shader: failed to create shader, error: %d\n", egl->glGetError()); + fprintf(stderr, "gsr error: load_shader: failed to create shader, error: %d\n", egl->glGetError()); return 0; } @@ -23,10 +25,10 @@ static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *s int info_length = 0; egl->glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_length); - if(info_length > 1) { + if(info_length > 1 && print_compile_errors) { char info_log[4096]; egl->glGetShaderInfoLog(shader_id, min_int(4096, info_length), NULL, info_log); - fprintf(stderr, "gsr error: loader shader: failed to compile shader, error:\n%s\nshader source:\n%s\n", info_log, source); + fprintf(stderr, "gsr error: load_shader: failed to compile shader, error:\n%s\nshader source:\n%s\n", info_log, source); } egl->glDeleteShader(shader_id); @@ -45,19 +47,19 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const bool success = false; if(vertex_shader) { - vertex_shader_id = loader_shader(egl, GL_VERTEX_SHADER, vertex_shader); + vertex_shader_id = load_shader(egl, GL_VERTEX_SHADER, vertex_shader); if(vertex_shader_id == 0) goto done; } if(fragment_shader) { - fragment_shader_id = loader_shader(egl, GL_FRAGMENT_SHADER, fragment_shader); + fragment_shader_id = load_shader(egl, GL_FRAGMENT_SHADER, fragment_shader); if(fragment_shader_id == 0) goto done; } if(compute_shader) { - compute_shader_id = loader_shader(egl, GL_COMPUTE_SHADER, compute_shader); + compute_shader_id = load_shader(egl, GL_COMPUTE_SHADER, compute_shader); if(compute_shader_id == 0) goto done; } @@ -151,3 +153,7 @@ void gsr_shader_use(gsr_shader *self) { void gsr_shader_use_none(gsr_shader *self) { self->egl->glUseProgram(0); } + +void gsr_shader_enable_debug_output(bool enable) { + print_compile_errors = enable; +} diff --git a/src/utils.c b/src/utils.c index 4aab138..9b4a4b6 100644 --- a/src/utils.c +++ b/src/utils.c @@ -362,13 +362,9 @@ bool gl_get_gpu_info(gsr_egl *egl, gsr_gpu_info *info) { bool supported = true; const unsigned char *gl_vendor = egl->glGetString(GL_VENDOR); const unsigned char *gl_renderer = egl->glGetString(GL_RENDERER); - const unsigned char *gl_version = egl->glGetString(GL_VERSION); info->gpu_version = 0; info->is_steam_deck = false; - info->driver_major = 0; - info->driver_minor = 0; - info->driver_patch = 0; if(!gl_vendor) { fprintf(stderr, "gsr error: failed to get gpu vendor\n"); @@ -408,21 +404,6 @@ bool gl_get_gpu_info(gsr_egl *egl, gsr_gpu_info *info) { info->is_steam_deck = strstr((const char*)gl_renderer, "vangogh") != NULL; } - if(gl_version) { - const char *mesa_p = strstr((const char*)gl_version, "Mesa "); - if(mesa_p) { - mesa_p += 5; - int major = 0; - int minor = 0; - int patch = 0; - if(sscanf(mesa_p, "%d.%d.%d", &major, &minor, &patch) == 3) { - info->driver_major = major; - info->driver_minor = minor; - info->driver_patch = patch; - } - } - } - end: return supported; } diff --git a/src/window/wayland.c b/src/window/wayland.c index ba7b547..efc7bbe 100644 --- a/src/window/wayland.c +++ b/src/window/wayland.c @@ -9,27 +9,32 @@ #include <stdint.h> #include <wayland-client.h> #include <wayland-egl.h> +#include "xdg-output-unstable-v1-client-protocol.h" #define GSR_MAX_OUTPUTS 32 +typedef struct gsr_window_wayland gsr_window_wayland; + typedef struct { uint32_t wl_name; - void *output; + struct wl_output *output; + struct zxdg_output_v1 *xdg_output; vec2i pos; vec2i size; int32_t transform; char *name; } gsr_wayland_output; -typedef struct { - void *display; - void *window; - void *registry; - void *surface; - void *compositor; +struct gsr_window_wayland { + struct wl_display *display; + struct wl_egl_window *window; + struct wl_registry *registry; + struct wl_surface *surface; + struct wl_compositor *compositor; gsr_wayland_output outputs[GSR_MAX_OUTPUTS]; int num_outputs; -} gsr_window_wayland; + struct zxdg_output_manager_v1 *xdg_output_manager; +}; static void output_handle_geometry(void *data, struct wl_output *wl_output, int32_t x, int32_t y, int32_t phys_width, int32_t phys_height, @@ -95,7 +100,7 @@ static const struct wl_output_listener output_listener = { static void registry_add_object(void *data, struct wl_registry *registry, uint32_t name, const char *interface, uint32_t version) { (void)version; gsr_window_wayland *window_wayland = data; - if (strcmp(interface, "wl_compositor") == 0) { + if(strcmp(interface, "wl_compositor") == 0) { if(window_wayland->compositor) { wl_compositor_destroy(window_wayland->compositor); window_wayland->compositor = NULL; @@ -103,7 +108,7 @@ static void registry_add_object(void *data, struct wl_registry *registry, uint32 window_wayland->compositor = wl_registry_bind(registry, name, &wl_compositor_interface, 1); } else if(strcmp(interface, wl_output_interface.name) == 0) { if(version < 4) { - fprintf(stderr, "gsr warning: wl output interface version is < 4, expected >= 4 to capture a monitor. Using KMS capture instead\n"); + fprintf(stderr, "gsr warning: wl output interface version is < 4, expected >= 4 to capture a monitor\n"); return; } @@ -123,6 +128,17 @@ static void registry_add_object(void *data, struct wl_registry *registry, uint32 .name = NULL, }; wl_output_add_listener(gsr_output->output, &output_listener, gsr_output); + } else if(strcmp(interface, zxdg_output_manager_v1_interface.name) == 0) { + if(version < 1) { + fprintf(stderr, "gsr warning: xdg output interface version is < 1, expected >= 1 to capture a monitor\n"); + return; + } + + if(window_wayland->xdg_output_manager) { + zxdg_output_manager_v1_destroy(window_wayland->xdg_output_manager); + window_wayland->xdg_output_manager = NULL; + } + window_wayland->xdg_output_manager = wl_registry_bind(registry, name, &zxdg_output_manager_v1_interface, 1); } } @@ -130,6 +146,7 @@ static void registry_remove_object(void *data, struct wl_registry *registry, uin (void)data; (void)registry; (void)name; + // TODO: Remove output } static struct wl_registry_listener registry_listener = { @@ -137,6 +154,60 @@ static struct wl_registry_listener registry_listener = { .global_remove = registry_remove_object, }; +static void xdg_output_logical_position(void *data, struct zxdg_output_v1 *zxdg_output_v1, int32_t x, int32_t y) { + (void)zxdg_output_v1; + gsr_wayland_output *gsr_xdg_output = data; + gsr_xdg_output->pos.x = x; + gsr_xdg_output->pos.y = y; +} + +static void xdg_output_handle_logical_size(void *data, struct zxdg_output_v1 *xdg_output, int32_t width, int32_t height) { + (void)data; + (void)xdg_output; + (void)width; + (void)height; +} + +static void xdg_output_handle_done(void *data, struct zxdg_output_v1 *xdg_output) { + (void)data; + (void)xdg_output; +} + +static void xdg_output_handle_name(void *data, struct zxdg_output_v1 *xdg_output, const char *name) { + (void)data; + (void)xdg_output; + (void)name; +} + +static void xdg_output_handle_description(void *data, struct zxdg_output_v1 *xdg_output, const char *description) { + (void)data; + (void)xdg_output; + (void)description; +} + +static const struct zxdg_output_v1_listener xdg_output_listener = { + .logical_position = xdg_output_logical_position, + .logical_size = xdg_output_handle_logical_size, + .done = xdg_output_handle_done, + .name = xdg_output_handle_name, + .description = xdg_output_handle_description, +}; + +static void gsr_window_wayland_set_monitor_outputs_from_xdg_output(gsr_window_wayland *self) { + if(!self->xdg_output_manager) { + fprintf(stderr, "gsr warning: zxdg_output_manager not found. registered monitor positions might be incorrect\n"); + return; + } + + for(int i = 0; i < self->num_outputs; ++i) { + self->outputs[i].xdg_output = zxdg_output_manager_v1_get_xdg_output(self->xdg_output_manager, self->outputs[i].output); + zxdg_output_v1_add_listener(self->outputs[i].xdg_output, &xdg_output_listener, &self->outputs[i]); + } + + // Fetch xdg_output + wl_display_roundtrip(self->display); +} + static void gsr_window_wayland_deinit(gsr_window_wayland *self) { if(self->window) { wl_egl_window_destroy(self->window); @@ -158,9 +229,19 @@ static void gsr_window_wayland_deinit(gsr_window_wayland *self) { free(self->outputs[i].name); self->outputs[i].name = NULL; } + + if(self->outputs[i].xdg_output) { + zxdg_output_v1_destroy(self->outputs[i].xdg_output); + self->outputs[i].output = NULL; + } } self->num_outputs = 0; + if(self->xdg_output_manager) { + zxdg_output_manager_v1_destroy(self->xdg_output_manager); + self->xdg_output_manager = NULL; + } + if(self->compositor) { wl_compositor_destroy(self->compositor); self->compositor = NULL; @@ -193,6 +274,8 @@ static bool gsr_window_wayland_init(gsr_window_wayland *self) { // Fetch wl_output wl_display_roundtrip(self->display); + gsr_window_wayland_set_monitor_outputs_from_xdg_output(self); + if(!self->compositor) { fprintf(stderr, "gsr error: gsr_window_wayland_init failed: failed to find compositor\n"); goto fail; |