diff options
author | dec05eba <dec05eba@protonmail.com> | 2025-03-29 13:27:20 +0100 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2025-03-29 13:29:10 +0100 |
commit | a41a32cb9079dc3c78414b7b0cdc68243333020d (patch) | |
tree | 5776bf16deb9f4e18cf744d474e0d1f0dc19c081 | |
parent | 634a563bc0e7d23040e70f90e7ccc498aa7db41e (diff) |
WIP: use compute shader instead of graphics shader for better performance (especially on amd)
-rw-r--r-- | include/color_conversion.h | 26 | ||||
-rw-r--r-- | include/egl.h | 14 | ||||
-rw-r--r-- | include/image_writer.h | 2 | ||||
-rw-r--r-- | include/shader.h | 2 | ||||
-rw-r--r-- | include/utils.h | 2 | ||||
-rw-r--r-- | src/capture/kms.c | 101 | ||||
-rw-r--r-- | src/capture/nvfbc.c | 10 | ||||
-rw-r--r-- | src/capture/portal.c | 61 | ||||
-rw-r--r-- | src/capture/xcomposite.c | 35 | ||||
-rw-r--r-- | src/capture/ximage.c | 4 | ||||
-rw-r--r-- | src/color_conversion.c | 516 | ||||
-rw-r--r-- | src/egl.c | 6 | ||||
-rw-r--r-- | src/encoder/video/software.c | 4 | ||||
-rw-r--r-- | src/encoder/video/vaapi.c | 4 | ||||
-rw-r--r-- | src/image_writer.c | 10 | ||||
-rw-r--r-- | src/main.cpp | 5 | ||||
-rw-r--r-- | src/shader.c | 48 | ||||
-rw-r--r-- | src/utils.c | 237 |
18 files changed, 336 insertions, 751 deletions
diff --git a/include/color_conversion.h b/include/color_conversion.h index b80be21..37eeb8e 100644 --- a/include/color_conversion.h +++ b/include/color_conversion.h @@ -2,9 +2,12 @@ #define GSR_COLOR_CONVERSION_H #include "shader.h" +#include "defs.h" #include "vec2.h" #include <stdbool.h> +#define GSR_COLOR_CONVERSION_MAX_SHADERS 3 + typedef enum { GSR_COLOR_RANGE_LIMITED, GSR_COLOR_RANGE_FULL @@ -26,9 +29,18 @@ typedef enum { GSR_DESTINATION_COLOR_RGB8 } gsr_destination_color; +typedef enum { + GSR_ROT_0, + GSR_ROT_90, + GSR_ROT_180, + GSR_ROT_270 +} gsr_rotation; + typedef struct { - int offset; - int rotation; + int rotation_matrix; + int source_position; + int target_position; + int scale; } gsr_color_uniforms; typedef struct { @@ -45,19 +57,23 @@ typedef struct { typedef struct { gsr_color_conversion_params params; - gsr_color_uniforms uniforms[4]; - gsr_shader shaders[4]; + gsr_color_uniforms uniforms[GSR_COLOR_CONVERSION_MAX_SHADERS]; + gsr_shader shaders[GSR_COLOR_CONVERSION_MAX_SHADERS]; unsigned int framebuffers[2]; unsigned int vertex_array_object_id; unsigned int vertex_buffer_object_id; + + int max_local_size_dim; } gsr_color_conversion; int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params); void gsr_color_conversion_deinit(gsr_color_conversion *self); -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color); +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color); void gsr_color_conversion_clear(gsr_color_conversion *self); +gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation); + #endif /* GSR_COLOR_CONVERSION_H 
*/ diff --git a/include/egl.h b/include/egl.h index 0d08270..643ab30 100644 --- a/include/egl.h +++ b/include/egl.h @@ -98,7 +98,7 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_TEXTURE_EXTERNAL_OES 0x8D65 #define GL_RED 0x1903 #define GL_GREEN 0x1904 -#define GL_BLUE 0x1905 +#define GL_BLUE 0x1905 #define GL_ALPHA 0x1906 #define GL_TEXTURE_SWIZZLE_RGBA 0x8E46 #define GL_RG 0x8227 @@ -111,6 +111,7 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_R16 0x822A #define GL_RG16 0x822C #define GL_RGB16 0x8054 +#define GL_RGBA32F 0x8814 #define GL_UNSIGNED_BYTE 0x1401 #define GL_COLOR_BUFFER_BIT 0x00004000 #define GL_TEXTURE_WRAP_S 0x2802 @@ -134,6 +135,10 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_SCISSOR_TEST 0x0C11 #define GL_PACK_ALIGNMENT 0x0D05 #define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA +#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS 0x90EB #define GL_VENDOR 0x1F00 #define GL_RENDERER 0x1F01 @@ -143,6 +148,7 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_INFO_LOG_LENGTH 0x8B84 #define GL_FRAGMENT_SHADER 0x8B30 #define GL_VERTEX_SHADER 0x8B31 +#define GL_COMPUTE_SHADER 0x91B9 #define GL_COMPILE_STATUS 0x8B81 #define GL_LINK_STATUS 0x8B82 @@ -231,6 +237,7 @@ struct gsr_egl { void (*glGenTextures)(int n, unsigned int *textures); void (*glDeleteTextures)(int n, const unsigned int *texture); void (*glBindTexture)(unsigned int target, unsigned int texture); + void (*glBindImageTexture)(unsigned int unit, unsigned int texture, int level, unsigned char layered, int layer, unsigned int access, unsigned int format); void (*glTexParameteri)(unsigned int target, unsigned int pname, int param); void (*glTexParameteriv)(unsigned int target, unsigned int pname, const int *params); void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params); @@ -240,6 +247,8 @@ struct gsr_egl { void (*glGenFramebuffers)(int n, unsigned int *framebuffers); void 
(*glBindFramebuffer)(unsigned int target, unsigned int framebuffer); void (*glDeleteFramebuffers)(int n, const unsigned int *framebuffers); + void (*glDispatchCompute)(unsigned int num_groups_x, unsigned int num_groups_y, unsigned int num_groups_z); + void (*glMemoryBarrier)(unsigned int barriers); void (*glViewport)(int x, int y, int width, int height); void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level); void (*glDrawBuffers)(int n, const unsigned int *bufs); @@ -276,11 +285,14 @@ struct gsr_egl { int (*glGetUniformLocation)(unsigned int program, const char *name); void (*glUniform1f)(int location, float v0); void (*glUniform2f)(int location, float v0, float v1); + void (*glUniform2i)(int location, int v0, int v1); + void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value); void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam); void (*glScissor)(int x, int y, int width, int height); void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels); void* (*glMapBuffer)(unsigned int target, unsigned int access); unsigned char (*glUnmapBuffer)(unsigned int target); + void (*glGetIntegerv)(unsigned int pname, int *params); }; bool gsr_egl_load(gsr_egl *self, gsr_window *window, bool is_monitor_capture, bool enable_debug); diff --git a/include/image_writer.h b/include/image_writer.h index 400edd0..65e7497 100644 --- a/include/image_writer.h +++ b/include/image_writer.h @@ -25,7 +25,7 @@ typedef struct { } gsr_image_writer; bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int width, int height); -/* |memory| is taken as a reference */ +/* |memory| is taken as a reference. 
The data is expected to be in rgba8 format (8 bit rgba) */ bool gsr_image_writer_init_memory(gsr_image_writer *self, const void *memory, int width, int height); void gsr_image_writer_deinit(gsr_image_writer *self); diff --git a/include/shader.h b/include/shader.h index 57d1096..8bc1104 100644 --- a/include/shader.h +++ b/include/shader.h @@ -9,7 +9,7 @@ typedef struct { } gsr_shader; /* |vertex_shader| or |fragment_shader| may be NULL */ -int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader); +int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader); void gsr_shader_deinit(gsr_shader *self); int gsr_shader_bind_attribute_location(gsr_shader *self, const char *attribute, int location); diff --git a/include/utils.h b/include/utils.h index fd340e8..873e6e4 100644 --- a/include/utils.h +++ b/include/utils.h @@ -64,8 +64,6 @@ int create_directory_recursive(char *path); /* |img_attr| needs to be at least 44 in size */ void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes, bool use_modifier); bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context); -bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes); -bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame); vec2i scale_keep_aspect_ratio(vec2i from, vec2i to); diff --git a/src/capture/kms.c b/src/capture/kms.c index 578fded..e98fab7 
100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -53,10 +53,6 @@ typedef struct { bool is_x11; gsr_cursor x11_cursor; - bool performance_error_shown; - bool fast_path_failed; - bool mesa_supports_compute_only_vaapi_copy; - //int drm_fd; //uint64_t prev_sequence; //bool damaged; @@ -229,17 +225,6 @@ static int gsr_capture_kms_start(gsr_capture *cap, gsr_capture_metadata *capture capture_metadata->height = self->capture_size.y; } - self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9); - if(self->fast_path_failed) - fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n"); - - //if(self->params.hdr) { - // self->fast_path_failed = true; - // fprintf(stderr, "gsr warning: gsr_capture_kms_start: recording with hdr requires shader color conversion which might be slow. If this is an issue record with -w portal instead (which converts HDR to SDR)\n"); - //} - - self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6); - self->last_time_monitor_check = clock_get_monotonic_seconds(); return 0; } @@ -274,16 +259,6 @@ static void gsr_capture_kms_on_event(gsr_capture *cap, gsr_egl *egl) { // } // } -static float monitor_rotation_to_radians(gsr_monitor_rotation rot) { - switch(rot) { - case GSR_MONITOR_ROT_0: return 0.0f; - case GSR_MONITOR_ROT_90: return M_PI_2; - case GSR_MONITOR_ROT_180: return M_PI; - case GSR_MONITOR_ROT_270: return M_PI + M_PI_2; - } - return 0.0f; -} - static gsr_kms_response_item* find_drm_by_connector_id(gsr_kms_response *kms_response, uint32_t connector_id) { for(int i = 0; i < kms_response->num_items; ++i) { if(kms_response->items[i].connector_id == connector_id && !kms_response->items[i].is_cursor) @@ -449,7 +424,7 @@ static 
gsr_kms_response_item* find_cursor_drm_if_on_monitor(gsr_capture_kms *sel return cursor_drm_fd; } -static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, float texture_rotation, vec2i output_size, vec2i framebuffer_size) { +static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, vec2i output_size, vec2i framebuffer_size) { const vec2d scale = { self->capture_size.x == 0 ? 0 : (double)output_size.x / (double)self->capture_size.x, self->capture_size.y == 0 ? 0 : (double)output_size.y / (double)self->capture_size.y @@ -523,7 +498,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->cursor_texture_id, cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y}, (vec2i){0, 0}, cursor_size, - texture_rotation, cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); + gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -551,7 +526,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id, cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y}, (vec2i){0, 0}, self->x11_cursor.size, - 0.0f, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -604,16 +579,6 @@ static void gsr_capture_kms_update_connector_ids(gsr_capture_kms *self) { self->capture_size = rotate_capture_size_if_rotated(self, monitor.size); } -static void gsr_capture_kms_fail_fast_path_if_not_fast(gsr_capture_kms *self, uint32_t pixel_format) { - const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 
0xFF; - if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) { - self->fast_path_failed = true; - fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used." - " If you experience performance problems in the video then record on a single window on X11 or use portal capture option instead or disable 10/12-bit color option in your desktop environment settings," - " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n"); - } -} - static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) { gsr_capture_kms *self = cap->priv; @@ -645,15 +610,6 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata)) gsr_kms_set_hdr_metadata(self, drm_fd); - if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { - self->performance_error_shown = true; - self->fast_path_failed = true; - fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used." 
- " If you experience performance problems in the video then record a single window on X11 or use portal capture option instead\n"); - } - - gsr_capture_kms_fail_fast_path_if_not_fast(self, drm_fd->pixel_format); - self->capture_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h }); if(self->params.region_size.x > 0 && self->params.region_size.y > 0) self->capture_size = self->params.region_size; @@ -662,7 +618,6 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size; output_size = scale_keep_aspect_ratio(self->capture_size, output_size); - const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation); const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) }; gsr_capture_kms_update_capture_size_change(self, color_conversion, target_pos, drm_fd); @@ -673,42 +628,20 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu capture_pos.x += self->params.region_position.x; capture_pos.y += self->params.region_position.y; - self->params.egl->glFlush(); - self->params.egl->glFinish(); - - /* Fast opengl free path */ - if(!self->fast_path_failed && self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { - int fds[4]; - uint32_t offsets[4]; - uint32_t pitches[4]; - uint64_t modifiers[4]; - for(int i = 0; i < drm_fd->num_dma_bufs; ++i) { - fds[i] = drm_fd->dma_buf[i].fd; - offsets[i] = drm_fd->dma_buf[i].offset; - pitches[i] = drm_fd->dma_buf[i].pitch; - modifiers[i] = drm_fd->modifier; - } - if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){capture_pos.x, capture_pos.y}, self->capture_size, target_pos, 
output_size, drm_fd->pixel_format, (vec2i){drm_fd->width, drm_fd->height}, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs)) { - fprintf(stderr, "gsr error: gsr_capture_kms_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n"); - self->fast_path_failed = true; - } - } else { - self->fast_path_failed = true; - } - - if(self->fast_path_failed) { - EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd); - if(image) { - gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image); - self->params.egl->eglDestroyImage(self->params.egl->egl_display, image); - } + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); - gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id, - target_pos, output_size, - capture_pos, self->capture_size, - texture_rotation, self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd); + if(image) { + gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image); + self->params.egl->eglDestroyImage(self->params.egl->egl_display, image); } + gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? 
self->external_input_texture_id : self->input_texture_id, + target_pos, output_size, + capture_pos, self->capture_size, + gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + if(self->params.record_cursor) { gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane); // The cursor is handled by x11 on x11 instead of using the cursor drm plane because on prime systems with a dedicated nvidia gpu @@ -722,12 +655,12 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu render_x11_cursor(self, color_conversion, cursor_monitor_offset, target_pos, output_size); } else if(cursor_drm_fd) { const vec2i framebuffer_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h }); - render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, texture_rotation, output_size, framebuffer_size); + render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, output_size, framebuffer_size); } } - self->params.egl->glFlush(); - self->params.egl->glFinish(); + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); gsr_capture_kms_cleanup_kms_fds(self); diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index b6a3671..5f47b00 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -390,16 +390,16 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap return 0; } - self->params.egl->glFlush(); - self->params.egl->glFinish(); + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex], target_pos, (vec2i){output_size.x, output_size.y}, self->params.region_position, frame_size, - 0.0f, false, GSR_SOURCE_COLOR_BGR); + GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR); - self->params.egl->glFlush(); - self->params.egl->glFinish(); + 
//self->params.egl->glFlush(); + //self->params.egl->glFinish(); return 0; } diff --git a/src/capture/portal.c b/src/capture/portal.c index e065f02..56072d8 100644 --- a/src/capture/portal.c +++ b/src/capture/portal.c @@ -23,9 +23,6 @@ typedef struct { vec2i capture_size; gsr_pipewire_video_dmabuf_data dmabuf_data[GSR_PIPEWIRE_VIDEO_DMABUF_MAX_PLANES]; int num_dmabuf_data; - - bool fast_path_failed; - bool mesa_supports_compute_only_vaapi_copy; } gsr_capture_portal; static void gsr_capture_portal_cleanup_plane_fds(gsr_capture_portal *self) { @@ -305,12 +302,6 @@ static int gsr_capture_portal_start(gsr_capture *cap, gsr_capture_metadata *capt capture_metadata->height = self->params.output_resolution.y; } - self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9); - if(self->fast_path_failed) - fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n"); - - self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6); - return 0; } @@ -318,16 +309,6 @@ static int max_int(int a, int b) { return a > b ? a : b; } -static void gsr_capture_portal_fail_fast_path_if_not_fast(gsr_capture_portal *self, uint32_t pixel_format) { - const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF; - if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) { - self->fast_path_failed = true; - fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used." 
- " If you experience performance problems in the video then record on a single window on X11 instead or disable 10/12-bit color option in your desktop environment settings," - " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n"); - } -} - static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) { (void)color_conversion; gsr_capture_portal *self = cap->priv; @@ -348,45 +329,21 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca return 0; } - gsr_capture_portal_fail_fast_path_if_not_fast(self, pipewire_fourcc); - const bool is_scaled = self->params.output_resolution.x > 0 && self->params.output_resolution.y > 0; vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size; output_size = scale_keep_aspect_ratio(self->capture_size, output_size); const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) }; - self->params.egl->glFlush(); - self->params.egl->glFinish(); + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); // TODO: Handle region crop - /* Fast opengl free path */ - if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { - int fds[4]; - uint32_t offsets[4]; - uint32_t pitches[4]; - uint64_t modifiers[4]; - for(int i = 0; i < self->num_dmabuf_data; ++i) { - fds[i] = self->dmabuf_data[i].fd; - offsets[i] = self->dmabuf_data[i].offset; - pitches[i] = self->dmabuf_data[i].stride; - modifiers[i] = pipewire_modifiers; - } - if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){region.x, region.y}, self->capture_size, target_pos, output_size, pipewire_fourcc, self->capture_size, fds, offsets, pitches, 
modifiers, self->num_dmabuf_data)) { - fprintf(stderr, "gsr error: gsr_capture_portal_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n"); - self->fast_path_failed = true; - } - } else { - self->fast_path_failed = true; - } - - if(self->fast_path_failed) { - gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id, - target_pos, output_size, - (vec2i){region.x, region.y}, self->capture_size, - 0.0f, using_external_image, GSR_SOURCE_COLOR_RGB); - } + gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id, + target_pos, output_size, + (vec2i){region.x, region.y}, self->capture_size, + GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB); if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) { const vec2d scale = { @@ -404,12 +361,12 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id, (vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y}, (vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height}, - 0.0f, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } - self->params.egl->glFlush(); - self->params.egl->glFinish(); + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); gsr_capture_portal_cleanup_plane_fds(self); diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c index d8f4c27..16bc988 100644 --- a/src/capture/xcomposite.c +++ b/src/capture/xcomposite.c @@ -34,7 +34,6 @@ typedef struct { gsr_cursor cursor; bool clear_background; - bool fast_path_failed; } 
gsr_capture_xcomposite; static void gsr_capture_xcomposite_stop(gsr_capture_xcomposite *self) { @@ -117,10 +116,6 @@ static int gsr_capture_xcomposite_start(gsr_capture *cap, gsr_capture_metadata * capture_metadata->height = self->params.output_resolution.y; } - self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9); - if(self->fast_path_failed) - fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n"); - self->window_resize_timer = clock_get_monotonic_seconds(); return 0; } @@ -258,25 +253,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata const vec2i target_pos = { max_int(0, capture_metdata->width / 2 - output_size.x / 2), max_int(0, capture_metdata->height / 2 - output_size.y / 2) }; - self->params.egl->glFlush(); - self->params.egl->glFinish(); - - /* Fast opengl free path */ - if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metdata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { - if(!vaapi_copy_egl_image_to_video_surface(self->params.egl, self->window_texture.image, (vec2i){0, 0}, self->texture_size, target_pos, output_size, capture_metdata->video_codec_context, capture_metdata->frame)) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_capture: vaapi_copy_egl_image_to_video_surface failed, falling back to opengl copy. 
Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n"); - self->fast_path_failed = true; - } - } else { - self->fast_path_failed = true; - } + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); - if(self->fast_path_failed) { - gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), - target_pos, output_size, - (vec2i){0, 0}, self->texture_size, - 0.0f, false, GSR_SOURCE_COLOR_RGB); - } + gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), + target_pos, output_size, + (vec2i){0, 0}, self->texture_size, + GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -297,13 +280,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, - 0.0f, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } - self->params.egl->glFlush(); - self->params.egl->glFinish(); + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); return 0; } diff --git a/src/capture/ximage.c b/src/capture/ximage.c index 259761d..ac00d72 100644 --- a/src/capture/ximage.c +++ b/src/capture/ximage.c @@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_id, target_pos, output_size, (vec2i){0, 0}, self->capture_size, - 0.0f, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca 
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, - 0.0f, false, GSR_SOURCE_COLOR_RGB); + GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/color_conversion.c b/src/color_conversion.c index c1fe894..72390f5 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -5,20 +5,16 @@ #include <math.h> #include <assert.h> -#define MAX_SHADERS 4 -#define MAX_FRAMEBUFFERS 2 -#define EXTERNAL_TEXTURE_SHADER_OFFSET 2 +// TODO: external texture +// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result +// with a call to mix to choose source/output color. -static float abs_f(float v) { - return v >= 0.0f ? v : -v; -} +#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +// TODO: Use the minimal barrier required and move this to egl.h +#define GL_ALL_BARRIER_BITS 0xFFFFFFFF -#define ROTATE_Z "mat4 rotate_z(in float angle) {\n" \ - " return mat4(cos(angle), -sin(angle), 0.0, 0.0,\n" \ - " sin(angle), cos(angle), 0.0, 0.0,\n" \ - " 0.0, 0.0, 1.0, 0.0,\n" \ - " 0.0, 0.0, 0.0, 1.0);\n" \ - "}\n" +#define MAX_FRAMEBUFFERS 2 +#define EXTERNAL_TEXTURE_SHADER_OFFSET 2 /* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */ @@ -48,6 +44,10 @@ static float abs_f(float v) { " 0.060118, 0.429412, -0.038049, 0.000000,\n" \ " 0.062745, 0.500000, 0.500000, 1.000000);\n" +static int max_int(int a, int b) { + return a > b ? 
a : b; +} + static const char* color_format_range_get_transform_matrix(gsr_destination_color color_format, gsr_color_range color_range) { switch(color_format) { case GSR_DESTINATION_COLOR_NV12: { @@ -76,187 +76,111 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color return NULL; } -static int load_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +// TODO: Make alpha blending optional +// TODO: Optimize these shaders. +static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); - - char vertex_shader[2048]; - snprintf(vertex_shader, sizeof(vertex_shader), - "#version 300 es \n" - "in vec2 pos; \n" - "in vec2 texcoords; \n" - "out vec2 texcoords_out; \n" - "uniform vec2 offset; \n" - "uniform float rotation; \n" - ROTATE_Z - "void main() \n" - "{ \n" - " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n" - " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n" - "} \n"); - - const char *main_code = - main_code = - " vec4 pixel = texture(tex1, texcoords_out); \n" - " FragColor.x = (RGBtoYUV * vec4(pixel.rgb, 1.0)).x; \n" - " FragColor.w = pixel.a; \n"; - - char fragment_shader[2048]; - if(external_texture) { - snprintf(fragment_shader, sizeof(fragment_shader), - "#version 300 es \n" - "#extension GL_OES_EGL_image_external : enable \n" - "#extension GL_OES_EGL_image_external_essl3 : require \n" - "precision highp float; \n" - "in vec2 texcoords_out; \n" - "uniform samplerExternalOES tex1; \n" - "out vec4 FragColor; \n" - "%s" - "void main() \n" - "{ \n" - "%s" - "} \n", color_transform_matrix, 
main_code); - } else { - snprintf(fragment_shader, sizeof(fragment_shader), - "#version 300 es \n" - "precision highp float; \n" - "in vec2 texcoords_out; \n" - "uniform sampler2D tex1; \n" - "out vec4 FragColor; \n" - "%s" - "void main() \n" - "{ \n" - "%s" - "} \n", color_transform_matrix, main_code); - } - - if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) + const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010; + + char compute_shader[2048]; + snprintf(compute_shader, sizeof(compute_shader), + "#version 430 core\n" + "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" + "uniform sampler2D imgInput;\n" + "uniform ivec2 source_position;\n" + "uniform ivec2 target_position;\n" + "uniform vec2 scale;\n" + "uniform mat2 rotation_matrix;\n" + "layout(%s, binding = 0) uniform image2D imgOutput;\n" + "%s" + "void main() {\n" + " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n" + " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" + " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n" + " vec4 source_color = texture(imgInput, texCoord);\n" + " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" + " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n" + " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n" + " imageStore(imgOutput, texelCoord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n" + "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? 
"r16" : "r8", color_transform_matrix); + + if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; - gsr_shader_bind_attribute_location(shader, "pos", 0); - gsr_shader_bind_attribute_location(shader, "texcoords", 1); - uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); - uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation"); + uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); + uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position"); + uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); + uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale"); return 0; } -static unsigned int load_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { +static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); - - char vertex_shader[2048]; - snprintf(vertex_shader, sizeof(vertex_shader), - "#version 300 es \n" - "in vec2 pos; \n" - "in vec2 texcoords; \n" - "out vec2 texcoords_out; \n" - "uniform vec2 offset; \n" - "uniform float rotation; \n" - ROTATE_Z - "void main() \n" - "{ \n" - " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n" - " gl_Position = (vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0)) * vec4(0.5, 0.5, 1.0, 1.0) - vec4(0.5, 0.5, 0.0, 0.0); \n" - "} \n"); - - const char *main_code = - main_code = - " vec4 pixel = texture(tex1, texcoords_out); \n" - " FragColor.xy = (RGBtoYUV * vec4(pixel.rgb, 1.0)).yz; \n" - " FragColor.w = pixel.a; \n"; 
- - char fragment_shader[2048]; - if(external_texture) { - snprintf(fragment_shader, sizeof(fragment_shader), - "#version 300 es \n" - "#extension GL_OES_EGL_image_external : enable \n" - "#extension GL_OES_EGL_image_external_essl3 : require \n" - "precision highp float; \n" - "in vec2 texcoords_out; \n" - "uniform samplerExternalOES tex1; \n" - "out vec4 FragColor; \n" - "%s" - "void main() \n" - "{ \n" - "%s" - "} \n", color_transform_matrix, main_code); - } else { - snprintf(fragment_shader, sizeof(fragment_shader), - "#version 300 es \n" - "precision highp float; \n" - "in vec2 texcoords_out; \n" - "uniform sampler2D tex1; \n" - "out vec4 FragColor; \n" - "%s" - "void main() \n" - "{ \n" - "%s" - "} \n", color_transform_matrix, main_code); - } - - if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) + const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010; + + char compute_shader[2048]; + snprintf(compute_shader, sizeof(compute_shader), + "#version 430 core\n" + "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" + "uniform sampler2D imgInput;\n" + "uniform ivec2 source_position;\n" + "uniform ivec2 target_position;\n" + "uniform vec2 scale;\n" + "uniform mat2 rotation_matrix;\n" + "layout(%s, binding = 0) uniform image2D imgOutput;\n" + "%s" + "void main() {\n" + " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n" + " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" + " vec2 rotated_texel_coord = vec2(texelCoord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n" + " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n" + " vec4 source_color = texture(imgInput, texCoord * 2.0);\n" + " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" + " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position/2);\n" + " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n" + " 
imageStore(imgOutput, texelCoord + target_position/2, vec4(uv_color, 1.0, 1.0));\n" + "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "rg16" : "rg8", color_transform_matrix); + + if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; - gsr_shader_bind_attribute_location(shader, "pos", 0); - gsr_shader_bind_attribute_location(shader, "texcoords", 1); - uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); - uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation"); + uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); + uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position"); + uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); + uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale"); return 0; } -static unsigned int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, bool external_texture) { - char vertex_shader[2048]; - snprintf(vertex_shader, sizeof(vertex_shader), - "#version 300 es \n" - "in vec2 pos; \n" - "in vec2 texcoords; \n" - "out vec2 texcoords_out; \n" - "uniform vec2 offset; \n" - "uniform float rotation; \n" - ROTATE_Z - "void main() \n" - "{ \n" - " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n" - " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n" - "} \n"); - - const char *main_code = - main_code = - " vec4 pixel = texture(tex1, texcoords_out); \n" - " FragColor = pixel; \n"; - - char fragment_shader[2048]; - if(external_texture) { - snprintf(fragment_shader, sizeof(fragment_shader), - "#version 300 es \n" - "#extension GL_OES_EGL_image_external : enable \n" - "#extension GL_OES_EGL_image_external_essl3 : require \n" - "precision highp float; \n" - "in vec2 texcoords_out; \n" - "uniform 
samplerExternalOES tex1; \n" - "out vec4 FragColor; \n" - "void main() \n" - "{ \n" - "%s" - "} \n", main_code); - } else { - snprintf(fragment_shader, sizeof(fragment_shader), - "#version 300 es \n" - "precision highp float; \n" - "in vec2 texcoords_out; \n" - "uniform sampler2D tex1; \n" - "out vec4 FragColor; \n" - "void main() \n" - "{ \n" - "%s" - "} \n", main_code); - } - - if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) +static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim) { + char compute_shader[2048]; + snprintf(compute_shader, sizeof(compute_shader), + "#version 430 core\n" + "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" + "uniform sampler2D imgInput;\n" + "uniform ivec2 source_position;\n" + "uniform ivec2 target_position;\n" + "uniform vec2 scale;\n" + "uniform mat2 rotation_matrix;\n" + "layout(rgba8, binding = 0) uniform image2D imgOutput;\n" + "void main() {\n" + " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n" + " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" + " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" + " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n" + " vec4 source_color = texture(imgInput, texCoord);\n" + //" vec4 output_color = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n" + //" vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n" + " imageStore(imgOutput, texelCoord + target_position, source_color);\n" + "}\n", max_local_size_dim, max_local_size_dim); + + if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) return -1; - gsr_shader_bind_attribute_location(shader, "pos", 0); - gsr_shader_bind_attribute_location(shader, "texcoords", 1); - uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); - uniforms->rotation = 
egl->glGetUniformLocation(shader->program_id, "rotation"); + uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); + uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position"); + uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); + uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale"); return 0; } @@ -315,6 +239,11 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver memset(self, 0, sizeof(*self)); self->params.egl = params->egl; self->params = *params; + + int max_compute_work_group_invocations = 256; + self->params.egl->glGetIntegerv(GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS, &max_compute_work_group_invocations); + self->max_local_size_dim = sqrt(max_compute_work_group_invocations); + fprintf(stderr, "max local size: %d, max_local_size_dim: %d\n", max_compute_work_group_invocations, self->max_local_size_dim); switch(params->destination_color) { case GSR_DESTINATION_COLOR_NV12: @@ -324,27 +253,15 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], params->destination_color, params->color_range, false) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); + if(load_compute_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], self->max_local_size_dim, params->destination_color, params->color_range) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - if(load_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], params->destination_color, params->color_range, false) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n"); + if(load_compute_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], 
self->max_local_size_dim, params->destination_color, params->color_range) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); goto err; } - - if(self->params.load_external_image_shader) { - if(load_shader_y(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], params->destination_color, params->color_range, true) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); - goto err; - } - - if(load_shader_uv(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], params->destination_color, params->color_range, true) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n"); - goto err; - } - } break; } case GSR_DESTINATION_COLOR_RGB8: { @@ -353,17 +270,10 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->uniforms[0], false) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); + if(load_compute_shader_rgb(&self->shaders[2], self->params.egl, &self->uniforms[2], self->max_local_size_dim) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); goto err; } - - if(self->params.load_external_image_shader) { - if(load_shader_rgb(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], true) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); - goto err; - } - } break; } } @@ -400,127 +310,119 @@ void gsr_color_conversion_deinit(gsr_color_conversion *self) { self->framebuffers[i] = 0; } - for(int i = 0; i < MAX_SHADERS; ++i) { + for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_SHADERS; ++i) { gsr_shader_deinit(&self->shaders[i]); } 
self->params.egl = NULL; } -static void gsr_color_conversion_swizzle_texture_source(gsr_color_conversion *self, gsr_source_color source_color) { - if(source_color == GSR_SOURCE_COLOR_BGR) { - const int swizzle_mask[] = { GL_BLUE, GL_GREEN, GL_RED, 1 }; - self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask); +static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2], vec2i *source_position, vec2i texture_size, vec2f scale) { + /* + rotation_matrix[0][0] = cos(angle); + rotation_matrix[0][1] = -sin(angle); + rotation_matrix[1][0] = sin(angle); + rotation_matrix[1][1] = cos(angle); + The manual matrix code below is the same as this code above, but without floating-point errors. + This is done to remove any blurring caused by these floating-point errors. + */ + switch(rotation) { + case GSR_ROT_0: + rotation_matrix[0][0] = 1.0f; + rotation_matrix[0][1] = 0.0f; + rotation_matrix[1][0] = 0.0f; + rotation_matrix[1][1] = 1.0f; + break; + case GSR_ROT_90: + rotation_matrix[0][0] = 0.0f; + rotation_matrix[0][1] = -1.0f; + rotation_matrix[1][0] = 1.0f; + rotation_matrix[1][1] = 0.0f; + source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); + source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); + break; + case GSR_ROT_180: + rotation_matrix[0][0] = -1.0f; + rotation_matrix[0][1] = 0.0f; + rotation_matrix[1][0] = 0.0f; + rotation_matrix[1][1] = -1.0f; + break; + case GSR_ROT_270: + rotation_matrix[0][0] = 0.0f; + rotation_matrix[0][1] = 1.0f; + rotation_matrix[1][0] = -1.0f; + rotation_matrix[1][1] = 0.0f; + source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); + source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); + break; } } -static void gsr_color_conversion_swizzle_reset(gsr_color_conversion *self, gsr_source_color 
source_color) { - if(source_color == GSR_SOURCE_COLOR_BGR) { - const int swizzle_mask[] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA }; - self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask); - } -} +// TODO: Handle source_color +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) { + vec2f scale = {0.0f, 0.0f}; + if(texture_size.x > 0 && texture_size.y > 0) + scale = (vec2f){ (double)destination_size.x/(double)texture_size.x, (double)destination_size.y/(double)texture_size.y }; -/* |source_pos| is in pixel coordinates and |source_size| */ -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color) { - // TODO: Remove this crap - rotation = M_PI*2.0f - rotation; + vec2i source_position = {0, 0}; + float rotation_matrix[2][2] = {{0, 0}, {0, 0}}; + gsr_color_conversion_apply_rotation(rotation, rotation_matrix, &source_position, texture_size, scale); - /* TODO: Do not call this every frame? */ - vec2i dest_texture_size = {0, 0}; - self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[0]); - self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &dest_texture_size.x); - self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &dest_texture_size.y); - self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + source_position.x += texture_pos.x; + source_position.y += texture_pos.y; const int texture_target = external_texture ? 
GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D; - self->params.egl->glBindTexture(texture_target, texture_id); - vec2i source_texture_size = {0, 0}; - if(external_texture) { - assert(self->params.load_external_image_shader); - source_texture_size = source_size; - } else { - /* TODO: Do not call this every frame? */ - self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_WIDTH, &source_texture_size.x); - self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_HEIGHT, &source_texture_size.y); - } - - // TODO: Remove this crap - if(abs_f(M_PI * 0.5f - rotation) <= 0.001f || abs_f(M_PI * 1.5f - rotation) <= 0.001f) { - float tmp = source_texture_size.x; - source_texture_size.x = source_texture_size.y; - source_texture_size.y = tmp; - } - - const vec2f pos_norm = { - ((float)source_pos.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f, - ((float)source_pos.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f, - }; - - const vec2f size_norm = { - ((float)source_size.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f, - ((float)source_size.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f, - }; - - const vec2f texture_pos_norm = { - (float)texture_pos.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x), - (float)texture_pos.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y), - }; - - const vec2f texture_size_norm = { - (float)texture_size.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x), - (float)texture_size.y / (source_texture_size.y == 0 ? 
1.0f : (float)source_texture_size.y), - }; - - const float vertices[] = { - -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y, - -1.0f + 0.0f, -1.0f + 0.0f, texture_pos_norm.x, texture_pos_norm.y, - -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y, - - -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y, - -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y, - -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y + texture_size_norm.y - }; - - gsr_color_conversion_swizzle_texture_source(self, source_color); - - self->params.egl->glBindVertexArray(self->vertex_array_object_id); - self->params.egl->glViewport(0, 0, dest_texture_size.x, dest_texture_size.y); - - /* TODO: this, also cleanup */ - //self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id); - self->params.egl->glBufferSubData(GL_ARRAY_BUFFER, 0, 24 * sizeof(float), vertices); - - { - self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]); - //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. 
We want to do that when using multiple drm to create the final image (multiple monitors for example) + switch(self->params.destination_color) { + case GSR_DESTINATION_COLOR_NV12: + case GSR_DESTINATION_COLOR_P010: { + const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010; + // Y + { + gsr_shader_use(&self->shaders[0]); + self->params.egl->glUniformMatrix2fv(self->uniforms[0].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); + self->params.egl->glUniform2i(self->uniforms[0].source_position, source_position.x, source_position.y); + self->params.egl->glUniform2i(self->uniforms[0].target_position, destination_pos.x, destination_pos.y); + self->params.egl->glUniform2f(self->uniforms[0].scale, scale.x, scale.y); + self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_R16 : GL_R8); + const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5; + const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5; + self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); + } - const int shader_index = external_texture ? 
EXTERNAL_TEXTURE_SHADER_OFFSET : 0; - gsr_shader_use(&self->shaders[shader_index]); - self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation); - self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y); - self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); + // UV + { + gsr_shader_use(&self->shaders[1]); + self->params.egl->glUniformMatrix2fv(self->uniforms[1].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); + self->params.egl->glUniform2i(self->uniforms[1].source_position, source_position.x, source_position.y); + self->params.egl->glUniform2i(self->uniforms[1].target_position, destination_pos.x, destination_pos.y); + self->params.egl->glUniform2f(self->uniforms[1].scale, scale.x, scale.y); + self->params.egl->glBindImageTexture(0, self->params.destination_textures[1], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_RG16 : GL_RG8); + const double num_groups_x = (double)texture_size.x*0.5/(double)self->max_local_size_dim + 0.5; + const double num_groups_y = (double)texture_size.y*0.5/(double)self->max_local_size_dim + 0.5; + self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); + } + break; + } + case GSR_DESTINATION_COLOR_RGB8: { + gsr_shader_use(&self->shaders[2]); + self->params.egl->glUniformMatrix2fv(self->uniforms[2].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); + self->params.egl->glUniform2i(self->uniforms[2].source_position, source_position.x, source_position.y); + self->params.egl->glUniform2i(self->uniforms[2].target_position, destination_pos.x, destination_pos.y); + self->params.egl->glUniform2f(self->uniforms[2].scale, scale.x, scale.y); + self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8); + const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5; + const double num_groups_y = 
(double)texture_size.y/(double)self->max_local_size_dim + 0.5; + self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); + break; + } } - if(self->params.num_destination_textures > 1) { - self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]); - //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); + self->params.egl->glMemoryBarrier(GL_ALL_BARRIER_BITS); // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT + self->params.egl->glUseProgram(0); - const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET + 1 : 1; - gsr_shader_use(&self->shaders[shader_index]); - self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation); - self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y); - self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); - } - - self->params.egl->glBindVertexArray(0); - gsr_shader_use_none(&self->shaders[0]); self->params.egl->glBindTexture(texture_target, 0); - self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0); - - gsr_color_conversion_swizzle_reset(self, source_color); } void gsr_color_conversion_clear(gsr_color_conversion *self) { @@ -557,3 +459,7 @@ void gsr_color_conversion_clear(gsr_color_conversion *self) { self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0); } + +gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation) { + return (gsr_rotation)monitor_rotation; +} @@ -284,6 +284,7 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glGenTextures, "glGenTextures" }, { (void**)&self->glDeleteTextures, "glDeleteTextures" }, { (void**)&self->glBindTexture, "glBindTexture" }, + { (void**)&self->glBindImageTexture, "glBindImageTexture" }, { (void**)&self->glTexParameteri, "glTexParameteri" }, { (void**)&self->glTexParameteriv, "glTexParameteriv" }, { (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" }, @@ -293,6 +294,8 @@ static bool gsr_egl_load_gl(gsr_egl 
*self, void *library) { { (void**)&self->glGenFramebuffers, "glGenFramebuffers" }, { (void**)&self->glBindFramebuffer, "glBindFramebuffer" }, { (void**)&self->glDeleteFramebuffers, "glDeleteFramebuffers" }, + { (void**)&self->glDispatchCompute, "glDispatchCompute" }, + { (void**)&self->glMemoryBarrier, "glMemoryBarrier" }, { (void**)&self->glViewport, "glViewport" }, { (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" }, { (void**)&self->glDrawBuffers, "glDrawBuffers" }, @@ -329,11 +332,14 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glGetUniformLocation, "glGetUniformLocation" }, { (void**)&self->glUniform1f, "glUniform1f" }, { (void**)&self->glUniform2f, "glUniform2f" }, + { (void**)&self->glUniform2i, "glUniform2i" }, + { (void**)&self->glUniformMatrix2fv, "glUniformMatrix2fv" }, { (void**)&self->glDebugMessageCallback, "glDebugMessageCallback" }, { (void**)&self->glScissor, "glScissor" }, { (void**)&self->glReadPixels, "glReadPixels" }, { (void**)&self->glMapBuffer, "glMapBuffer" }, { (void**)&self->glUnmapBuffer, "glUnmapBuffer" }, + { (void**)&self->glGetIntegerv, "glGetIntegerv" }, { NULL, NULL } }; diff --git a/src/encoder/video/software.c b/src/encoder/video/software.c index 3649ff1..627cdea 100644 --- a/src/encoder/video/software.c +++ b/src/encoder/video/software.c @@ -83,8 +83,8 @@ static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); // cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface); - self->params.egl->glFlush(); - self->params.egl->glFinish(); + //self->params.egl->glFlush(); + //self->params.egl->glFinish(); } static void gsr_video_encoder_software_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c index 8bb2f08..1d5dae0 
100644 --- a/src/encoder/video/vaapi.c +++ b/src/encoder/video/vaapi.c @@ -123,8 +123,8 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[i]); self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); while(self->params.egl->glGetError()) {} while(self->params.egl->eglGetError() != EGL_SUCCESS){} diff --git a/src/image_writer.c b/src/image_writer.c index c4d66f4..e153a34 100644 --- a/src/image_writer.c +++ b/src/image_writer.c @@ -17,7 +17,7 @@ bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int widt self->egl = egl; self->width = width; self->height = height; - self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGB8, GL_RGB, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */ + self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGBA8, GL_RGBA, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */ if(self->texture == 0) { fprintf(stderr, "gsr error: gsr_image_writer_init: failed to create texture\n"); return false; @@ -50,10 +50,10 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const bool success = false; switch(image_format) { case GSR_IMAGE_FORMAT_JPEG: - success = stbi_write_jpg(filepath, self->width, self->height, 3, data, quality); + success = stbi_write_jpg(filepath, self->width, self->height, 4, data, quality); break; case GSR_IMAGE_FORMAT_PNG: - success = 
stbi_write_png(filepath, self->width, self->height, 3, data, 0); + success = stbi_write_png(filepath, self->width, self->height, 4, data, 0); break; } @@ -65,7 +65,7 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self, const char *filepath, gsr_image_format image_format, int quality) { assert(self->source == GSR_IMAGE_WRITER_SOURCE_OPENGL); - uint8_t *frame_data = malloc(self->width * self->height * 3); + uint8_t *frame_data = malloc(self->width * self->height * 4); if(!frame_data) { fprintf(stderr, "gsr error: gsr_image_writer_write_to_file: failed to allocate memory for image frame\n"); return false; @@ -74,7 +74,7 @@ static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self // TODO: hdr support self->egl->glBindTexture(GL_TEXTURE_2D, self->texture); // We could use glGetTexSubImage, but it's only available starting from opengl 4.5 - self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, frame_data); + self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, frame_data); self->egl->glBindTexture(GL_TEXTURE_2D, 0); self->egl->glFlush(); diff --git a/src/main.cpp b/src/main.cpp index 6adeb05..f0a3e06 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1072,8 +1072,9 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide // TODO: More quality options if(low_power) av_dict_set_int(&options, "low_power", 1, 0); - // Improves performance but increases vram - //av_dict_set_int(&options, "async_depth", 8, 0); + // Improves performance but increases vram. 
+ // TODO: Might need a different async_depth for optimal performance on different amd/intel gpus + //av_dict_set_int(&options, "async_depth", 3, 0); if(codec_context->codec_id == AV_CODEC_ID_H264) { // Removed because it causes stutter in games for some people diff --git a/src/shader.c b/src/shader.c index dcb956b..b9fbb62 100644 --- a/src/shader.c +++ b/src/shader.c @@ -36,28 +36,36 @@ static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *s return shader_id; } -static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) { +static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) { unsigned int vertex_shader_id = 0; unsigned int fragment_shader_id = 0; + unsigned int compute_shader_id = 0; unsigned int program_id = 0; int linked = 0; + bool success = false; if(vertex_shader) { vertex_shader_id = loader_shader(egl, GL_VERTEX_SHADER, vertex_shader); if(vertex_shader_id == 0) - goto err; + goto done; } if(fragment_shader) { fragment_shader_id = loader_shader(egl, GL_FRAGMENT_SHADER, fragment_shader); if(fragment_shader_id == 0) - goto err; + goto done; + } + + if(compute_shader) { + compute_shader_id = loader_shader(egl, GL_COMPUTE_SHADER, compute_shader); + if(compute_shader_id == 0) + goto done; } program_id = egl->glCreateProgram(); if(program_id == 0) { fprintf(stderr, "gsr error: load_program: failed to create shader program, error: %d\n", egl->glGetError()); - goto err; + goto done; } if(vertex_shader_id) @@ -66,6 +74,9 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const if(fragment_shader_id) egl->glAttachShader(program_id, fragment_shader_id); + if(compute_shader_id) + egl->glAttachShader(program_id, compute_shader_id); + egl->glLinkProgram(program_id); egl->glGetProgramiv(program_id, GL_LINK_STATUS, &linked); @@ -79,37 +90,36 @@ static unsigned int load_program(gsr_egl *egl, const char 
*vertex_shader, const fprintf(stderr, "gsr error: load program: linking shader program failed, error:\n%s\n", info_log); } - goto err; + goto done; } - if(fragment_shader_id) - egl->glDeleteShader(fragment_shader_id); - if(vertex_shader_id) - egl->glDeleteShader(vertex_shader_id); - - return program_id; + success = true; + done: - err: - if(program_id) - egl->glDeleteProgram(program_id); + if(!success) { + if(program_id) + egl->glDeleteProgram(program_id); + } + if(compute_shader_id) + egl->glDeleteShader(compute_shader_id); if(fragment_shader_id) egl->glDeleteShader(fragment_shader_id); if(vertex_shader_id) egl->glDeleteShader(vertex_shader_id); - return 0; + return program_id; } -int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) { +int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) { assert(egl); self->egl = egl; self->program_id = 0; - if(!vertex_shader && !fragment_shader) { - fprintf(stderr, "gsr error: gsr_shader_init: vertex shader and fragment shader can't be NULL at the same time\n"); + if(!vertex_shader && !fragment_shader && !compute_shader) { + fprintf(stderr, "gsr error: gsr_shader_init: vertex, fragment shader and compute shaders can't be NULL at the same time\n"); return -1; } - self->program_id = load_program(self->egl, vertex_shader, fragment_shader); + self->program_id = load_program(self->egl, vertex_shader, fragment_shader, compute_shader); if(self->program_id == 0) return -1; diff --git a/src/utils.c b/src/utils.c index 325f750..943fb2d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -14,10 +14,8 @@ #include <xf86drmMode.h> #include <xf86drm.h> -#include <libdrm/drm_fourcc.h> #include <X11/Xatom.h> #include <X11/extensions/Xrandr.h> -#include <va/va_drmcommon.h> #include <libavcodec/avcodec.h> #include <libavutil/hwcontext_vaapi.h> @@ -663,241 +661,6 @@ bool video_codec_context_is_vaapi(AVCodecContext 
*video_codec_context) { return device_context->type == AV_HWDEVICE_TYPE_VAAPI; } -static uint32_t drm_fourcc_to_va_fourcc(uint32_t drm_fourcc) { - switch(drm_fourcc) { - case DRM_FORMAT_XRGB8888: return VA_FOURCC_BGRX; - case DRM_FORMAT_XBGR8888: return VA_FOURCC_RGBX; - case DRM_FORMAT_RGBX8888: return VA_FOURCC_XBGR; - case DRM_FORMAT_BGRX8888: return VA_FOURCC_XRGB; - case DRM_FORMAT_ARGB8888: return VA_FOURCC_BGRA; - case DRM_FORMAT_ABGR8888: return VA_FOURCC_RGBA; - case DRM_FORMAT_RGBA8888: return VA_FOURCC_ABGR; - case DRM_FORMAT_BGRA8888: return VA_FOURCC_ARGB; - default: return drm_fourcc; - } -} - -bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) { - VAConfigID config_id = 0; - VAContextID context_id = 0; - VASurfaceID input_surface_id = 0; - VABufferID buffer_id = 0; - bool success = true; - - VADisplay va_dpy = video_codec_context_get_vaapi_display(video_codec_context); - if(!va_dpy) { - success = false; - goto done; - } - - VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id); - if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %s\n", vaErrorStr(va_status)); - success = false; - goto done; - } - - VASurfaceID output_surface_id = (uintptr_t)video_frame->data[3]; - va_status = vaCreateContext(va_dpy, config_id, size.x, size.y, VA_PROGRESSIVE, &output_surface_id, 1, &context_id); - if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %s\n", vaErrorStr(va_status)); - success = false; - goto done; - } - - VADRMPRIMESurfaceDescriptor buf = {0}; - buf.fourcc = 
drm_fourcc_to_va_fourcc(format);//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10 - buf.width = size.x; - buf.height = size.y; - buf.num_objects = num_planes; - buf.num_layers = 1; - buf.layers[0].drm_format = format; - buf.layers[0].num_planes = buf.num_objects; - for(int i = 0; i < num_planes; ++i) { - buf.objects[i].fd = fds[i]; - buf.objects[i].size = size.y * pitches[i]; // TODO: - buf.objects[i].drm_format_modifier = modifiers[i]; - - buf.layers[0].object_index[i] = i; - buf.layers[0].offset[i] = offsets[i]; - buf.layers[0].pitch[i] = pitches[i]; - } - - VASurfaceAttrib attribs[2] = {0}; - attribs[0].type = VASurfaceAttribMemoryType; - attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; - attribs[0].value.type = VAGenericValueTypeInteger; - attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2; - attribs[1].type = VASurfaceAttribExternalBufferDescriptor; - attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; - attribs[1].value.type = VAGenericValueTypePointer; - attribs[1].value.value.p = &buf; - - // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10 - // TODO: Max size same as source_size - va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, size.x, size.y, &input_surface_id, 1, attribs, 2); - if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %s\n", vaErrorStr(va_status)); - success = false; - goto done; - } - - const VARectangle source_region = { - .x = source_pos.x, - .y = source_pos.y, - .width = source_size.x, - .height = source_size.y - }; - - const VARectangle output_region = { - .x = dest_pos.x, - .y = dest_pos.y, - .width = dest_size.x, - .height = dest_size.y - }; - - const bool scaled = dest_size.x != source_size.x || dest_size.y != source_size.y; - - // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi! 
- VAProcPipelineParameterBuffer params = {0}; - params.surface = input_surface_id; - params.surface_region = NULL; - params.surface_region = &source_region; - params.output_region = &output_region; - params.output_background_color = 0; - params.filter_flags = scaled ? (VA_FILTER_SCALING_HQ | VA_FILTER_INTERPOLATION_BILINEAR) : 0; - params.pipeline_flags = VA_PROC_PIPELINE_FAST; - - params.input_color_properties.colour_primaries = 1; - params.input_color_properties.transfer_characteristics = 1; - params.input_color_properties.matrix_coefficients = 1; - params.surface_color_standard = VAProcColorStandardBT709; // TODO: - params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED; - - params.output_color_properties.colour_primaries = 1; - params.output_color_properties.transfer_characteristics = 1; - params.output_color_properties.matrix_coefficients = 1; - params.output_color_standard = VAProcColorStandardBT709; // TODO: - params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? 
VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED; - - params.processing_mode = VAProcPerformanceMode; - - // VAProcPipelineCaps pipeline_caps = {0}; - // va_status = vaQueryVideoProcPipelineCaps(self->va_dpy, - // self->context_id, - // NULL, 0, - // &pipeline_caps); - // if(va_status == VA_STATUS_SUCCESS) { - // fprintf(stderr, "pipeline_caps: %u, %u\n", (unsigned int)pipeline_caps.rotation_flags, pipeline_caps.blend_flags); - // } - - // TODO: params.output_hdr_metadata - - // TODO: - // if (first surface to render) - // pipeline_param->output_background_color = 0xff000000; // black - - va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id); - if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status); - success = false; - goto done; - } - - va_status = vaBeginPicture(va_dpy, context_id, output_surface_id); - if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status); - success = false; - goto done; - } - - va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1); - if(va_status != VA_STATUS_SUCCESS) { - vaEndPicture(va_dpy, context_id); - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status); - success = false; - goto done; - } - - va_status = vaEndPicture(va_dpy, context_id); - if(va_status != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status); - success = false; - goto done; - } - - // vaSyncBuffer(va_dpy, buffer_id, 1000 * 1000 * 1000); - // vaSyncSurface(va_dpy, input_surface_id); - // vaSyncSurface(va_dpy, output_surface_id); - - done: - if(buffer_id) - vaDestroyBuffer(va_dpy, buffer_id); - - if(input_surface_id) - vaDestroySurfaces(va_dpy, 
&input_surface_id, 1); - - if(context_id) - vaDestroyContext(va_dpy, context_id); - - if(config_id) - vaDestroyConfig(va_dpy, config_id); - - return success; -} - -bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame) { - if(!image) - return false; - - int texture_fourcc = 0; - int texture_num_planes = 0; - uint64_t texture_modifiers = 0; - if(!egl->eglExportDMABUFImageQueryMESA(egl->egl_display, image, &texture_fourcc, &texture_num_planes, &texture_modifiers)) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageQueryMESA failed\n"); - return false; - } - - if(texture_num_planes <= 0 || texture_num_planes > 8) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: expected planes size to be 0<planes<=8 for drm buf, got %d planes\n", texture_num_planes); - return false; - } - - int texture_fds[8]; - int32_t texture_strides[8]; - int32_t texture_offsets[8]; - - while(egl->eglGetError() != EGL_SUCCESS){} - if(!egl->eglExportDMABUFImageMESA(egl->egl_display, image, texture_fds, texture_strides, texture_offsets)) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageMESA failed, error: %d\n", egl->eglGetError()); - return false; - } - - int fds[8]; - uint32_t offsets[8]; - uint32_t pitches[8]; - uint64_t modifiers[8]; - for(int i = 0; i < texture_num_planes; ++i) { - fds[i] = texture_fds[i]; - offsets[i] = texture_offsets[i]; - pitches[i] = texture_strides[i]; - modifiers[i] = texture_modifiers; - - if(fds[i] == -1) - texture_num_planes = i; - } - const bool success = texture_num_planes > 0 && vaapi_copy_drm_planes_to_video_surface(video_codec_context, video_frame, source_pos, source_size, dest_pos, dest_size, texture_fourcc, source_size, fds, offsets, pitches, modifiers, texture_num_planes); - - for(int i = 0; i < texture_num_planes; ++i) { - 
if(texture_fds[i] > 0) { - close(texture_fds[i]); - texture_fds[i] = -1; - } - } - - return success; -} - vec2i scale_keep_aspect_ratio(vec2i from, vec2i to) { if(from.x == 0 || from.y == 0) return (vec2i){0, 0}; |