diff options
author | dec05eba <dec05eba@protonmail.com> | 2025-03-29 15:37:53 +0100 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2025-03-29 15:37:53 +0100 |
commit | f85a7ab2055ddc8e2231a380f0c46e29e42c2072 (patch) | |
tree | 33cf9c1d086f3bd8332e36e97c73f793f18db11e | |
parent | a41a32cb9079dc3c78414b7b0cdc68243333020d (diff) |
Revert "WIP: use compute shader instead of graphics shader for better performance (especially on amd)"
This reverts commit a41a32cb9079dc3c78414b7b0cdc68243333020d.
-rw-r--r-- | include/color_conversion.h | 26 | ||||
-rw-r--r-- | include/egl.h | 14 | ||||
-rw-r--r-- | include/image_writer.h | 2 | ||||
-rw-r--r-- | include/shader.h | 2 | ||||
-rw-r--r-- | include/utils.h | 2 | ||||
-rw-r--r-- | src/capture/kms.c | 101 | ||||
-rw-r--r-- | src/capture/nvfbc.c | 10 | ||||
-rw-r--r-- | src/capture/portal.c | 61 | ||||
-rw-r--r-- | src/capture/xcomposite.c | 35 | ||||
-rw-r--r-- | src/capture/ximage.c | 4 | ||||
-rw-r--r-- | src/color_conversion.c | 516 | ||||
-rw-r--r-- | src/egl.c | 6 | ||||
-rw-r--r-- | src/encoder/video/software.c | 4 | ||||
-rw-r--r-- | src/encoder/video/vaapi.c | 4 | ||||
-rw-r--r-- | src/image_writer.c | 10 | ||||
-rw-r--r-- | src/main.cpp | 5 | ||||
-rw-r--r-- | src/shader.c | 48 | ||||
-rw-r--r-- | src/utils.c | 237 |
18 files changed, 751 insertions, 336 deletions
diff --git a/include/color_conversion.h b/include/color_conversion.h index 37eeb8e..b80be21 100644 --- a/include/color_conversion.h +++ b/include/color_conversion.h @@ -2,12 +2,9 @@ #define GSR_COLOR_CONVERSION_H #include "shader.h" -#include "defs.h" #include "vec2.h" #include <stdbool.h> -#define GSR_COLOR_CONVERSION_MAX_SHADERS 3 - typedef enum { GSR_COLOR_RANGE_LIMITED, GSR_COLOR_RANGE_FULL @@ -29,18 +26,9 @@ typedef enum { GSR_DESTINATION_COLOR_RGB8 } gsr_destination_color; -typedef enum { - GSR_ROT_0, - GSR_ROT_90, - GSR_ROT_180, - GSR_ROT_270 -} gsr_rotation; - typedef struct { - int rotation_matrix; - int source_position; - int target_position; - int scale; + int offset; + int rotation; } gsr_color_uniforms; typedef struct { @@ -57,23 +45,19 @@ typedef struct { typedef struct { gsr_color_conversion_params params; - gsr_color_uniforms uniforms[GSR_COLOR_CONVERSION_MAX_SHADERS]; - gsr_shader shaders[GSR_COLOR_CONVERSION_MAX_SHADERS]; + gsr_color_uniforms uniforms[4]; + gsr_shader shaders[4]; unsigned int framebuffers[2]; unsigned int vertex_array_object_id; unsigned int vertex_buffer_object_id; - - int max_local_size_dim; } gsr_color_conversion; int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params); void gsr_color_conversion_deinit(gsr_color_conversion *self); -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color); +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color); void gsr_color_conversion_clear(gsr_color_conversion *self); -gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation); - #endif /* GSR_COLOR_CONVERSION_H 
*/ diff --git a/include/egl.h b/include/egl.h index 643ab30..0d08270 100644 --- a/include/egl.h +++ b/include/egl.h @@ -98,7 +98,7 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_TEXTURE_EXTERNAL_OES 0x8D65 #define GL_RED 0x1903 #define GL_GREEN 0x1904 -#define GL_BLUE 0x1905 +#define GL_BLUE 0x1905 #define GL_ALPHA 0x1906 #define GL_TEXTURE_SWIZZLE_RGBA 0x8E46 #define GL_RG 0x8227 @@ -111,7 +111,6 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_R16 0x822A #define GL_RG16 0x822C #define GL_RGB16 0x8054 -#define GL_RGBA32F 0x8814 #define GL_UNSIGNED_BYTE 0x1401 #define GL_COLOR_BUFFER_BIT 0x00004000 #define GL_TEXTURE_WRAP_S 0x2802 @@ -135,10 +134,6 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_SCISSOR_TEST 0x0C11 #define GL_PACK_ALIGNMENT 0x0D05 #define GL_UNPACK_ALIGNMENT 0x0CF5 -#define GL_READ_ONLY 0x88B8 -#define GL_WRITE_ONLY 0x88B9 -#define GL_READ_WRITE 0x88BA -#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS 0x90EB #define GL_VENDOR 0x1F00 #define GL_RENDERER 0x1F01 @@ -148,7 +143,6 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_INFO_LOG_LENGTH 0x8B84 #define GL_FRAGMENT_SHADER 0x8B30 #define GL_VERTEX_SHADER 0x8B31 -#define GL_COMPUTE_SHADER 0x91B9 #define GL_COMPILE_STATUS 0x8B81 #define GL_LINK_STATUS 0x8B82 @@ -237,7 +231,6 @@ struct gsr_egl { void (*glGenTextures)(int n, unsigned int *textures); void (*glDeleteTextures)(int n, const unsigned int *texture); void (*glBindTexture)(unsigned int target, unsigned int texture); - void (*glBindImageTexture)(unsigned int unit, unsigned int texture, int level, unsigned char layered, int layer, unsigned int access, unsigned int format); void (*glTexParameteri)(unsigned int target, unsigned int pname, int param); void (*glTexParameteriv)(unsigned int target, unsigned int pname, const int *params); void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params); @@ -247,8 +240,6 @@ struct gsr_egl { void (*glGenFramebuffers)(int n, unsigned int *framebuffers); void 
(*glBindFramebuffer)(unsigned int target, unsigned int framebuffer); void (*glDeleteFramebuffers)(int n, const unsigned int *framebuffers); - void (*glDispatchCompute)(unsigned int num_groups_x, unsigned int num_groups_y, unsigned int num_groups_z); - void (*glMemoryBarrier)(unsigned int barriers); void (*glViewport)(int x, int y, int width, int height); void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level); void (*glDrawBuffers)(int n, const unsigned int *bufs); @@ -285,14 +276,11 @@ struct gsr_egl { int (*glGetUniformLocation)(unsigned int program, const char *name); void (*glUniform1f)(int location, float v0); void (*glUniform2f)(int location, float v0, float v1); - void (*glUniform2i)(int location, int v0, int v1); - void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value); void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam); void (*glScissor)(int x, int y, int width, int height); void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels); void* (*glMapBuffer)(unsigned int target, unsigned int access); unsigned char (*glUnmapBuffer)(unsigned int target); - void (*glGetIntegerv)(unsigned int pname, int *params); }; bool gsr_egl_load(gsr_egl *self, gsr_window *window, bool is_monitor_capture, bool enable_debug); diff --git a/include/image_writer.h b/include/image_writer.h index 65e7497..400edd0 100644 --- a/include/image_writer.h +++ b/include/image_writer.h @@ -25,7 +25,7 @@ typedef struct { } gsr_image_writer; bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int width, int height); -/* |memory| is taken as a reference. 
The data is expected to be in rgba8 format (8 bit rgba) */ +/* |memory| is taken as a reference */ bool gsr_image_writer_init_memory(gsr_image_writer *self, const void *memory, int width, int height); void gsr_image_writer_deinit(gsr_image_writer *self); diff --git a/include/shader.h b/include/shader.h index 8bc1104..57d1096 100644 --- a/include/shader.h +++ b/include/shader.h @@ -9,7 +9,7 @@ typedef struct { } gsr_shader; /* |vertex_shader| or |fragment_shader| may be NULL */ -int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader); +int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader); void gsr_shader_deinit(gsr_shader *self); int gsr_shader_bind_attribute_location(gsr_shader *self, const char *attribute, int location); diff --git a/include/utils.h b/include/utils.h index 873e6e4..fd340e8 100644 --- a/include/utils.h +++ b/include/utils.h @@ -64,6 +64,8 @@ int create_directory_recursive(char *path); /* |img_attr| needs to be at least 44 in size */ void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes, bool use_modifier); bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context); +bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes); +bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame); vec2i scale_keep_aspect_ratio(vec2i from, vec2i to); diff --git a/src/capture/kms.c 
b/src/capture/kms.c index e98fab7..578fded 100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -53,6 +53,10 @@ typedef struct { bool is_x11; gsr_cursor x11_cursor; + bool performance_error_shown; + bool fast_path_failed; + bool mesa_supports_compute_only_vaapi_copy; + //int drm_fd; //uint64_t prev_sequence; //bool damaged; @@ -225,6 +229,17 @@ static int gsr_capture_kms_start(gsr_capture *cap, gsr_capture_metadata *capture capture_metadata->height = self->capture_size.y; } + self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9); + if(self->fast_path_failed) + fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n"); + + //if(self->params.hdr) { + // self->fast_path_failed = true; + // fprintf(stderr, "gsr warning: gsr_capture_kms_start: recording with hdr requires shader color conversion which might be slow. 
If this is an issue record with -w portal instead (which converts HDR to SDR)\n"); + //} + + self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6); + self->last_time_monitor_check = clock_get_monotonic_seconds(); return 0; } @@ -259,6 +274,16 @@ static void gsr_capture_kms_on_event(gsr_capture *cap, gsr_egl *egl) { // } // } +static float monitor_rotation_to_radians(gsr_monitor_rotation rot) { + switch(rot) { + case GSR_MONITOR_ROT_0: return 0.0f; + case GSR_MONITOR_ROT_90: return M_PI_2; + case GSR_MONITOR_ROT_180: return M_PI; + case GSR_MONITOR_ROT_270: return M_PI + M_PI_2; + } + return 0.0f; +} + static gsr_kms_response_item* find_drm_by_connector_id(gsr_kms_response *kms_response, uint32_t connector_id) { for(int i = 0; i < kms_response->num_items; ++i) { if(kms_response->items[i].connector_id == connector_id && !kms_response->items[i].is_cursor) @@ -424,7 +449,7 @@ static gsr_kms_response_item* find_cursor_drm_if_on_monitor(gsr_capture_kms *sel return cursor_drm_fd; } -static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, vec2i output_size, vec2i framebuffer_size) { +static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, float texture_rotation, vec2i output_size, vec2i framebuffer_size) { const vec2d scale = { self->capture_size.x == 0 ? 0 : (double)output_size.x / (double)self->capture_size.x, self->capture_size.y == 0 ? 
0 : (double)output_size.y / (double)self->capture_size.y @@ -498,7 +523,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->cursor_texture_id, cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y}, (vec2i){0, 0}, cursor_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); + texture_rotation, cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -526,7 +551,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id, cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y}, (vec2i){0, 0}, self->x11_cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + 0.0f, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } @@ -579,6 +604,16 @@ static void gsr_capture_kms_update_connector_ids(gsr_capture_kms *self) { self->capture_size = rotate_capture_size_if_rotated(self, monitor.size); } +static void gsr_capture_kms_fail_fast_path_if_not_fast(gsr_capture_kms *self, uint32_t pixel_format) { + const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF; + if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) { + self->fast_path_failed = true; + fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used." 
+ " If you experience performance problems in the video then record on a single window on X11 or use portal capture option instead or disable 10/12-bit color option in your desktop environment settings," + " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n"); + } +} + static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) { gsr_capture_kms *self = cap->priv; @@ -610,6 +645,15 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata)) gsr_kms_set_hdr_metadata(self, drm_fd); + if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { + self->performance_error_shown = true; + self->fast_path_failed = true; + fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used." + " If you experience performance problems in the video then record a single window on X11 or use portal capture option instead\n"); + } + + gsr_capture_kms_fail_fast_path_if_not_fast(self, drm_fd->pixel_format); + self->capture_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h }); if(self->params.region_size.x > 0 && self->params.region_size.y > 0) self->capture_size = self->params.region_size; @@ -618,6 +662,7 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu vec2i output_size = is_scaled ? 
self->params.output_resolution : self->capture_size; output_size = scale_keep_aspect_ratio(self->capture_size, output_size); + const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation); const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) }; gsr_capture_kms_update_capture_size_change(self, color_conversion, target_pos, drm_fd); @@ -628,19 +673,41 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu capture_pos.x += self->params.region_position.x; capture_pos.y += self->params.region_position.y; - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); - - EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd); - if(image) { - gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image); - self->params.egl->eglDestroyImage(self->params.egl->egl_display, image); + self->params.egl->glFlush(); + self->params.egl->glFinish(); + + /* Fast opengl free path */ + if(!self->fast_path_failed && self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { + int fds[4]; + uint32_t offsets[4]; + uint32_t pitches[4]; + uint64_t modifiers[4]; + for(int i = 0; i < drm_fd->num_dma_bufs; ++i) { + fds[i] = drm_fd->dma_buf[i].fd; + offsets[i] = drm_fd->dma_buf[i].offset; + pitches[i] = drm_fd->dma_buf[i].pitch; + modifiers[i] = drm_fd->modifier; + } + if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){capture_pos.x, capture_pos.y}, self->capture_size, target_pos, output_size, drm_fd->pixel_format, (vec2i){drm_fd->width, drm_fd->height}, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs)) { + fprintf(stderr, "gsr error: gsr_capture_kms_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl 
copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n"); + self->fast_path_failed = true; + } + } else { + self->fast_path_failed = true; } - gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id, - target_pos, output_size, - capture_pos, self->capture_size, - gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + if(self->fast_path_failed) { + EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd); + if(image) { + gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image); + self->params.egl->eglDestroyImage(self->params.egl->egl_display, image); + } + + gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id, + target_pos, output_size, + capture_pos, self->capture_size, + texture_rotation, self->external_texture_fallback, GSR_SOURCE_COLOR_RGB); + } if(self->params.record_cursor) { gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane); @@ -655,12 +722,12 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu render_x11_cursor(self, color_conversion, cursor_monitor_offset, target_pos, output_size); } else if(cursor_drm_fd) { const vec2i framebuffer_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h }); - render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, output_size, framebuffer_size); + render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, texture_rotation, output_size, framebuffer_size); } } - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); gsr_capture_kms_cleanup_kms_fds(self); diff --git a/src/capture/nvfbc.c 
b/src/capture/nvfbc.c index 5f47b00..b6a3671 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -390,16 +390,16 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap return 0; } - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex], target_pos, (vec2i){output_size.x, output_size.y}, self->params.region_position, frame_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR); + 0.0f, false, GSR_SOURCE_COLOR_BGR); - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); return 0; } diff --git a/src/capture/portal.c b/src/capture/portal.c index 56072d8..e065f02 100644 --- a/src/capture/portal.c +++ b/src/capture/portal.c @@ -23,6 +23,9 @@ typedef struct { vec2i capture_size; gsr_pipewire_video_dmabuf_data dmabuf_data[GSR_PIPEWIRE_VIDEO_DMABUF_MAX_PLANES]; int num_dmabuf_data; + + bool fast_path_failed; + bool mesa_supports_compute_only_vaapi_copy; } gsr_capture_portal; static void gsr_capture_portal_cleanup_plane_fds(gsr_capture_portal *self) { @@ -302,6 +305,12 @@ static int gsr_capture_portal_start(gsr_capture *cap, gsr_capture_metadata *capt capture_metadata->height = self->params.output_resolution.y; } + self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9); + if(self->fast_path_failed) + fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n"); + + self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6); + return 0; } @@ -309,6 +318,16 @@ static int max_int(int a, int b) 
{ return a > b ? a : b; } +static void gsr_capture_portal_fail_fast_path_if_not_fast(gsr_capture_portal *self, uint32_t pixel_format) { + const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF; + if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) { + self->fast_path_failed = true; + fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used." + " If you experience performance problems in the video then record on a single window on X11 instead or disable 10/12-bit color option in your desktop environment settings," + " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n"); + } +} + static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) { (void)color_conversion; gsr_capture_portal *self = cap->priv; @@ -329,21 +348,45 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca return 0; } + gsr_capture_portal_fail_fast_path_if_not_fast(self, pipewire_fourcc); + const bool is_scaled = self->params.output_resolution.x > 0 && self->params.output_resolution.y > 0; vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size; output_size = scale_keep_aspect_ratio(self->capture_size, output_size); const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) }; - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); // TODO: Handle region crop - gsr_color_conversion_draw(color_conversion, using_external_image ? 
self->texture_map.external_texture_id : self->texture_map.texture_id, - target_pos, output_size, - (vec2i){region.x, region.y}, self->capture_size, - GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB); + /* Fast opengl free path */ + if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { + int fds[4]; + uint32_t offsets[4]; + uint32_t pitches[4]; + uint64_t modifiers[4]; + for(int i = 0; i < self->num_dmabuf_data; ++i) { + fds[i] = self->dmabuf_data[i].fd; + offsets[i] = self->dmabuf_data[i].offset; + pitches[i] = self->dmabuf_data[i].stride; + modifiers[i] = pipewire_modifiers; + } + if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){region.x, region.y}, self->capture_size, target_pos, output_size, pipewire_fourcc, self->capture_size, fds, offsets, pitches, modifiers, self->num_dmabuf_data)) { + fprintf(stderr, "gsr error: gsr_capture_portal_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n"); + self->fast_path_failed = true; + } + } else { + self->fast_path_failed = true; + } + + if(self->fast_path_failed) { + gsr_color_conversion_draw(color_conversion, using_external_image ? 
self->texture_map.external_texture_id : self->texture_map.texture_id, + target_pos, output_size, + (vec2i){region.x, region.y}, self->capture_size, + 0.0f, using_external_image, GSR_SOURCE_COLOR_RGB); + } if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) { const vec2d scale = { @@ -361,12 +404,12 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id, (vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y}, (vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height}, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + 0.0f, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); gsr_capture_portal_cleanup_plane_fds(self); diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c index 16bc988..d8f4c27 100644 --- a/src/capture/xcomposite.c +++ b/src/capture/xcomposite.c @@ -34,6 +34,7 @@ typedef struct { gsr_cursor cursor; bool clear_background; + bool fast_path_failed; } gsr_capture_xcomposite; static void gsr_capture_xcomposite_stop(gsr_capture_xcomposite *self) { @@ -116,6 +117,10 @@ static int gsr_capture_xcomposite_start(gsr_capture *cap, gsr_capture_metadata * capture_metadata->height = self->params.output_resolution.y; } + self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9); + if(self->fast_path_failed) + fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n"); + self->window_resize_timer = clock_get_monotonic_seconds(); return 0; } @@ -253,13 +258,25 @@ static int 
gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata const vec2i target_pos = { max_int(0, capture_metdata->width / 2 - output_size.x / 2), max_int(0, capture_metdata->height / 2 - output_size.y / 2) }; - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); + + /* Fast opengl free path */ + if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metdata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) { + if(!vaapi_copy_egl_image_to_video_surface(self->params.egl, self->window_texture.image, (vec2i){0, 0}, self->texture_size, target_pos, output_size, capture_metdata->video_codec_context, capture_metdata->frame)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_capture: vaapi_copy_egl_image_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n"); + self->fast_path_failed = true; + } + } else { + self->fast_path_failed = true; + } - gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), - target_pos, output_size, - (vec2i){0, 0}, self->texture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + if(self->fast_path_failed) { + gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture), + target_pos, output_size, + (vec2i){0, 0}, self->texture_size, + 0.0f, false, GSR_SOURCE_COLOR_RGB); + } if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -280,13 +297,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + 0.0f, false, GSR_SOURCE_COLOR_RGB); 
self->params.egl->glDisable(GL_SCISSOR_TEST); } - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); return 0; } diff --git a/src/capture/ximage.c b/src/capture/ximage.c index ac00d72..259761d 100644 --- a/src/capture/ximage.c +++ b/src/capture/ximage.c @@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->texture_id, target_pos, output_size, (vec2i){0, 0}, self->capture_size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + 0.0f, false, GSR_SOURCE_COLOR_RGB); if(self->params.record_cursor && self->cursor.visible) { const vec2d scale = { @@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca gsr_color_conversion_draw(color_conversion, self->cursor.texture_id, cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y}, (vec2i){0, 0}, self->cursor.size, - GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB); + 0.0f, false, GSR_SOURCE_COLOR_RGB); self->params.egl->glDisable(GL_SCISSOR_TEST); } diff --git a/src/color_conversion.c b/src/color_conversion.c index 72390f5..c1fe894 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -5,17 +5,21 @@ #include <math.h> #include <assert.h> -// TODO: external texture -// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result -// with a call to mix to choose source/output color. - -#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 -// TODO: Use the minimal barrier required and move this to egl.h -#define GL_ALL_BARRIER_BITS 0xFFFFFFFF - +#define MAX_SHADERS 4 #define MAX_FRAMEBUFFERS 2 #define EXTERNAL_TEXTURE_SHADER_OFFSET 2 +static float abs_f(float v) { + return v >= 0.0f ? 
v : -v; +} + +#define ROTATE_Z "mat4 rotate_z(in float angle) {\n" \ + " return mat4(cos(angle), -sin(angle), 0.0, 0.0,\n" \ + " sin(angle), cos(angle), 0.0, 0.0,\n" \ + " 0.0, 0.0, 1.0, 0.0,\n" \ + " 0.0, 0.0, 0.0, 1.0);\n" \ + "}\n" + /* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */ /* ITU-R BT2020, full */ @@ -44,10 +48,6 @@ " 0.060118, 0.429412, -0.038049, 0.000000,\n" \ " 0.062745, 0.500000, 0.500000, 1.000000);\n" -static int max_int(int a, int b) { - return a > b ? a : b; -} - static const char* color_format_range_get_transform_matrix(gsr_destination_color color_format, gsr_color_range color_range) { switch(color_format) { case GSR_DESTINATION_COLOR_NV12: { @@ -76,111 +76,187 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color return NULL; } -// TODO: Make alpha blending optional -// TODO: Optimize these shaders. -static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) { +static int load_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); - const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010; - - char compute_shader[2048]; - snprintf(compute_shader, sizeof(compute_shader), - "#version 430 core\n" - "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" - "uniform sampler2D imgInput;\n" - "uniform ivec2 source_position;\n" - "uniform ivec2 target_position;\n" - "uniform vec2 scale;\n" - "uniform mat2 rotation_matrix;\n" - "layout(%s, binding = 0) uniform image2D imgOutput;\n" - "%s" - "void main() {\n" - " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n" - " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * 
scale + 0.5);\n" - " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" - " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(imgInput, texCoord);\n" - " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n" - " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n" - " imageStore(imgOutput, texelCoord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "r16" : "r8", color_transform_matrix); - - if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) + + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform vec2 offset; \n" + "uniform float rotation; \n" + ROTATE_Z + "void main() \n" + "{ \n" + " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n" + " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n" + "} \n"); + + const char *main_code = + main_code = + " vec4 pixel = texture(tex1, texcoords_out); \n" + " FragColor.x = (RGBtoYUV * vec4(pixel.rgb, 1.0)).x; \n" + " FragColor.w = pixel.a; \n"; + + char fragment_shader[2048]; + if(external_texture) { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "#extension GL_OES_EGL_image_external : enable \n" + "#extension GL_OES_EGL_image_external_essl3 : require \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform samplerExternalOES tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } else { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es 
\n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) return -1; - uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); - uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position"); - uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); - uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale"); + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); + uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation"); return 0; } -static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) { +static unsigned int load_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) { const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range); - const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010; - - char compute_shader[2048]; - snprintf(compute_shader, sizeof(compute_shader), - "#version 430 core\n" - "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" - "uniform sampler2D imgInput;\n" - "uniform ivec2 source_position;\n" - "uniform ivec2 target_position;\n" - "uniform vec2 scale;\n" - "uniform mat2 rotation_matrix;\n" - "layout(%s, binding = 0) uniform image2D imgOutput;\n" - "%s" - "void main() {\n" - " ivec2 texelCoord = 
ivec2(gl_GlobalInvocationID.xy);\n" - " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" - " vec2 rotated_texel_coord = vec2(texelCoord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n" - " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 source_color = texture(imgInput, texCoord * 2.0);\n" - " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n" - " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position/2);\n" - " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n" - " imageStore(imgOutput, texelCoord + target_position/2, vec4(uv_color, 1.0, 1.0));\n" - "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "rg16" : "rg8", color_transform_matrix); - - if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) + + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform vec2 offset; \n" + "uniform float rotation; \n" + ROTATE_Z + "void main() \n" + "{ \n" + " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n" + " gl_Position = (vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0)) * vec4(0.5, 0.5, 1.0, 1.0) - vec4(0.5, 0.5, 0.0, 0.0); \n" + "} \n"); + + const char *main_code = + main_code = + " vec4 pixel = texture(tex1, texcoords_out); \n" + " FragColor.xy = (RGBtoYUV * vec4(pixel.rgb, 1.0)).yz; \n" + " FragColor.w = pixel.a; \n"; + + char fragment_shader[2048]; + if(external_texture) { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "#extension GL_OES_EGL_image_external : enable \n" + "#extension GL_OES_EGL_image_external_essl3 : require \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform samplerExternalOES tex1; \n" + "out vec4 FragColor; \n" + "%s" + 
"void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } else { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + "%s" + "void main() \n" + "{ \n" + "%s" + "} \n", color_transform_matrix, main_code); + } + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) return -1; - uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); - uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position"); - uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); - uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale"); + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); + uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation"); return 0; } -static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim) { - char compute_shader[2048]; - snprintf(compute_shader, sizeof(compute_shader), - "#version 430 core\n" - "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" - "uniform sampler2D imgInput;\n" - "uniform ivec2 source_position;\n" - "uniform ivec2 target_position;\n" - "uniform vec2 scale;\n" - "uniform mat2 rotation_matrix;\n" - "layout(rgba8, binding = 0) uniform image2D imgOutput;\n" - "void main() {\n" - " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n" - " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" - " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" - " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n" - " vec4 
source_color = texture(imgInput, texCoord);\n" - //" vec4 output_color = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n" - //" vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n" - " imageStore(imgOutput, texelCoord + target_position, source_color);\n" - "}\n", max_local_size_dim, max_local_size_dim); - - if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) +static unsigned int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, bool external_texture) { + char vertex_shader[2048]; + snprintf(vertex_shader, sizeof(vertex_shader), + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "uniform vec2 offset; \n" + "uniform float rotation; \n" + ROTATE_Z + "void main() \n" + "{ \n" + " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n" + " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n" + "} \n"); + + const char *main_code = + main_code = + " vec4 pixel = texture(tex1, texcoords_out); \n" + " FragColor = pixel; \n"; + + char fragment_shader[2048]; + if(external_texture) { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "#extension GL_OES_EGL_image_external : enable \n" + "#extension GL_OES_EGL_image_external_essl3 : require \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform samplerExternalOES tex1; \n" + "out vec4 FragColor; \n" + "void main() \n" + "{ \n" + "%s" + "} \n", main_code); + } else { + snprintf(fragment_shader, sizeof(fragment_shader), + "#version 300 es \n" + "precision highp float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + "out vec4 FragColor; \n" + "void main() \n" + "{ \n" + "%s" + "} \n", main_code); + } + + if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0) return -1; - uniforms->source_position = 
egl->glGetUniformLocation(shader->program_id, "source_position"); - uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position"); - uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix"); - uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale"); + gsr_shader_bind_attribute_location(shader, "pos", 0); + gsr_shader_bind_attribute_location(shader, "texcoords", 1); + uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset"); + uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation"); return 0; } @@ -239,11 +315,6 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver memset(self, 0, sizeof(*self)); self->params.egl = params->egl; self->params = *params; - - int max_compute_work_group_invocations = 256; - self->params.egl->glGetIntegerv(GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS, &max_compute_work_group_invocations); - self->max_local_size_dim = sqrt(max_compute_work_group_invocations); - fprintf(stderr, "max local size: %d, max_local_size_dim: %d\n", max_compute_work_group_invocations, self->max_local_size_dim); switch(params->destination_color) { case GSR_DESTINATION_COLOR_NV12: @@ -253,15 +324,27 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_compute_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], self->max_local_size_dim, params->destination_color, params->color_range) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + if(load_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], params->destination_color, params->color_range, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); goto err; } - if(load_compute_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], self->max_local_size_dim, params->destination_color, 
params->color_range) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n"); + if(load_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], params->destination_color, params->color_range, false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n"); goto err; } + + if(self->params.load_external_image_shader) { + if(load_shader_y(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], params->destination_color, params->color_range, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); + goto err; + } + + if(load_shader_uv(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], params->destination_color, params->color_range, true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n"); + goto err; + } + } break; } case GSR_DESTINATION_COLOR_RGB8: { @@ -270,10 +353,17 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver return -1; } - if(load_compute_shader_rgb(&self->shaders[2], self->params.egl, &self->uniforms[2], self->max_local_size_dim) != 0) { - fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n"); + if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->uniforms[0], false) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); goto err; } + + if(self->params.load_external_image_shader) { + if(load_shader_rgb(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], true) != 0) { + fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n"); + goto err; + } + } break; } } @@ -310,119 +400,127 @@ void gsr_color_conversion_deinit(gsr_color_conversion *self) { 
self->framebuffers[i] = 0; } - for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_SHADERS; ++i) { + for(int i = 0; i < MAX_SHADERS; ++i) { gsr_shader_deinit(&self->shaders[i]); } self->params.egl = NULL; } -static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2], vec2i *source_position, vec2i texture_size, vec2f scale) { - /* - rotation_matrix[0][0] = cos(angle); - rotation_matrix[0][1] = -sin(angle); - rotation_matrix[1][0] = sin(angle); - rotation_matrix[1][1] = cos(angle); - The manual matrix code below is the same as this code above, but without floating-point errors. - This is done to remove any blurring caused by these floating-point errors. - */ - switch(rotation) { - case GSR_ROT_0: - rotation_matrix[0][0] = 1.0f; - rotation_matrix[0][1] = 0.0f; - rotation_matrix[1][0] = 0.0f; - rotation_matrix[1][1] = 1.0f; - break; - case GSR_ROT_90: - rotation_matrix[0][0] = 0.0f; - rotation_matrix[0][1] = -1.0f; - rotation_matrix[1][0] = 1.0f; - rotation_matrix[1][1] = 0.0f; - source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); - source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); - break; - case GSR_ROT_180: - rotation_matrix[0][0] = -1.0f; - rotation_matrix[0][1] = 0.0f; - rotation_matrix[1][0] = 0.0f; - rotation_matrix[1][1] = -1.0f; - break; - case GSR_ROT_270: - rotation_matrix[0][0] = 0.0f; - rotation_matrix[0][1] = 1.0f; - rotation_matrix[1][0] = -1.0f; - rotation_matrix[1][1] = 0.0f; - source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); - source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); - break; +static void gsr_color_conversion_swizzle_texture_source(gsr_color_conversion *self, gsr_source_color source_color) { + if(source_color == GSR_SOURCE_COLOR_BGR) { + const int swizzle_mask[] = { GL_BLUE, GL_GREEN, GL_RED, 1 }; + 
self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask); } } -// TODO: Handle source_color -void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) { - vec2f scale = {0.0f, 0.0f}; - if(texture_size.x > 0 && texture_size.y > 0) - scale = (vec2f){ (double)destination_size.x/(double)texture_size.x, (double)destination_size.y/(double)texture_size.y }; +static void gsr_color_conversion_swizzle_reset(gsr_color_conversion *self, gsr_source_color source_color) { + if(source_color == GSR_SOURCE_COLOR_BGR) { + const int swizzle_mask[] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA }; + self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask); + } +} - vec2i source_position = {0, 0}; - float rotation_matrix[2][2] = {{0, 0}, {0, 0}}; - gsr_color_conversion_apply_rotation(rotation, rotation_matrix, &source_position, texture_size, scale); +/* |source_pos| is in pixel coordinates and |source_size| */ +void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color) { + // TODO: Remove this crap + rotation = M_PI*2.0f - rotation; - source_position.x += texture_pos.x; - source_position.y += texture_pos.y; + /* TODO: Do not call this every frame? */ + vec2i dest_texture_size = {0, 0}; + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[0]); + self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &dest_texture_size.x); + self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &dest_texture_size.y); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); const int texture_target = external_texture ? 
GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D; + self->params.egl->glBindTexture(texture_target, texture_id); - switch(self->params.destination_color) { - case GSR_DESTINATION_COLOR_NV12: - case GSR_DESTINATION_COLOR_P010: { - const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010; - // Y - { - gsr_shader_use(&self->shaders[0]); - self->params.egl->glUniformMatrix2fv(self->uniforms[0].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); - self->params.egl->glUniform2i(self->uniforms[0].source_position, source_position.x, source_position.y); - self->params.egl->glUniform2i(self->uniforms[0].target_position, destination_pos.x, destination_pos.y); - self->params.egl->glUniform2f(self->uniforms[0].scale, scale.x, scale.y); - self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_R16 : GL_R8); - const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5; - const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5; - self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); - } + vec2i source_texture_size = {0, 0}; + if(external_texture) { + assert(self->params.load_external_image_shader); + source_texture_size = source_size; + } else { + /* TODO: Do not call this every frame? 
*/ + self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_WIDTH, &source_texture_size.x); + self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_HEIGHT, &source_texture_size.y); + } - // UV - { - gsr_shader_use(&self->shaders[1]); - self->params.egl->glUniformMatrix2fv(self->uniforms[1].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); - self->params.egl->glUniform2i(self->uniforms[1].source_position, source_position.x, source_position.y); - self->params.egl->glUniform2i(self->uniforms[1].target_position, destination_pos.x, destination_pos.y); - self->params.egl->glUniform2f(self->uniforms[1].scale, scale.x, scale.y); - self->params.egl->glBindImageTexture(0, self->params.destination_textures[1], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_RG16 : GL_RG8); - const double num_groups_x = (double)texture_size.x*0.5/(double)self->max_local_size_dim + 0.5; - const double num_groups_y = (double)texture_size.y*0.5/(double)self->max_local_size_dim + 0.5; - self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); - } - break; - } - case GSR_DESTINATION_COLOR_RGB8: { - gsr_shader_use(&self->shaders[2]); - self->params.egl->glUniformMatrix2fv(self->uniforms[2].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); - self->params.egl->glUniform2i(self->uniforms[2].source_position, source_position.x, source_position.y); - self->params.egl->glUniform2i(self->uniforms[2].target_position, destination_pos.x, destination_pos.y); - self->params.egl->glUniform2f(self->uniforms[2].scale, scale.x, scale.y); - self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8); - const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5; - const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5; - self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, 
num_groups_y), 1); - break; - } + // TODO: Remove this crap + if(abs_f(M_PI * 0.5f - rotation) <= 0.001f || abs_f(M_PI * 1.5f - rotation) <= 0.001f) { + float tmp = source_texture_size.x; + source_texture_size.x = source_texture_size.y; + source_texture_size.y = tmp; + } + + const vec2f pos_norm = { + ((float)source_pos.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f, + ((float)source_pos.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f, + }; + + const vec2f size_norm = { + ((float)source_size.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f, + ((float)source_size.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f, + }; + + const vec2f texture_pos_norm = { + (float)texture_pos.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x), + (float)texture_pos.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y), + }; + + const vec2f texture_size_norm = { + (float)texture_size.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x), + (float)texture_size.y / (source_texture_size.y == 0 ? 
1.0f : (float)source_texture_size.y), + }; + + const float vertices[] = { + -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y, + -1.0f + 0.0f, -1.0f + 0.0f, texture_pos_norm.x, texture_pos_norm.y, + -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y, + + -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y, + -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y, + -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y + texture_size_norm.y + }; + + gsr_color_conversion_swizzle_texture_source(self, source_color); + + self->params.egl->glBindVertexArray(self->vertex_array_object_id); + self->params.egl->glViewport(0, 0, dest_texture_size.x, dest_texture_size.y); + + /* TODO: this, also cleanup */ + //self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id); + self->params.egl->glBufferSubData(GL_ARRAY_BUFFER, 0, 24 * sizeof(float), vertices); + + { + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]); + //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. We want to do that when using multiple drm to create the final image (multiple monitors for example) + + const int shader_index = external_texture ? 
EXTERNAL_TEXTURE_SHADER_OFFSET : 0; + gsr_shader_use(&self->shaders[shader_index]); + self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation); + self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y); + self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); } - self->params.egl->glMemoryBarrier(GL_ALL_BARRIER_BITS); // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT - self->params.egl->glUseProgram(0); + if(self->params.num_destination_textures > 1) { + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]); + //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); + const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET + 1 : 1; + gsr_shader_use(&self->shaders[shader_index]); + self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation); + self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y); + self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6); + } + + self->params.egl->glBindVertexArray(0); + gsr_shader_use_none(&self->shaders[0]); self->params.egl->glBindTexture(texture_target, 0); + self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0); + + gsr_color_conversion_swizzle_reset(self, source_color); } void gsr_color_conversion_clear(gsr_color_conversion *self) { @@ -459,7 +557,3 @@ void gsr_color_conversion_clear(gsr_color_conversion *self) { self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0); } - -gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation) { - return (gsr_rotation)monitor_rotation; -} @@ -284,7 +284,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glGenTextures, "glGenTextures" }, { (void**)&self->glDeleteTextures, "glDeleteTextures" }, { (void**)&self->glBindTexture, "glBindTexture" }, - { (void**)&self->glBindImageTexture, "glBindImageTexture" }, { (void**)&self->glTexParameteri, "glTexParameteri" }, { (void**)&self->glTexParameteriv, 
"glTexParameteriv" }, { (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" }, @@ -294,8 +293,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glGenFramebuffers, "glGenFramebuffers" }, { (void**)&self->glBindFramebuffer, "glBindFramebuffer" }, { (void**)&self->glDeleteFramebuffers, "glDeleteFramebuffers" }, - { (void**)&self->glDispatchCompute, "glDispatchCompute" }, - { (void**)&self->glMemoryBarrier, "glMemoryBarrier" }, { (void**)&self->glViewport, "glViewport" }, { (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" }, { (void**)&self->glDrawBuffers, "glDrawBuffers" }, @@ -332,14 +329,11 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glGetUniformLocation, "glGetUniformLocation" }, { (void**)&self->glUniform1f, "glUniform1f" }, { (void**)&self->glUniform2f, "glUniform2f" }, - { (void**)&self->glUniform2i, "glUniform2i" }, - { (void**)&self->glUniformMatrix2fv, "glUniformMatrix2fv" }, { (void**)&self->glDebugMessageCallback, "glDebugMessageCallback" }, { (void**)&self->glScissor, "glScissor" }, { (void**)&self->glReadPixels, "glReadPixels" }, { (void**)&self->glMapBuffer, "glMapBuffer" }, { (void**)&self->glUnmapBuffer, "glUnmapBuffer" }, - { (void**)&self->glGetIntegerv, "glGetIntegerv" }, { NULL, NULL } }; diff --git a/src/encoder/video/software.c b/src/encoder/video/software.c index 627cdea..3649ff1 100644 --- a/src/encoder/video/software.c +++ b/src/encoder/video/software.c @@ -83,8 +83,8 @@ static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); // cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface); - //self->params.egl->glFlush(); - //self->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); } static void gsr_video_encoder_software_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int 
*num_textures, gsr_destination_color *destination_color) { diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c index 1d5dae0..8bb2f08 100644 --- a/src/encoder/video/vaapi.c +++ b/src/encoder/video/vaapi.c @@ -123,8 +123,8 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[i]); self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); while(self->params.egl->glGetError()) {} while(self->params.egl->eglGetError() != EGL_SUCCESS){} diff --git a/src/image_writer.c b/src/image_writer.c index e153a34..c4d66f4 100644 --- a/src/image_writer.c +++ b/src/image_writer.c @@ -17,7 +17,7 @@ bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int widt self->egl = egl; self->width = width; self->height = height; - self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGBA8, GL_RGBA, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */ + self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGB8, GL_RGB, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */ if(self->texture == 0) { fprintf(stderr, "gsr error: gsr_image_writer_init: failed to create texture\n"); return false; @@ -50,10 +50,10 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const bool success = false; switch(image_format) { case GSR_IMAGE_FORMAT_JPEG: - success = stbi_write_jpg(filepath, self->width, self->height, 4, data, 
quality); + success = stbi_write_jpg(filepath, self->width, self->height, 3, data, quality); break; case GSR_IMAGE_FORMAT_PNG: - success = stbi_write_png(filepath, self->width, self->height, 4, data, 0); + success = stbi_write_png(filepath, self->width, self->height, 3, data, 0); break; } @@ -65,7 +65,7 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self, const char *filepath, gsr_image_format image_format, int quality) { assert(self->source == GSR_IMAGE_WRITER_SOURCE_OPENGL); - uint8_t *frame_data = malloc(self->width * self->height * 4); + uint8_t *frame_data = malloc(self->width * self->height * 3); if(!frame_data) { fprintf(stderr, "gsr error: gsr_image_writer_write_to_file: failed to allocate memory for image frame\n"); return false; @@ -74,7 +74,7 @@ static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self // TODO: hdr support self->egl->glBindTexture(GL_TEXTURE_2D, self->texture); // We could use glGetTexSubImage, but it's only available starting from opengl 4.5 - self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, frame_data); + self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, frame_data); self->egl->glBindTexture(GL_TEXTURE_2D, 0); self->egl->glFlush(); diff --git a/src/main.cpp b/src/main.cpp index f0a3e06..6adeb05 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1072,9 +1072,8 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide // TODO: More quality options if(low_power) av_dict_set_int(&options, "low_power", 1, 0); - // Improves performance but increases vram. 
- // TODO: Might need a different async_depth for optimal performance on different amd/intel gpus - //av_dict_set_int(&options, "async_depth", 3, 0); + // Improves performance but increases vram + //av_dict_set_int(&options, "async_depth", 8, 0); if(codec_context->codec_id == AV_CODEC_ID_H264) { // Removed because it causes stutter in games for some people diff --git a/src/shader.c b/src/shader.c index b9fbb62..dcb956b 100644 --- a/src/shader.c +++ b/src/shader.c @@ -36,36 +36,28 @@ static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *s return shader_id; } -static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) { +static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) { unsigned int vertex_shader_id = 0; unsigned int fragment_shader_id = 0; - unsigned int compute_shader_id = 0; unsigned int program_id = 0; int linked = 0; - bool success = false; if(vertex_shader) { vertex_shader_id = loader_shader(egl, GL_VERTEX_SHADER, vertex_shader); if(vertex_shader_id == 0) - goto done; + goto err; } if(fragment_shader) { fragment_shader_id = loader_shader(egl, GL_FRAGMENT_SHADER, fragment_shader); if(fragment_shader_id == 0) - goto done; - } - - if(compute_shader) { - compute_shader_id = loader_shader(egl, GL_COMPUTE_SHADER, compute_shader); - if(compute_shader_id == 0) - goto done; + goto err; } program_id = egl->glCreateProgram(); if(program_id == 0) { fprintf(stderr, "gsr error: load_program: failed to create shader program, error: %d\n", egl->glGetError()); - goto done; + goto err; } if(vertex_shader_id) @@ -74,9 +66,6 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const if(fragment_shader_id) egl->glAttachShader(program_id, fragment_shader_id); - if(compute_shader_id) - egl->glAttachShader(program_id, compute_shader_id); - egl->glLinkProgram(program_id); egl->glGetProgramiv(program_id, 
GL_LINK_STATUS, &linked); @@ -90,36 +79,37 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const fprintf(stderr, "gsr error: load program: linking shader program failed, error:\n%s\n", info_log); } - goto done; + goto err; } - success = true; - done: - - if(!success) { - if(program_id) - egl->glDeleteProgram(program_id); - } - if(compute_shader_id) - egl->glDeleteShader(compute_shader_id); if(fragment_shader_id) egl->glDeleteShader(fragment_shader_id); if(vertex_shader_id) egl->glDeleteShader(vertex_shader_id); + return program_id; + + err: + if(program_id) + egl->glDeleteProgram(program_id); + if(fragment_shader_id) + egl->glDeleteShader(fragment_shader_id); + if(vertex_shader_id) + egl->glDeleteShader(vertex_shader_id); + return 0; } -int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) { +int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) { assert(egl); self->egl = egl; self->program_id = 0; - if(!vertex_shader && !fragment_shader && !compute_shader) { - fprintf(stderr, "gsr error: gsr_shader_init: vertex, fragment shader and compute shaders can't be NULL at the same time\n"); + if(!vertex_shader && !fragment_shader) { + fprintf(stderr, "gsr error: gsr_shader_init: vertex shader and fragment shader can't be NULL at the same time\n"); return -1; } - self->program_id = load_program(self->egl, vertex_shader, fragment_shader, compute_shader); + self->program_id = load_program(self->egl, vertex_shader, fragment_shader); if(self->program_id == 0) return -1; diff --git a/src/utils.c b/src/utils.c index 943fb2d..325f750 100644 --- a/src/utils.c +++ b/src/utils.c @@ -14,8 +14,10 @@ #include <xf86drmMode.h> #include <xf86drm.h> +#include <libdrm/drm_fourcc.h> #include <X11/Xatom.h> #include <X11/extensions/Xrandr.h> +#include <va/va_drmcommon.h> #include <libavcodec/avcodec.h> #include 
<libavutil/hwcontext_vaapi.h> @@ -661,6 +663,241 @@ bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context) { return device_context->type == AV_HWDEVICE_TYPE_VAAPI; } +static uint32_t drm_fourcc_to_va_fourcc(uint32_t drm_fourcc) { + switch(drm_fourcc) { + case DRM_FORMAT_XRGB8888: return VA_FOURCC_BGRX; + case DRM_FORMAT_XBGR8888: return VA_FOURCC_RGBX; + case DRM_FORMAT_RGBX8888: return VA_FOURCC_XBGR; + case DRM_FORMAT_BGRX8888: return VA_FOURCC_XRGB; + case DRM_FORMAT_ARGB8888: return VA_FOURCC_BGRA; + case DRM_FORMAT_ABGR8888: return VA_FOURCC_RGBA; + case DRM_FORMAT_RGBA8888: return VA_FOURCC_ABGR; + case DRM_FORMAT_BGRA8888: return VA_FOURCC_ARGB; + default: return drm_fourcc; + } +} + +bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) { + VAConfigID config_id = 0; + VAContextID context_id = 0; + VASurfaceID input_surface_id = 0; + VABufferID buffer_id = 0; + bool success = true; + + VADisplay va_dpy = video_codec_context_get_vaapi_display(video_codec_context); + if(!va_dpy) { + success = false; + goto done; + } + + VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %s\n", vaErrorStr(va_status)); + success = false; + goto done; + } + + VASurfaceID output_surface_id = (uintptr_t)video_frame->data[3]; + va_status = vaCreateContext(va_dpy, config_id, size.x, size.y, VA_PROGRESSIVE, &output_surface_id, 1, &context_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %s\n", vaErrorStr(va_status)); + success = 
false; + goto done; + } + + VADRMPRIMESurfaceDescriptor buf = {0}; + buf.fourcc = drm_fourcc_to_va_fourcc(format);//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10 + buf.width = size.x; + buf.height = size.y; + buf.num_objects = num_planes; + buf.num_layers = 1; + buf.layers[0].drm_format = format; + buf.layers[0].num_planes = buf.num_objects; + for(int i = 0; i < num_planes; ++i) { + buf.objects[i].fd = fds[i]; + buf.objects[i].size = size.y * pitches[i]; // TODO: + buf.objects[i].drm_format_modifier = modifiers[i]; + + buf.layers[0].object_index[i] = i; + buf.layers[0].offset[i] = offsets[i]; + buf.layers[0].pitch[i] = pitches[i]; + } + + VASurfaceAttrib attribs[2] = {0}; + attribs[0].type = VASurfaceAttribMemoryType; + attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; + attribs[0].value.type = VAGenericValueTypeInteger; + attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2; + attribs[1].type = VASurfaceAttribExternalBufferDescriptor; + attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; + attribs[1].value.type = VAGenericValueTypePointer; + attribs[1].value.value.p = &buf; + + // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10 + // TODO: Max size same as source_size + va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, size.x, size.y, &input_surface_id, 1, attribs, 2); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %s\n", vaErrorStr(va_status)); + success = false; + goto done; + } + + const VARectangle source_region = { + .x = source_pos.x, + .y = source_pos.y, + .width = source_size.x, + .height = source_size.y + }; + + const VARectangle output_region = { + .x = dest_pos.x, + .y = dest_pos.y, + .width = dest_size.x, + .height = dest_size.y + }; + + const bool scaled = dest_size.x != source_size.x || dest_size.y != source_size.y; + + // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi! 
+ VAProcPipelineParameterBuffer params = {0}; + params.surface = input_surface_id; + params.surface_region = NULL; + params.surface_region = &source_region; + params.output_region = &output_region; + params.output_background_color = 0; + params.filter_flags = scaled ? (VA_FILTER_SCALING_HQ | VA_FILTER_INTERPOLATION_BILINEAR) : 0; + params.pipeline_flags = VA_PROC_PIPELINE_FAST; + + params.input_color_properties.colour_primaries = 1; + params.input_color_properties.transfer_characteristics = 1; + params.input_color_properties.matrix_coefficients = 1; + params.surface_color_standard = VAProcColorStandardBT709; // TODO: + params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED; + + params.output_color_properties.colour_primaries = 1; + params.output_color_properties.transfer_characteristics = 1; + params.output_color_properties.matrix_coefficients = 1; + params.output_color_standard = VAProcColorStandardBT709; // TODO: + params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? 
VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED; + + params.processing_mode = VAProcPerformanceMode; + + // VAProcPipelineCaps pipeline_caps = {0}; + // va_status = vaQueryVideoProcPipelineCaps(self->va_dpy, + // self->context_id, + // NULL, 0, + // &pipeline_caps); + // if(va_status == VA_STATUS_SUCCESS) { + // fprintf(stderr, "pipeline_caps: %u, %u\n", (unsigned int)pipeline_caps.rotation_flags, pipeline_caps.blend_flags); + // } + + // TODO: params.output_hdr_metadata + + // TODO: + // if (first surface to render) + // pipeline_param->output_background_color = 0xff000000; // black + + va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status); + success = false; + goto done; + } + + va_status = vaBeginPicture(va_dpy, context_id, output_surface_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status); + success = false; + goto done; + } + + va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1); + if(va_status != VA_STATUS_SUCCESS) { + vaEndPicture(va_dpy, context_id); + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status); + success = false; + goto done; + } + + va_status = vaEndPicture(va_dpy, context_id); + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status); + success = false; + goto done; + } + + // vaSyncBuffer(va_dpy, buffer_id, 1000 * 1000 * 1000); + // vaSyncSurface(va_dpy, input_surface_id); + // vaSyncSurface(va_dpy, output_surface_id); + + done: + if(buffer_id) + vaDestroyBuffer(va_dpy, buffer_id); + + if(input_surface_id) + vaDestroySurfaces(va_dpy, 
&input_surface_id, 1); + + if(context_id) + vaDestroyContext(va_dpy, context_id); + + if(config_id) + vaDestroyConfig(va_dpy, config_id); + + return success; +} + +bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame) { + if(!image) + return false; + + int texture_fourcc = 0; + int texture_num_planes = 0; + uint64_t texture_modifiers = 0; + if(!egl->eglExportDMABUFImageQueryMESA(egl->egl_display, image, &texture_fourcc, &texture_num_planes, &texture_modifiers)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageQueryMESA failed\n"); + return false; + } + + if(texture_num_planes <= 0 || texture_num_planes > 8) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: expected planes size to be 0<planes<=8 for drm buf, got %d planes\n", texture_num_planes); + return false; + } + + int texture_fds[8]; + int32_t texture_strides[8]; + int32_t texture_offsets[8]; + + while(egl->eglGetError() != EGL_SUCCESS){} + if(!egl->eglExportDMABUFImageMESA(egl->egl_display, image, texture_fds, texture_strides, texture_offsets)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageMESA failed, error: %d\n", egl->eglGetError()); + return false; + } + + int fds[8]; + uint32_t offsets[8]; + uint32_t pitches[8]; + uint64_t modifiers[8]; + for(int i = 0; i < texture_num_planes; ++i) { + fds[i] = texture_fds[i]; + offsets[i] = texture_offsets[i]; + pitches[i] = texture_strides[i]; + modifiers[i] = texture_modifiers; + + if(fds[i] == -1) + texture_num_planes = i; + } + const bool success = texture_num_planes > 0 && vaapi_copy_drm_planes_to_video_surface(video_codec_context, video_frame, source_pos, source_size, dest_pos, dest_size, texture_fourcc, source_size, fds, offsets, pitches, modifiers, texture_num_planes); + + for(int i = 0; i < texture_num_planes; ++i) { + 
if(texture_fds[i] > 0) { + close(texture_fds[i]); + texture_fds[i] = -1; + } + } + + return success; +} + vec2i scale_keep_aspect_ratio(vec2i from, vec2i to) { if(from.x == 0 || from.y == 0) return (vec2i){0, 0}; |