aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2025-03-29 13:27:20 +0100
committer dec05eba <dec05eba@protonmail.com> 2025-03-29 13:29:10 +0100
commit a41a32cb9079dc3c78414b7b0cdc68243333020d (patch)
tree 5776bf16deb9f4e18cf744d474e0d1f0dc19c081
parent 634a563bc0e7d23040e70f90e7ccc498aa7db41e (diff)
WIP: use compute shader instead of graphics shader for better performance (especially on amd)
-rw-r--r-- include/color_conversion.h 26
-rw-r--r-- include/egl.h 14
-rw-r--r-- include/image_writer.h 2
-rw-r--r-- include/shader.h 2
-rw-r--r-- include/utils.h 2
-rw-r--r-- src/capture/kms.c 101
-rw-r--r-- src/capture/nvfbc.c 10
-rw-r--r-- src/capture/portal.c 61
-rw-r--r-- src/capture/xcomposite.c 35
-rw-r--r-- src/capture/ximage.c 4
-rw-r--r-- src/color_conversion.c 516
-rw-r--r-- src/egl.c 6
-rw-r--r-- src/encoder/video/software.c 4
-rw-r--r-- src/encoder/video/vaapi.c 4
-rw-r--r-- src/image_writer.c 10
-rw-r--r-- src/main.cpp 5
-rw-r--r-- src/shader.c 48
-rw-r--r-- src/utils.c 237
18 files changed, 336 insertions, 751 deletions
diff --git a/include/color_conversion.h b/include/color_conversion.h
index b80be21..37eeb8e 100644
--- a/include/color_conversion.h
+++ b/include/color_conversion.h
@@ -2,9 +2,12 @@
#define GSR_COLOR_CONVERSION_H
#include "shader.h"
+#include "defs.h"
#include "vec2.h"
#include <stdbool.h>
+#define GSR_COLOR_CONVERSION_MAX_SHADERS 3
+
typedef enum {
GSR_COLOR_RANGE_LIMITED,
GSR_COLOR_RANGE_FULL
@@ -26,9 +29,18 @@ typedef enum {
GSR_DESTINATION_COLOR_RGB8
} gsr_destination_color;
+typedef enum {
+ GSR_ROT_0,
+ GSR_ROT_90,
+ GSR_ROT_180,
+ GSR_ROT_270
+} gsr_rotation;
+
typedef struct {
- int offset;
- int rotation;
+ int rotation_matrix;
+ int source_position;
+ int target_position;
+ int scale;
} gsr_color_uniforms;
typedef struct {
@@ -45,19 +57,23 @@ typedef struct {
typedef struct {
gsr_color_conversion_params params;
- gsr_color_uniforms uniforms[4];
- gsr_shader shaders[4];
+ gsr_color_uniforms uniforms[GSR_COLOR_CONVERSION_MAX_SHADERS];
+ gsr_shader shaders[GSR_COLOR_CONVERSION_MAX_SHADERS];
unsigned int framebuffers[2];
unsigned int vertex_array_object_id;
unsigned int vertex_buffer_object_id;
+
+ int max_local_size_dim;
} gsr_color_conversion;
int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params);
void gsr_color_conversion_deinit(gsr_color_conversion *self);
-void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color);
+void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color);
void gsr_color_conversion_clear(gsr_color_conversion *self);
+gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation);
+
#endif /* GSR_COLOR_CONVERSION_H */
diff --git a/include/egl.h b/include/egl.h
index 0d08270..643ab30 100644
--- a/include/egl.h
+++ b/include/egl.h
@@ -98,7 +98,7 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_TEXTURE_EXTERNAL_OES 0x8D65
#define GL_RED 0x1903
#define GL_GREEN 0x1904
-#define GL_BLUE 0x1905
+#define GL_BLUE 0x1905
#define GL_ALPHA 0x1906
#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46
#define GL_RG 0x8227
@@ -111,6 +111,7 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_R16 0x822A
#define GL_RG16 0x822C
#define GL_RGB16 0x8054
+#define GL_RGBA32F 0x8814
#define GL_UNSIGNED_BYTE 0x1401
#define GL_COLOR_BUFFER_BIT 0x00004000
#define GL_TEXTURE_WRAP_S 0x2802
@@ -134,6 +135,10 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_SCISSOR_TEST 0x0C11
#define GL_PACK_ALIGNMENT 0x0D05
#define GL_UNPACK_ALIGNMENT 0x0CF5
+#define GL_READ_ONLY 0x88B8
+#define GL_WRITE_ONLY 0x88B9
+#define GL_READ_WRITE 0x88BA
+#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS 0x90EB
#define GL_VENDOR 0x1F00
#define GL_RENDERER 0x1F01
@@ -143,6 +148,7 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_INFO_LOG_LENGTH 0x8B84
#define GL_FRAGMENT_SHADER 0x8B30
#define GL_VERTEX_SHADER 0x8B31
+#define GL_COMPUTE_SHADER 0x91B9
#define GL_COMPILE_STATUS 0x8B81
#define GL_LINK_STATUS 0x8B82
@@ -231,6 +237,7 @@ struct gsr_egl {
void (*glGenTextures)(int n, unsigned int *textures);
void (*glDeleteTextures)(int n, const unsigned int *texture);
void (*glBindTexture)(unsigned int target, unsigned int texture);
+ void (*glBindImageTexture)(unsigned int unit, unsigned int texture, int level, unsigned char layered, int layer, unsigned int access, unsigned int format);
void (*glTexParameteri)(unsigned int target, unsigned int pname, int param);
void (*glTexParameteriv)(unsigned int target, unsigned int pname, const int *params);
void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params);
@@ -240,6 +247,8 @@ struct gsr_egl {
void (*glGenFramebuffers)(int n, unsigned int *framebuffers);
void (*glBindFramebuffer)(unsigned int target, unsigned int framebuffer);
void (*glDeleteFramebuffers)(int n, const unsigned int *framebuffers);
+ void (*glDispatchCompute)(unsigned int num_groups_x, unsigned int num_groups_y, unsigned int num_groups_z);
+ void (*glMemoryBarrier)(unsigned int barriers);
void (*glViewport)(int x, int y, int width, int height);
void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level);
void (*glDrawBuffers)(int n, const unsigned int *bufs);
@@ -276,11 +285,14 @@ struct gsr_egl {
int (*glGetUniformLocation)(unsigned int program, const char *name);
void (*glUniform1f)(int location, float v0);
void (*glUniform2f)(int location, float v0, float v1);
+ void (*glUniform2i)(int location, int v0, int v1);
+ void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value);
void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam);
void (*glScissor)(int x, int y, int width, int height);
void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels);
void* (*glMapBuffer)(unsigned int target, unsigned int access);
unsigned char (*glUnmapBuffer)(unsigned int target);
+ void (*glGetIntegerv)(unsigned int pname, int *params);
};
bool gsr_egl_load(gsr_egl *self, gsr_window *window, bool is_monitor_capture, bool enable_debug);
diff --git a/include/image_writer.h b/include/image_writer.h
index 400edd0..65e7497 100644
--- a/include/image_writer.h
+++ b/include/image_writer.h
@@ -25,7 +25,7 @@ typedef struct {
} gsr_image_writer;
bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int width, int height);
-/* |memory| is taken as a reference */
+/* |memory| is taken as a reference. The data is expected to be in rgba8 format (8 bit rgba) */
bool gsr_image_writer_init_memory(gsr_image_writer *self, const void *memory, int width, int height);
void gsr_image_writer_deinit(gsr_image_writer *self);
diff --git a/include/shader.h b/include/shader.h
index 57d1096..8bc1104 100644
--- a/include/shader.h
+++ b/include/shader.h
@@ -9,7 +9,7 @@ typedef struct {
} gsr_shader;
/* |vertex_shader| or |fragment_shader| may be NULL */
-int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader);
+int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader);
void gsr_shader_deinit(gsr_shader *self);
int gsr_shader_bind_attribute_location(gsr_shader *self, const char *attribute, int location);
diff --git a/include/utils.h b/include/utils.h
index fd340e8..873e6e4 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -64,8 +64,6 @@ int create_directory_recursive(char *path);
/* |img_attr| needs to be at least 44 in size */
void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes, bool use_modifier);
bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context);
-bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes);
-bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame);
vec2i scale_keep_aspect_ratio(vec2i from, vec2i to);
diff --git a/src/capture/kms.c b/src/capture/kms.c
index 578fded..e98fab7 100644
--- a/src/capture/kms.c
+++ b/src/capture/kms.c
@@ -53,10 +53,6 @@ typedef struct {
bool is_x11;
gsr_cursor x11_cursor;
- bool performance_error_shown;
- bool fast_path_failed;
- bool mesa_supports_compute_only_vaapi_copy;
-
//int drm_fd;
//uint64_t prev_sequence;
//bool damaged;
@@ -229,17 +225,6 @@ static int gsr_capture_kms_start(gsr_capture *cap, gsr_capture_metadata *capture
capture_metadata->height = self->capture_size.y;
}
- self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
- if(self->fast_path_failed)
- fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
-
- //if(self->params.hdr) {
- // self->fast_path_failed = true;
- // fprintf(stderr, "gsr warning: gsr_capture_kms_start: recording with hdr requires shader color conversion which might be slow. If this is an issue record with -w portal instead (which converts HDR to SDR)\n");
- //}
-
- self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6);
-
self->last_time_monitor_check = clock_get_monotonic_seconds();
return 0;
}
@@ -274,16 +259,6 @@ static void gsr_capture_kms_on_event(gsr_capture *cap, gsr_egl *egl) {
// }
// }
-static float monitor_rotation_to_radians(gsr_monitor_rotation rot) {
- switch(rot) {
- case GSR_MONITOR_ROT_0: return 0.0f;
- case GSR_MONITOR_ROT_90: return M_PI_2;
- case GSR_MONITOR_ROT_180: return M_PI;
- case GSR_MONITOR_ROT_270: return M_PI + M_PI_2;
- }
- return 0.0f;
-}
-
static gsr_kms_response_item* find_drm_by_connector_id(gsr_kms_response *kms_response, uint32_t connector_id) {
for(int i = 0; i < kms_response->num_items; ++i) {
if(kms_response->items[i].connector_id == connector_id && !kms_response->items[i].is_cursor)
@@ -449,7 +424,7 @@ static gsr_kms_response_item* find_cursor_drm_if_on_monitor(gsr_capture_kms *sel
return cursor_drm_fd;
}
-static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, float texture_rotation, vec2i output_size, vec2i framebuffer_size) {
+static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, vec2i output_size, vec2i framebuffer_size) {
const vec2d scale = {
self->capture_size.x == 0 ? 0 : (double)output_size.x / (double)self->capture_size.x,
self->capture_size.y == 0 ? 0 : (double)output_size.y / (double)self->capture_size.y
@@ -523,7 +498,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color
gsr_color_conversion_draw(color_conversion, self->cursor_texture_id,
cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y},
(vec2i){0, 0}, cursor_size,
- texture_rotation, cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB);
+ gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
@@ -551,7 +526,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color
gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id,
cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y},
(vec2i){0, 0}, self->x11_cursor.size,
- 0.0f, false, GSR_SOURCE_COLOR_RGB);
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
@@ -604,16 +579,6 @@ static void gsr_capture_kms_update_connector_ids(gsr_capture_kms *self) {
self->capture_size = rotate_capture_size_if_rotated(self, monitor.size);
}
-static void gsr_capture_kms_fail_fast_path_if_not_fast(gsr_capture_kms *self, uint32_t pixel_format) {
- const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF;
- if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) {
- self->fast_path_failed = true;
- fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used."
- " If you experience performance problems in the video then record on a single window on X11 or use portal capture option instead or disable 10/12-bit color option in your desktop environment settings,"
- " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n");
- }
-}
-
static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) {
gsr_capture_kms *self = cap->priv;
@@ -645,15 +610,6 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata))
gsr_kms_set_hdr_metadata(self, drm_fd);
- if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
- self->performance_error_shown = true;
- self->fast_path_failed = true;
- fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used."
- " If you experience performance problems in the video then record a single window on X11 or use portal capture option instead\n");
- }
-
- gsr_capture_kms_fail_fast_path_if_not_fast(self, drm_fd->pixel_format);
-
self->capture_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h });
if(self->params.region_size.x > 0 && self->params.region_size.y > 0)
self->capture_size = self->params.region_size;
@@ -662,7 +618,6 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size;
output_size = scale_keep_aspect_ratio(self->capture_size, output_size);
- const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation);
const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) };
gsr_capture_kms_update_capture_size_change(self, color_conversion, target_pos, drm_fd);
@@ -673,42 +628,20 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
capture_pos.x += self->params.region_position.x;
capture_pos.y += self->params.region_position.y;
- self->params.egl->glFlush();
- self->params.egl->glFinish();
-
- /* Fast opengl free path */
- if(!self->fast_path_failed && self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
- int fds[4];
- uint32_t offsets[4];
- uint32_t pitches[4];
- uint64_t modifiers[4];
- for(int i = 0; i < drm_fd->num_dma_bufs; ++i) {
- fds[i] = drm_fd->dma_buf[i].fd;
- offsets[i] = drm_fd->dma_buf[i].offset;
- pitches[i] = drm_fd->dma_buf[i].pitch;
- modifiers[i] = drm_fd->modifier;
- }
- if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){capture_pos.x, capture_pos.y}, self->capture_size, target_pos, output_size, drm_fd->pixel_format, (vec2i){drm_fd->width, drm_fd->height}, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs)) {
- fprintf(stderr, "gsr error: gsr_capture_kms_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
- self->fast_path_failed = true;
- }
- } else {
- self->fast_path_failed = true;
- }
-
- if(self->fast_path_failed) {
- EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
- if(image) {
- gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
- self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
- }
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
- gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
- target_pos, output_size,
- capture_pos, self->capture_size,
- texture_rotation, self->external_texture_fallback, GSR_SOURCE_COLOR_RGB);
+ EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
+ if(image) {
+ gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
+ self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
}
+ gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
+ target_pos, output_size,
+ capture_pos, self->capture_size,
+ gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB);
+
if(self->params.record_cursor) {
gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane);
// The cursor is handled by x11 on x11 instead of using the cursor drm plane because on prime systems with a dedicated nvidia gpu
@@ -722,12 +655,12 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
render_x11_cursor(self, color_conversion, cursor_monitor_offset, target_pos, output_size);
} else if(cursor_drm_fd) {
const vec2i framebuffer_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h });
- render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, texture_rotation, output_size, framebuffer_size);
+ render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, output_size, framebuffer_size);
}
}
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
gsr_capture_kms_cleanup_kms_fds(self);
diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c
index b6a3671..5f47b00 100644
--- a/src/capture/nvfbc.c
+++ b/src/capture/nvfbc.c
@@ -390,16 +390,16 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap
return 0;
}
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex],
target_pos, (vec2i){output_size.x, output_size.y},
self->params.region_position, frame_size,
- 0.0f, false, GSR_SOURCE_COLOR_BGR);
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR);
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
return 0;
}
diff --git a/src/capture/portal.c b/src/capture/portal.c
index e065f02..56072d8 100644
--- a/src/capture/portal.c
+++ b/src/capture/portal.c
@@ -23,9 +23,6 @@ typedef struct {
vec2i capture_size;
gsr_pipewire_video_dmabuf_data dmabuf_data[GSR_PIPEWIRE_VIDEO_DMABUF_MAX_PLANES];
int num_dmabuf_data;
-
- bool fast_path_failed;
- bool mesa_supports_compute_only_vaapi_copy;
} gsr_capture_portal;
static void gsr_capture_portal_cleanup_plane_fds(gsr_capture_portal *self) {
@@ -305,12 +302,6 @@ static int gsr_capture_portal_start(gsr_capture *cap, gsr_capture_metadata *capt
capture_metadata->height = self->params.output_resolution.y;
}
- self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
- if(self->fast_path_failed)
- fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
-
- self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6);
-
return 0;
}
@@ -318,16 +309,6 @@ static int max_int(int a, int b) {
return a > b ? a : b;
}
-static void gsr_capture_portal_fail_fast_path_if_not_fast(gsr_capture_portal *self, uint32_t pixel_format) {
- const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF;
- if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) {
- self->fast_path_failed = true;
- fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used."
- " If you experience performance problems in the video then record on a single window on X11 instead or disable 10/12-bit color option in your desktop environment settings,"
- " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n");
- }
-}
-
static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) {
(void)color_conversion;
gsr_capture_portal *self = cap->priv;
@@ -348,45 +329,21 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca
return 0;
}
- gsr_capture_portal_fail_fast_path_if_not_fast(self, pipewire_fourcc);
-
const bool is_scaled = self->params.output_resolution.x > 0 && self->params.output_resolution.y > 0;
vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size;
output_size = scale_keep_aspect_ratio(self->capture_size, output_size);
const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) };
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
// TODO: Handle region crop
- /* Fast opengl free path */
- if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
- int fds[4];
- uint32_t offsets[4];
- uint32_t pitches[4];
- uint64_t modifiers[4];
- for(int i = 0; i < self->num_dmabuf_data; ++i) {
- fds[i] = self->dmabuf_data[i].fd;
- offsets[i] = self->dmabuf_data[i].offset;
- pitches[i] = self->dmabuf_data[i].stride;
- modifiers[i] = pipewire_modifiers;
- }
- if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){region.x, region.y}, self->capture_size, target_pos, output_size, pipewire_fourcc, self->capture_size, fds, offsets, pitches, modifiers, self->num_dmabuf_data)) {
- fprintf(stderr, "gsr error: gsr_capture_portal_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
- self->fast_path_failed = true;
- }
- } else {
- self->fast_path_failed = true;
- }
-
- if(self->fast_path_failed) {
- gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id,
- target_pos, output_size,
- (vec2i){region.x, region.y}, self->capture_size,
- 0.0f, using_external_image, GSR_SOURCE_COLOR_RGB);
- }
+ gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id,
+ target_pos, output_size,
+ (vec2i){region.x, region.y}, self->capture_size,
+ GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB);
if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) {
const vec2d scale = {
@@ -404,12 +361,12 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id,
(vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y},
(vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height},
- 0.0f, false, GSR_SOURCE_COLOR_RGB);
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
gsr_capture_portal_cleanup_plane_fds(self);
diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c
index d8f4c27..16bc988 100644
--- a/src/capture/xcomposite.c
+++ b/src/capture/xcomposite.c
@@ -34,7 +34,6 @@ typedef struct {
gsr_cursor cursor;
bool clear_background;
- bool fast_path_failed;
} gsr_capture_xcomposite;
static void gsr_capture_xcomposite_stop(gsr_capture_xcomposite *self) {
@@ -117,10 +116,6 @@ static int gsr_capture_xcomposite_start(gsr_capture *cap, gsr_capture_metadata *
capture_metadata->height = self->params.output_resolution.y;
}
- self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
- if(self->fast_path_failed)
- fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
-
self->window_resize_timer = clock_get_monotonic_seconds();
return 0;
}
@@ -258,25 +253,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata
const vec2i target_pos = { max_int(0, capture_metdata->width / 2 - output_size.x / 2), max_int(0, capture_metdata->height / 2 - output_size.y / 2) };
- self->params.egl->glFlush();
- self->params.egl->glFinish();
-
- /* Fast opengl free path */
- if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metdata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
- if(!vaapi_copy_egl_image_to_video_surface(self->params.egl, self->window_texture.image, (vec2i){0, 0}, self->texture_size, target_pos, output_size, capture_metdata->video_codec_context, capture_metdata->frame)) {
- fprintf(stderr, "gsr error: gsr_capture_xcomposite_capture: vaapi_copy_egl_image_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
- self->fast_path_failed = true;
- }
- } else {
- self->fast_path_failed = true;
- }
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
- if(self->fast_path_failed) {
- gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture),
- target_pos, output_size,
- (vec2i){0, 0}, self->texture_size,
- 0.0f, false, GSR_SOURCE_COLOR_RGB);
- }
+ gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture),
+ target_pos, output_size,
+ (vec2i){0, 0}, self->texture_size,
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
if(self->params.record_cursor && self->cursor.visible) {
const vec2d scale = {
@@ -297,13 +280,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id,
cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y},
(vec2i){0, 0}, self->cursor.size,
- 0.0f, false, GSR_SOURCE_COLOR_RGB);
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
return 0;
}
diff --git a/src/capture/ximage.c b/src/capture/ximage.c
index 259761d..ac00d72 100644
--- a/src/capture/ximage.c
+++ b/src/capture/ximage.c
@@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->texture_id,
target_pos, output_size,
(vec2i){0, 0}, self->capture_size,
- 0.0f, false, GSR_SOURCE_COLOR_RGB);
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
if(self->params.record_cursor && self->cursor.visible) {
const vec2d scale = {
@@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id,
cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y},
(vec2i){0, 0}, self->cursor.size,
- 0.0f, false, GSR_SOURCE_COLOR_RGB);
+ GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
diff --git a/src/color_conversion.c b/src/color_conversion.c
index c1fe894..72390f5 100644
--- a/src/color_conversion.c
+++ b/src/color_conversion.c
@@ -5,20 +5,16 @@
#include <math.h>
#include <assert.h>
-#define MAX_SHADERS 4
-#define MAX_FRAMEBUFFERS 2
-#define EXTERNAL_TEXTURE_SHADER_OFFSET 2
+// TODO: external texture
+// TODO: Scissor doesn't work with compute shaders. In the compute shader this can be implemented with two step() calls, using the result
+// in a call to mix() to choose between the source and output color.
-static float abs_f(float v) {
- return v >= 0.0f ? v : -v;
-}
+#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
+// TODO: Use the minimal barrier required and move this to egl.h
+#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
-#define ROTATE_Z "mat4 rotate_z(in float angle) {\n" \
- " return mat4(cos(angle), -sin(angle), 0.0, 0.0,\n" \
- " sin(angle), cos(angle), 0.0, 0.0,\n" \
- " 0.0, 0.0, 1.0, 0.0,\n" \
- " 0.0, 0.0, 0.0, 1.0);\n" \
- "}\n"
+#define MAX_FRAMEBUFFERS 2
+#define EXTERNAL_TEXTURE_SHADER_OFFSET 2
/* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */
@@ -48,6 +44,10 @@ static float abs_f(float v) {
" 0.060118, 0.429412, -0.038049, 0.000000,\n" \
" 0.062745, 0.500000, 0.500000, 1.000000);\n"
+static int max_int(int a, int b) {
+ return a > b ? a : b;
+}
+
static const char* color_format_range_get_transform_matrix(gsr_destination_color color_format, gsr_color_range color_range) {
switch(color_format) {
case GSR_DESTINATION_COLOR_NV12: {
@@ -76,187 +76,111 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color
return NULL;
}
-static int load_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) {
+// TODO: Make alpha blending optional
+// TODO: Optimize these shaders.
+static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) { // Builds and compiles the compute shader that writes the Y component and stores its uniform locations in |uniforms|. Returns 0 on success, -1 if gsr_shader_init fails.
const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range);
-
- char vertex_shader[2048];
- snprintf(vertex_shader, sizeof(vertex_shader),
- "#version 300 es \n"
- "in vec2 pos; \n"
- "in vec2 texcoords; \n"
- "out vec2 texcoords_out; \n"
- "uniform vec2 offset; \n"
- "uniform float rotation; \n"
- ROTATE_Z
- "void main() \n"
- "{ \n"
- " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
- " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n"
- "} \n");
-
- const char *main_code =
- main_code =
- " vec4 pixel = texture(tex1, texcoords_out); \n"
- " FragColor.x = (RGBtoYUV * vec4(pixel.rgb, 1.0)).x; \n"
- " FragColor.w = pixel.a; \n";
-
- char fragment_shader[2048];
- if(external_texture) {
- snprintf(fragment_shader, sizeof(fragment_shader),
- "#version 300 es \n"
- "#extension GL_OES_EGL_image_external : enable \n"
- "#extension GL_OES_EGL_image_external_essl3 : require \n"
- "precision highp float; \n"
- "in vec2 texcoords_out; \n"
- "uniform samplerExternalOES tex1; \n"
- "out vec4 FragColor; \n"
- "%s"
- "void main() \n"
- "{ \n"
- "%s"
- "} \n", color_transform_matrix, main_code);
- } else {
- snprintf(fragment_shader, sizeof(fragment_shader),
- "#version 300 es \n"
- "precision highp float; \n"
- "in vec2 texcoords_out; \n"
- "uniform sampler2D tex1; \n"
- "out vec4 FragColor; \n"
- "%s"
- "void main() \n"
- "{ \n"
- "%s"
- "} \n", color_transform_matrix, main_code);
- }
-
- if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
+ const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010; // P010 selects the "r16" image format below, everything else "r8"
+
+ char compute_shader[2048]; // NOTE(review): the snprintf result below is not checked, so truncation would go unnoticed here
+ snprintf(compute_shader, sizeof(compute_shader),
+ "#version 430 core\n"
+ "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" // square work group: max_local_size_dim x max_local_size_dim
+ "uniform sampler2D imgInput;\n"
+ "uniform ivec2 source_position;\n"
+ "uniform ivec2 target_position;\n"
+ "uniform vec2 scale;\n"
+ "uniform mat2 rotation_matrix;\n"
+ "layout(%s, binding = 0) uniform image2D imgOutput;\n"
+ "%s" // the RGBtoYUV matrix definition is spliced in here
+ "void main() {\n"
+ " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
+ " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" // scaled input size, rounded to the nearest integer
+ " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" // rotate around the center of the scaled image
+ " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
+ " vec4 source_color = texture(imgInput, texCoord);\n"
+ " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n"
+ " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n" // NOTE(review): reads at the rotated coordinate while the store below writes at texelCoord - confirm this is intended
+ " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n" // blend the existing value with the new Y value by source alpha
+ " imageStore(imgOutput, texelCoord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n"
+ "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "r16" : "r8", color_transform_matrix); // NOTE(review): color_transform_matrix is passed to %s unchecked; the lookup function has a `return NULL` path - confirm it cannot be hit here
+
+ if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) // only the compute source is supplied; the two other source arguments are NULL
return -1;
- gsr_shader_bind_attribute_location(shader, "pos", 0);
- gsr_shader_bind_attribute_location(shader, "texcoords", 1);
- uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
- uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
+ uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); // look up the uniform locations once and keep them in |uniforms|
+ uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
+ uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
+ uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
return 0;
}
-static unsigned int load_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) {
+static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) { // Builds and compiles the compute shader that writes the two chroma (UV) components and stores its uniform locations in |uniforms|. Returns 0 on success, -1 if gsr_shader_init fails.
const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range);
-
- char vertex_shader[2048];
- snprintf(vertex_shader, sizeof(vertex_shader),
- "#version 300 es \n"
- "in vec2 pos; \n"
- "in vec2 texcoords; \n"
- "out vec2 texcoords_out; \n"
- "uniform vec2 offset; \n"
- "uniform float rotation; \n"
- ROTATE_Z
- "void main() \n"
- "{ \n"
- " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
- " gl_Position = (vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0)) * vec4(0.5, 0.5, 1.0, 1.0) - vec4(0.5, 0.5, 0.0, 0.0); \n"
- "} \n");
-
- const char *main_code =
- main_code =
- " vec4 pixel = texture(tex1, texcoords_out); \n"
- " FragColor.xy = (RGBtoYUV * vec4(pixel.rgb, 1.0)).yz; \n"
- " FragColor.w = pixel.a; \n";
-
- char fragment_shader[2048];
- if(external_texture) {
- snprintf(fragment_shader, sizeof(fragment_shader),
- "#version 300 es \n"
- "#extension GL_OES_EGL_image_external : enable \n"
- "#extension GL_OES_EGL_image_external_essl3 : require \n"
- "precision highp float; \n"
- "in vec2 texcoords_out; \n"
- "uniform samplerExternalOES tex1; \n"
- "out vec4 FragColor; \n"
- "%s"
- "void main() \n"
- "{ \n"
- "%s"
- "} \n", color_transform_matrix, main_code);
- } else {
- snprintf(fragment_shader, sizeof(fragment_shader),
- "#version 300 es \n"
- "precision highp float; \n"
- "in vec2 texcoords_out; \n"
- "uniform sampler2D tex1; \n"
- "out vec4 FragColor; \n"
- "%s"
- "void main() \n"
- "{ \n"
- "%s"
- "} \n", color_transform_matrix, main_code);
- }
-
- if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
+ const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010; // P010 selects the "rg16" image format below, everything else "rg8"
+
+ char compute_shader[2048]; // NOTE(review): the snprintf result below is not checked, so truncation would go unnoticed here
+ snprintf(compute_shader, sizeof(compute_shader),
+ "#version 430 core\n"
+ "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" // square work group: max_local_size_dim x max_local_size_dim
+ "uniform sampler2D imgInput;\n"
+ "uniform ivec2 source_position;\n"
+ "uniform ivec2 target_position;\n"
+ "uniform vec2 scale;\n"
+ "uniform mat2 rotation_matrix;\n"
+ "layout(%s, binding = 0) uniform image2D imgOutput;\n"
+ "%s" // the RGBtoYUV matrix definition is spliced in here
+ "void main() {\n"
+ " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
+ " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" // scaled input size, rounded to the nearest integer
+ " vec2 rotated_texel_coord = vec2(texelCoord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n" // positions and center are halved - presumably because the UV image is half the size of the Y image; TODO confirm
+ " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
+ " vec4 source_color = texture(imgInput, texCoord * 2.0);\n" // doubled to compensate for the halved coordinates above
+ " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n"
+ " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position/2);\n" // NOTE(review): reads at the rotated coordinate while the store below writes at texelCoord - confirm this is intended
+ " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n" // blend existing UV with the new U/V (components g and b of the YUV vector) by source alpha
+ " imageStore(imgOutput, texelCoord + target_position/2, vec4(uv_color, 1.0, 1.0));\n"
+ "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "rg16" : "rg8", color_transform_matrix); // NOTE(review): color_transform_matrix is passed to %s unchecked; the lookup function has a `return NULL` path - confirm it cannot be hit here
+
+ if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) // only the compute source is supplied; the two other source arguments are NULL
return -1;
- gsr_shader_bind_attribute_location(shader, "pos", 0);
- gsr_shader_bind_attribute_location(shader, "texcoords", 1);
- uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
- uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
+ uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); // look up the uniform locations once and keep them in |uniforms|
+ uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
+ uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
+ uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
return 0;
}
-static unsigned int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, bool external_texture) {
- char vertex_shader[2048];
- snprintf(vertex_shader, sizeof(vertex_shader),
- "#version 300 es \n"
- "in vec2 pos; \n"
- "in vec2 texcoords; \n"
- "out vec2 texcoords_out; \n"
- "uniform vec2 offset; \n"
- "uniform float rotation; \n"
- ROTATE_Z
- "void main() \n"
- "{ \n"
- " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
- " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n"
- "} \n");
-
- const char *main_code =
- main_code =
- " vec4 pixel = texture(tex1, texcoords_out); \n"
- " FragColor = pixel; \n";
-
- char fragment_shader[2048];
- if(external_texture) {
- snprintf(fragment_shader, sizeof(fragment_shader),
- "#version 300 es \n"
- "#extension GL_OES_EGL_image_external : enable \n"
- "#extension GL_OES_EGL_image_external_essl3 : require \n"
- "precision highp float; \n"
- "in vec2 texcoords_out; \n"
- "uniform samplerExternalOES tex1; \n"
- "out vec4 FragColor; \n"
- "void main() \n"
- "{ \n"
- "%s"
- "} \n", main_code);
- } else {
- snprintf(fragment_shader, sizeof(fragment_shader),
- "#version 300 es \n"
- "precision highp float; \n"
- "in vec2 texcoords_out; \n"
- "uniform sampler2D tex1; \n"
- "out vec4 FragColor; \n"
- "void main() \n"
- "{ \n"
- "%s"
- "} \n", main_code);
- }
-
- if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
+static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim) { // Builds and compiles the compute shader that copies the sampled input color to an rgba8 output image and stores its uniform locations in |uniforms|. Returns 0 on success, -1 if gsr_shader_init fails.
+ char compute_shader[2048]; // NOTE(review): the snprintf result below is not checked, so truncation would go unnoticed here
+ snprintf(compute_shader, sizeof(compute_shader),
+ "#version 430 core\n"
+ "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n" // square work group: max_local_size_dim x max_local_size_dim
+ "uniform sampler2D imgInput;\n"
+ "uniform ivec2 source_position;\n"
+ "uniform ivec2 target_position;\n"
+ "uniform vec2 scale;\n"
+ "uniform mat2 rotation_matrix;\n"
+ "layout(rgba8, binding = 0) uniform image2D imgOutput;\n"
+ "void main() {\n"
+ " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
+ " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n" // scaled input size, rounded to the nearest integer
+ " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n" // rotate around the center of the scaled image
+ " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
+ " vec4 source_color = texture(imgInput, texCoord);\n"
+ //" vec4 output_color = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n"
+ //" vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n"
+ " imageStore(imgOutput, texelCoord + target_position, source_color);\n" // alpha blending is disabled (commented out above): the source color is stored as-is
+ "}\n", max_local_size_dim, max_local_size_dim);
+
+ if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0) // only the compute source is supplied; the two other source arguments are NULL
return -1;
- gsr_shader_bind_attribute_location(shader, "pos", 0);
- gsr_shader_bind_attribute_location(shader, "texcoords", 1);
- uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
- uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
+ uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position"); // look up the uniform locations once and keep them in |uniforms|
+ uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
+ uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
+ uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
return 0;
}
@@ -315,6 +239,11 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
memset(self, 0, sizeof(*self));
self->params.egl = params->egl;
self->params = *params;
+
+ int max_compute_work_group_invocations = 256;
+ self->params.egl->glGetIntegerv(GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS, &max_compute_work_group_invocations);
+ self->max_local_size_dim = sqrt(max_compute_work_group_invocations);
+ fprintf(stderr, "max local size: %d, max_local_size_dim: %d\n", max_compute_work_group_invocations, self->max_local_size_dim);
switch(params->destination_color) {
case GSR_DESTINATION_COLOR_NV12:
@@ -324,27 +253,15 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
return -1;
}
- if(load_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], params->destination_color, params->color_range, false) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
+ if(load_compute_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], self->max_local_size_dim, params->destination_color, params->color_range) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n");
goto err;
}
- if(load_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], params->destination_color, params->color_range, false) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n");
+ if(load_compute_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], self->max_local_size_dim, params->destination_color, params->color_range) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n");
goto err;
}
-
- if(self->params.load_external_image_shader) {
- if(load_shader_y(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], params->destination_color, params->color_range, true) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
- goto err;
- }
-
- if(load_shader_uv(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], params->destination_color, params->color_range, true) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n");
- goto err;
- }
- }
break;
}
case GSR_DESTINATION_COLOR_RGB8: {
@@ -353,17 +270,10 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
return -1;
}
- if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->uniforms[0], false) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
+ if(load_compute_shader_rgb(&self->shaders[2], self->params.egl, &self->uniforms[2], self->max_local_size_dim) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n");
goto err;
}
-
- if(self->params.load_external_image_shader) {
- if(load_shader_rgb(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], true) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
- goto err;
- }
- }
break;
}
}
@@ -400,127 +310,119 @@ void gsr_color_conversion_deinit(gsr_color_conversion *self) {
self->framebuffers[i] = 0;
}
- for(int i = 0; i < MAX_SHADERS; ++i) {
+ for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_SHADERS; ++i) {
gsr_shader_deinit(&self->shaders[i]);
}
self->params.egl = NULL;
}
-static void gsr_color_conversion_swizzle_texture_source(gsr_color_conversion *self, gsr_source_color source_color) {
- if(source_color == GSR_SOURCE_COLOR_BGR) {
- const int swizzle_mask[] = { GL_BLUE, GL_GREEN, GL_RED, 1 };
- self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask);
+static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2], vec2i *source_position, vec2i texture_size, vec2f scale) { // Writes the 2x2 matrix for |rotation| into rotation_matrix and, for 90/270 degrees only, adjusts *source_position. The switch has no default, so any other value leaves both outputs untouched.
+ /*
+ rotation_matrix[0][0] = cos(angle);
+ rotation_matrix[0][1] = -sin(angle);
+ rotation_matrix[1][0] = sin(angle);
+ rotation_matrix[1][1] = cos(angle);
+ The manual matrix code below is the same as this code above, but without floating-point errors.
+ This is done to remove any blurring caused by these floating-point errors.
+ */
+ switch(rotation) {
+ case GSR_ROT_0: // identity matrix
+ rotation_matrix[0][0] = 1.0f;
+ rotation_matrix[0][1] = 0.0f;
+ rotation_matrix[1][0] = 0.0f;
+ rotation_matrix[1][1] = 1.0f;
+ break;
+ case GSR_ROT_90:
+ rotation_matrix[0][0] = 0.0f;
+ rotation_matrix[0][1] = -1.0f;
+ rotation_matrix[1][0] = 1.0f;
+ rotation_matrix[1][1] = 0.0f;
+ source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); // shift by half the width/height difference, scaled; the + 0.5 rounds before the implicit conversion to int
+ source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5); // NOTE(review): + 0.5 followed by truncation rounds to nearest only for positive values - confirm for the negative case
+ break;
+ case GSR_ROT_180: // negated identity matrix, no position adjustment
+ rotation_matrix[0][0] = -1.0f;
+ rotation_matrix[0][1] = 0.0f;
+ rotation_matrix[1][0] = 0.0f;
+ rotation_matrix[1][1] = -1.0f;
+ break;
+ case GSR_ROT_270:
+ rotation_matrix[0][0] = 0.0f;
+ rotation_matrix[0][1] = 1.0f;
+ rotation_matrix[1][0] = -1.0f;
+ rotation_matrix[1][1] = 0.0f;
+ source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5); // same position adjustment as the GSR_ROT_90 case
+ source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5);
+ break;
}
}
-static void gsr_color_conversion_swizzle_reset(gsr_color_conversion *self, gsr_source_color source_color) {
- if(source_color == GSR_SOURCE_COLOR_BGR) {
- const int swizzle_mask[] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA };
- self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask);
- }
-}
+// TODO: Handle source_color (the parameter is currently not used by this function)
+void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) { // Draws texture_id into the destination texture(s) by dispatching the compute shader(s) that match params.destination_color.
+ vec2f scale = {0.0f, 0.0f}; // stays zero when texture_size is not positive, which also avoids dividing by zero below
+ if(texture_size.x > 0 && texture_size.y > 0)
+ scale = (vec2f){ (double)destination_size.x/(double)texture_size.x, (double)destination_size.y/(double)texture_size.y };
-/* |source_pos| is in pixel coordinates and |source_size| */
-void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color) {
- // TODO: Remove this crap
- rotation = M_PI*2.0f - rotation;
+ vec2i source_position = {0, 0};
+ float rotation_matrix[2][2] = {{0, 0}, {0, 0}};
+ gsr_color_conversion_apply_rotation(rotation, rotation_matrix, &source_position, texture_size, scale); // fills rotation_matrix; for 90/270 degrees it also offsets source_position
- /* TODO: Do not call this every frame? */
- vec2i dest_texture_size = {0, 0};
- self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[0]);
- self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &dest_texture_size.x);
- self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &dest_texture_size.y);
- self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
+ source_position.x += texture_pos.x; // texture_pos is added on top of the rotation offset
+ source_position.y += texture_pos.y; // NOTE(review): with external_texture the texture is bound to GL_TEXTURE_EXTERNAL_OES below, but the compute shaders in this file sample a sampler2D - see the external texture TODO at the top of the file
const int texture_target = external_texture ? GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D;
self->params.egl->glBindTexture(texture_target, texture_id);
- vec2i source_texture_size = {0, 0};
- if(external_texture) {
- assert(self->params.load_external_image_shader);
- source_texture_size = source_size;
- } else {
- /* TODO: Do not call this every frame? */
- self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_WIDTH, &source_texture_size.x);
- self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_HEIGHT, &source_texture_size.y);
- }
-
- // TODO: Remove this crap
- if(abs_f(M_PI * 0.5f - rotation) <= 0.001f || abs_f(M_PI * 1.5f - rotation) <= 0.001f) {
- float tmp = source_texture_size.x;
- source_texture_size.x = source_texture_size.y;
- source_texture_size.y = tmp;
- }
-
- const vec2f pos_norm = {
- ((float)source_pos.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f,
- ((float)source_pos.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f,
- };
-
- const vec2f size_norm = {
- ((float)source_size.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f,
- ((float)source_size.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f,
- };
-
- const vec2f texture_pos_norm = {
- (float)texture_pos.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x),
- (float)texture_pos.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y),
- };
-
- const vec2f texture_size_norm = {
- (float)texture_size.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x),
- (float)texture_size.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y),
- };
-
- const float vertices[] = {
- -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y,
- -1.0f + 0.0f, -1.0f + 0.0f, texture_pos_norm.x, texture_pos_norm.y,
- -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y,
-
- -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y,
- -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y,
- -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y + texture_size_norm.y
- };
-
- gsr_color_conversion_swizzle_texture_source(self, source_color);
-
- self->params.egl->glBindVertexArray(self->vertex_array_object_id);
- self->params.egl->glViewport(0, 0, dest_texture_size.x, dest_texture_size.y);
-
- /* TODO: this, also cleanup */
- //self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id);
- self->params.egl->glBufferSubData(GL_ARRAY_BUFFER, 0, 24 * sizeof(float), vertices);
-
- {
- self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]);
- //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. We want to do that when using multiple drm to create the final image (multiple monitors for example)
+ switch(self->params.destination_color) {
+ case GSR_DESTINATION_COLOR_NV12:
+ case GSR_DESTINATION_COLOR_P010: { // two dispatches: shader 0 into destination_textures[0], shader 1 into destination_textures[1]
+ const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010;
+ // Y
+ {
+ gsr_shader_use(&self->shaders[0]);
+ self->params.egl->glUniformMatrix2fv(self->uniforms[0].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix); // GL_TRUE is the transpose flag: the C array is laid out row by row
+ self->params.egl->glUniform2i(self->uniforms[0].source_position, source_position.x, source_position.y);
+ self->params.egl->glUniform2i(self->uniforms[0].target_position, destination_pos.x, destination_pos.y);
+ self->params.egl->glUniform2f(self->uniforms[0].scale, scale.x, scale.y);
+ self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_R16 : GL_R8); // image unit 0 matches "binding = 0" in the shader source
+ const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5; // NOTE(review): this rounds to nearest, not up, so trailing pixels can be left without a work group when the remainder is below half a group
+ const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5; // NOTE(review): sized from texture_size, not destination_size, although the shader scales by |scale| - confirm coverage when scale != 1
+ self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1); // the doubles are converted to int (truncated) for max_int; at least one group per axis
+ }
- const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET : 0;
- gsr_shader_use(&self->shaders[shader_index]);
- self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation);
- self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y);
- self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6);
+ // UV
+ {
+ gsr_shader_use(&self->shaders[1]);
+ self->params.egl->glUniformMatrix2fv(self->uniforms[1].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
+ self->params.egl->glUniform2i(self->uniforms[1].source_position, source_position.x, source_position.y); // full-size positions are passed; the UV shader source halves them itself
+ self->params.egl->glUniform2i(self->uniforms[1].target_position, destination_pos.x, destination_pos.y);
+ self->params.egl->glUniform2f(self->uniforms[1].scale, scale.x, scale.y);
+ self->params.egl->glBindImageTexture(0, self->params.destination_textures[1], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_RG16 : GL_RG8);
+ const double num_groups_x = (double)texture_size.x*0.5/(double)self->max_local_size_dim + 0.5; // half as many groups per axis as the Y pass; same rounding caveat as above
+ const double num_groups_y = (double)texture_size.y*0.5/(double)self->max_local_size_dim + 0.5;
+ self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
+ }
+ break;
+ }
+ case GSR_DESTINATION_COLOR_RGB8: { // single dispatch with shader/uniform slot 2 into destination_textures[0]
+ gsr_shader_use(&self->shaders[2]);
+ self->params.egl->glUniformMatrix2fv(self->uniforms[2].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
+ self->params.egl->glUniform2i(self->uniforms[2].source_position, source_position.x, source_position.y);
+ self->params.egl->glUniform2i(self->uniforms[2].target_position, destination_pos.x, destination_pos.y);
+ self->params.egl->glUniform2f(self->uniforms[2].scale, scale.x, scale.y);
+ self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8);
+ const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5; // same rounding caveat as in the Y pass
+ const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5;
+ self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
+ break;
+ }
}
- if(self->params.num_destination_textures > 1) {
- self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]);
- //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT);
+ self->params.egl->glMemoryBarrier(GL_ALL_BARRIER_BITS); // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
+ self->params.egl->glUseProgram(0); // unbind the compute program before returning
- const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET + 1 : 1;
- gsr_shader_use(&self->shaders[shader_index]);
- self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation);
- self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y);
- self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6);
- }
-
- self->params.egl->glBindVertexArray(0);
- gsr_shader_use_none(&self->shaders[0]);
self->params.egl->glBindTexture(texture_target, 0);
- self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
-
- gsr_color_conversion_swizzle_reset(self, source_color);
}
void gsr_color_conversion_clear(gsr_color_conversion *self) {
@@ -557,3 +459,7 @@ void gsr_color_conversion_clear(gsr_color_conversion *self) {
self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
+
+gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation) { // Converts with a plain cast; assumes both enums use the same numeric values for 0/90/180/270 - TODO confirm against the gsr_monitor_rotation definition
+ return (gsr_rotation)monitor_rotation;
+}
diff --git a/src/egl.c b/src/egl.c
index 7c9a877..1138ae7 100644
--- a/src/egl.c
+++ b/src/egl.c
@@ -284,6 +284,7 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGenTextures, "glGenTextures" },
{ (void**)&self->glDeleteTextures, "glDeleteTextures" },
{ (void**)&self->glBindTexture, "glBindTexture" },
+ { (void**)&self->glBindImageTexture, "glBindImageTexture" },
{ (void**)&self->glTexParameteri, "glTexParameteri" },
{ (void**)&self->glTexParameteriv, "glTexParameteriv" },
{ (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" },
@@ -293,6 +294,8 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGenFramebuffers, "glGenFramebuffers" },
{ (void**)&self->glBindFramebuffer, "glBindFramebuffer" },
{ (void**)&self->glDeleteFramebuffers, "glDeleteFramebuffers" },
+ { (void**)&self->glDispatchCompute, "glDispatchCompute" },
+ { (void**)&self->glMemoryBarrier, "glMemoryBarrier" },
{ (void**)&self->glViewport, "glViewport" },
{ (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" },
{ (void**)&self->glDrawBuffers, "glDrawBuffers" },
@@ -329,11 +332,14 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGetUniformLocation, "glGetUniformLocation" },
{ (void**)&self->glUniform1f, "glUniform1f" },
{ (void**)&self->glUniform2f, "glUniform2f" },
+ { (void**)&self->glUniform2i, "glUniform2i" },
+ { (void**)&self->glUniformMatrix2fv, "glUniformMatrix2fv" },
{ (void**)&self->glDebugMessageCallback, "glDebugMessageCallback" },
{ (void**)&self->glScissor, "glScissor" },
{ (void**)&self->glReadPixels, "glReadPixels" },
{ (void**)&self->glMapBuffer, "glMapBuffer" },
{ (void**)&self->glUnmapBuffer, "glUnmapBuffer" },
+ { (void**)&self->glGetIntegerv, "glGetIntegerv" },
{ NULL, NULL }
};
diff --git a/src/encoder/video/software.c b/src/encoder/video/software.c
index 3649ff1..627cdea 100644
--- a/src/encoder/video/software.c
+++ b/src/encoder/video/software.c
@@ -83,8 +83,8 @@ static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder
self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
// cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface);
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ //self->params.egl->glFlush();
+ //self->params.egl->glFinish();
}
static void gsr_video_encoder_software_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) {
diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c
index 8bb2f08..1d5dae0 100644
--- a/src/encoder/video/vaapi.c
+++ b/src/encoder/video/vaapi.c
@@ -123,8 +123,8 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self
self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[i]);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
while(self->params.egl->glGetError()) {}
while(self->params.egl->eglGetError() != EGL_SUCCESS){}
diff --git a/src/image_writer.c b/src/image_writer.c
index c4d66f4..e153a34 100644
--- a/src/image_writer.c
+++ b/src/image_writer.c
@@ -17,7 +17,7 @@ bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int widt
self->egl = egl;
self->width = width;
self->height = height;
- self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGB8, GL_RGB, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */
+ self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGBA8, GL_RGBA, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */
if(self->texture == 0) {
fprintf(stderr, "gsr error: gsr_image_writer_init: failed to create texture\n");
return false;
@@ -50,10 +50,10 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const
bool success = false;
switch(image_format) {
case GSR_IMAGE_FORMAT_JPEG:
- success = stbi_write_jpg(filepath, self->width, self->height, 3, data, quality);
+ success = stbi_write_jpg(filepath, self->width, self->height, 4, data, quality);
break;
case GSR_IMAGE_FORMAT_PNG:
- success = stbi_write_png(filepath, self->width, self->height, 3, data, 0);
+ success = stbi_write_png(filepath, self->width, self->height, 4, data, 0);
break;
}
@@ -65,7 +65,7 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const
static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self, const char *filepath, gsr_image_format image_format, int quality) {
assert(self->source == GSR_IMAGE_WRITER_SOURCE_OPENGL);
- uint8_t *frame_data = malloc(self->width * self->height * 3);
+ uint8_t *frame_data = malloc(self->width * self->height * 4);
if(!frame_data) {
fprintf(stderr, "gsr error: gsr_image_writer_write_to_file: failed to allocate memory for image frame\n");
return false;
@@ -74,7 +74,7 @@ static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self
// TODO: hdr support
self->egl->glBindTexture(GL_TEXTURE_2D, self->texture);
// We could use glGetTexSubImage, but it's only available starting from opengl 4.5
- self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, frame_data);
+ self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, frame_data);
self->egl->glBindTexture(GL_TEXTURE_2D, 0);
self->egl->glFlush();
diff --git a/src/main.cpp b/src/main.cpp
index 6adeb05..f0a3e06 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1072,8 +1072,9 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
// TODO: More quality options
if(low_power)
av_dict_set_int(&options, "low_power", 1, 0);
- // Improves performance but increases vram
- //av_dict_set_int(&options, "async_depth", 8, 0);
+ // Improves performance but increases vram.
+ // TODO: Might need a different async_depth for optimal performance on different amd/intel gpus
+ //av_dict_set_int(&options, "async_depth", 3, 0);
if(codec_context->codec_id == AV_CODEC_ID_H264) {
// Removed because it causes stutter in games for some people
diff --git a/src/shader.c b/src/shader.c
index dcb956b..b9fbb62 100644
--- a/src/shader.c
+++ b/src/shader.c
@@ -36,28 +36,36 @@ static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *s
return shader_id;
}
-static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) {
+static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) {
unsigned int vertex_shader_id = 0;
unsigned int fragment_shader_id = 0;
+ unsigned int compute_shader_id = 0;
unsigned int program_id = 0;
int linked = 0;
+ bool success = false;
if(vertex_shader) {
vertex_shader_id = loader_shader(egl, GL_VERTEX_SHADER, vertex_shader);
if(vertex_shader_id == 0)
- goto err;
+ goto done;
}
if(fragment_shader) {
fragment_shader_id = loader_shader(egl, GL_FRAGMENT_SHADER, fragment_shader);
if(fragment_shader_id == 0)
- goto err;
+ goto done;
+ }
+
+ if(compute_shader) {
+ compute_shader_id = loader_shader(egl, GL_COMPUTE_SHADER, compute_shader);
+ if(compute_shader_id == 0)
+ goto done;
}
program_id = egl->glCreateProgram();
if(program_id == 0) {
fprintf(stderr, "gsr error: load_program: failed to create shader program, error: %d\n", egl->glGetError());
- goto err;
+ goto done;
}
if(vertex_shader_id)
@@ -66,6 +74,9 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const
if(fragment_shader_id)
egl->glAttachShader(program_id, fragment_shader_id);
+ if(compute_shader_id)
+ egl->glAttachShader(program_id, compute_shader_id);
+
egl->glLinkProgram(program_id);
egl->glGetProgramiv(program_id, GL_LINK_STATUS, &linked);
@@ -79,37 +90,36 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const
fprintf(stderr, "gsr error: load program: linking shader program failed, error:\n%s\n", info_log);
}
- goto err;
+ goto done;
}
- if(fragment_shader_id)
- egl->glDeleteShader(fragment_shader_id);
- if(vertex_shader_id)
- egl->glDeleteShader(vertex_shader_id);
-
- return program_id;
+ success = true;
+ done:
- err:
- if(program_id)
- egl->glDeleteProgram(program_id);
+ if(!success) {
+ if(program_id)
+ egl->glDeleteProgram(program_id);
+ }
+ if(compute_shader_id)
+ egl->glDeleteShader(compute_shader_id);
if(fragment_shader_id)
egl->glDeleteShader(fragment_shader_id);
if(vertex_shader_id)
egl->glDeleteShader(vertex_shader_id);
- return 0;
+ return program_id;
}
-int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) {
+int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) {
assert(egl);
self->egl = egl;
self->program_id = 0;
- if(!vertex_shader && !fragment_shader) {
- fprintf(stderr, "gsr error: gsr_shader_init: vertex shader and fragment shader can't be NULL at the same time\n");
+ if(!vertex_shader && !fragment_shader && !compute_shader) {
+ fprintf(stderr, "gsr error: gsr_shader_init: vertex, fragment shader and compute shaders can't be NULL at the same time\n");
return -1;
}
- self->program_id = load_program(self->egl, vertex_shader, fragment_shader);
+ self->program_id = load_program(self->egl, vertex_shader, fragment_shader, compute_shader);
if(self->program_id == 0)
return -1;
diff --git a/src/utils.c b/src/utils.c
index 325f750..943fb2d 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -14,10 +14,8 @@
#include <xf86drmMode.h>
#include <xf86drm.h>
-#include <libdrm/drm_fourcc.h>
#include <X11/Xatom.h>
#include <X11/extensions/Xrandr.h>
-#include <va/va_drmcommon.h>
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext_vaapi.h>
@@ -663,241 +661,6 @@ bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context) {
return device_context->type == AV_HWDEVICE_TYPE_VAAPI;
}
-static uint32_t drm_fourcc_to_va_fourcc(uint32_t drm_fourcc) {
- switch(drm_fourcc) {
- case DRM_FORMAT_XRGB8888: return VA_FOURCC_BGRX;
- case DRM_FORMAT_XBGR8888: return VA_FOURCC_RGBX;
- case DRM_FORMAT_RGBX8888: return VA_FOURCC_XBGR;
- case DRM_FORMAT_BGRX8888: return VA_FOURCC_XRGB;
- case DRM_FORMAT_ARGB8888: return VA_FOURCC_BGRA;
- case DRM_FORMAT_ABGR8888: return VA_FOURCC_RGBA;
- case DRM_FORMAT_RGBA8888: return VA_FOURCC_ABGR;
- case DRM_FORMAT_BGRA8888: return VA_FOURCC_ARGB;
- default: return drm_fourcc;
- }
-}
-
-bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) {
- VAConfigID config_id = 0;
- VAContextID context_id = 0;
- VASurfaceID input_surface_id = 0;
- VABufferID buffer_id = 0;
- bool success = true;
-
- VADisplay va_dpy = video_codec_context_get_vaapi_display(video_codec_context);
- if(!va_dpy) {
- success = false;
- goto done;
- }
-
- VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id);
- if(va_status != VA_STATUS_SUCCESS) {
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %s\n", vaErrorStr(va_status));
- success = false;
- goto done;
- }
-
- VASurfaceID output_surface_id = (uintptr_t)video_frame->data[3];
- va_status = vaCreateContext(va_dpy, config_id, size.x, size.y, VA_PROGRESSIVE, &output_surface_id, 1, &context_id);
- if(va_status != VA_STATUS_SUCCESS) {
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %s\n", vaErrorStr(va_status));
- success = false;
- goto done;
- }
-
- VADRMPRIMESurfaceDescriptor buf = {0};
- buf.fourcc = drm_fourcc_to_va_fourcc(format);//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10
- buf.width = size.x;
- buf.height = size.y;
- buf.num_objects = num_planes;
- buf.num_layers = 1;
- buf.layers[0].drm_format = format;
- buf.layers[0].num_planes = buf.num_objects;
- for(int i = 0; i < num_planes; ++i) {
- buf.objects[i].fd = fds[i];
- buf.objects[i].size = size.y * pitches[i]; // TODO:
- buf.objects[i].drm_format_modifier = modifiers[i];
-
- buf.layers[0].object_index[i] = i;
- buf.layers[0].offset[i] = offsets[i];
- buf.layers[0].pitch[i] = pitches[i];
- }
-
- VASurfaceAttrib attribs[2] = {0};
- attribs[0].type = VASurfaceAttribMemoryType;
- attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
- attribs[0].value.type = VAGenericValueTypeInteger;
- attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
- attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
- attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
- attribs[1].value.type = VAGenericValueTypePointer;
- attribs[1].value.value.p = &buf;
-
- // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10
- // TODO: Max size same as source_size
- va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, size.x, size.y, &input_surface_id, 1, attribs, 2);
- if(va_status != VA_STATUS_SUCCESS) {
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %s\n", vaErrorStr(va_status));
- success = false;
- goto done;
- }
-
- const VARectangle source_region = {
- .x = source_pos.x,
- .y = source_pos.y,
- .width = source_size.x,
- .height = source_size.y
- };
-
- const VARectangle output_region = {
- .x = dest_pos.x,
- .y = dest_pos.y,
- .width = dest_size.x,
- .height = dest_size.y
- };
-
- const bool scaled = dest_size.x != source_size.x || dest_size.y != source_size.y;
-
- // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi!
- VAProcPipelineParameterBuffer params = {0};
- params.surface = input_surface_id;
- params.surface_region = NULL;
- params.surface_region = &source_region;
- params.output_region = &output_region;
- params.output_background_color = 0;
- params.filter_flags = scaled ? (VA_FILTER_SCALING_HQ | VA_FILTER_INTERPOLATION_BILINEAR) : 0;
- params.pipeline_flags = VA_PROC_PIPELINE_FAST;
-
- params.input_color_properties.colour_primaries = 1;
- params.input_color_properties.transfer_characteristics = 1;
- params.input_color_properties.matrix_coefficients = 1;
- params.surface_color_standard = VAProcColorStandardBT709; // TODO:
- params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
-
- params.output_color_properties.colour_primaries = 1;
- params.output_color_properties.transfer_characteristics = 1;
- params.output_color_properties.matrix_coefficients = 1;
- params.output_color_standard = VAProcColorStandardBT709; // TODO:
- params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
-
- params.processing_mode = VAProcPerformanceMode;
-
- // VAProcPipelineCaps pipeline_caps = {0};
- // va_status = vaQueryVideoProcPipelineCaps(self->va_dpy,
- // self->context_id,
- // NULL, 0,
- // &pipeline_caps);
- // if(va_status == VA_STATUS_SUCCESS) {
- // fprintf(stderr, "pipeline_caps: %u, %u\n", (unsigned int)pipeline_caps.rotation_flags, pipeline_caps.blend_flags);
- // }
-
- // TODO: params.output_hdr_metadata
-
- // TODO:
- // if (first surface to render)
- // pipeline_param->output_background_color = 0xff000000; // black
-
- va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id);
- if(va_status != VA_STATUS_SUCCESS) {
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status);
- success = false;
- goto done;
- }
-
- va_status = vaBeginPicture(va_dpy, context_id, output_surface_id);
- if(va_status != VA_STATUS_SUCCESS) {
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status);
- success = false;
- goto done;
- }
-
- va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1);
- if(va_status != VA_STATUS_SUCCESS) {
- vaEndPicture(va_dpy, context_id);
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status);
- success = false;
- goto done;
- }
-
- va_status = vaEndPicture(va_dpy, context_id);
- if(va_status != VA_STATUS_SUCCESS) {
- fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status);
- success = false;
- goto done;
- }
-
- // vaSyncBuffer(va_dpy, buffer_id, 1000 * 1000 * 1000);
- // vaSyncSurface(va_dpy, input_surface_id);
- // vaSyncSurface(va_dpy, output_surface_id);
-
- done:
- if(buffer_id)
- vaDestroyBuffer(va_dpy, buffer_id);
-
- if(input_surface_id)
- vaDestroySurfaces(va_dpy, &input_surface_id, 1);
-
- if(context_id)
- vaDestroyContext(va_dpy, context_id);
-
- if(config_id)
- vaDestroyConfig(va_dpy, config_id);
-
- return success;
-}
-
-bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame) {
- if(!image)
- return false;
-
- int texture_fourcc = 0;
- int texture_num_planes = 0;
- uint64_t texture_modifiers = 0;
- if(!egl->eglExportDMABUFImageQueryMESA(egl->egl_display, image, &texture_fourcc, &texture_num_planes, &texture_modifiers)) {
- fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageQueryMESA failed\n");
- return false;
- }
-
- if(texture_num_planes <= 0 || texture_num_planes > 8) {
- fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: expected planes size to be 0<planes<=8 for drm buf, got %d planes\n", texture_num_planes);
- return false;
- }
-
- int texture_fds[8];
- int32_t texture_strides[8];
- int32_t texture_offsets[8];
-
- while(egl->eglGetError() != EGL_SUCCESS){}
- if(!egl->eglExportDMABUFImageMESA(egl->egl_display, image, texture_fds, texture_strides, texture_offsets)) {
- fprintf(stderr, "gsr error: gsr_capture_xcomposite_vaapi_tick: eglExportDMABUFImageMESA failed, error: %d\n", egl->eglGetError());
- return false;
- }
-
- int fds[8];
- uint32_t offsets[8];
- uint32_t pitches[8];
- uint64_t modifiers[8];
- for(int i = 0; i < texture_num_planes; ++i) {
- fds[i] = texture_fds[i];
- offsets[i] = texture_offsets[i];
- pitches[i] = texture_strides[i];
- modifiers[i] = texture_modifiers;
-
- if(fds[i] == -1)
- texture_num_planes = i;
- }
- const bool success = texture_num_planes > 0 && vaapi_copy_drm_planes_to_video_surface(video_codec_context, video_frame, source_pos, source_size, dest_pos, dest_size, texture_fourcc, source_size, fds, offsets, pitches, modifiers, texture_num_planes);
-
- for(int i = 0; i < texture_num_planes; ++i) {
- if(texture_fds[i] > 0) {
- close(texture_fds[i]);
- texture_fds[i] = -1;
- }
- }
-
- return success;
-}
-
vec2i scale_keep_aspect_ratio(vec2i from, vec2i to) {
if(from.x == 0 || from.y == 0)
return (vec2i){0, 0};