aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2025-03-29 15:37:53 +0100
committerdec05eba <dec05eba@protonmail.com>2025-03-29 15:37:53 +0100
commitf85a7ab2055ddc8e2231a380f0c46e29e42c2072 (patch)
tree33cf9c1d086f3bd8332e36e97c73f793f18db11e
parenta41a32cb9079dc3c78414b7b0cdc68243333020d (diff)
Revert "WIP: use compute shader instead of graphics shader for better performance (especially on amd)"
This reverts commit a41a32cb9079dc3c78414b7b0cdc68243333020d.
-rw-r--r--include/color_conversion.h26
-rw-r--r--include/egl.h14
-rw-r--r--include/image_writer.h2
-rw-r--r--include/shader.h2
-rw-r--r--include/utils.h2
-rw-r--r--src/capture/kms.c101
-rw-r--r--src/capture/nvfbc.c10
-rw-r--r--src/capture/portal.c61
-rw-r--r--src/capture/xcomposite.c35
-rw-r--r--src/capture/ximage.c4
-rw-r--r--src/color_conversion.c516
-rw-r--r--src/egl.c6
-rw-r--r--src/encoder/video/software.c4
-rw-r--r--src/encoder/video/vaapi.c4
-rw-r--r--src/image_writer.c10
-rw-r--r--src/main.cpp5
-rw-r--r--src/shader.c48
-rw-r--r--src/utils.c237
18 files changed, 751 insertions, 336 deletions
diff --git a/include/color_conversion.h b/include/color_conversion.h
index 37eeb8e..b80be21 100644
--- a/include/color_conversion.h
+++ b/include/color_conversion.h
@@ -2,12 +2,9 @@
#define GSR_COLOR_CONVERSION_H
#include "shader.h"
-#include "defs.h"
#include "vec2.h"
#include <stdbool.h>
-#define GSR_COLOR_CONVERSION_MAX_SHADERS 3
-
typedef enum {
GSR_COLOR_RANGE_LIMITED,
GSR_COLOR_RANGE_FULL
@@ -29,18 +26,9 @@ typedef enum {
GSR_DESTINATION_COLOR_RGB8
} gsr_destination_color;
-typedef enum {
- GSR_ROT_0,
- GSR_ROT_90,
- GSR_ROT_180,
- GSR_ROT_270
-} gsr_rotation;
-
typedef struct {
- int rotation_matrix;
- int source_position;
- int target_position;
- int scale;
+ int offset;
+ int rotation;
} gsr_color_uniforms;
typedef struct {
@@ -57,23 +45,19 @@ typedef struct {
typedef struct {
gsr_color_conversion_params params;
- gsr_color_uniforms uniforms[GSR_COLOR_CONVERSION_MAX_SHADERS];
- gsr_shader shaders[GSR_COLOR_CONVERSION_MAX_SHADERS];
+ gsr_color_uniforms uniforms[4];
+ gsr_shader shaders[4];
unsigned int framebuffers[2];
unsigned int vertex_array_object_id;
unsigned int vertex_buffer_object_id;
-
- int max_local_size_dim;
} gsr_color_conversion;
int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conversion_params *params);
void gsr_color_conversion_deinit(gsr_color_conversion *self);
-void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color);
+void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color);
void gsr_color_conversion_clear(gsr_color_conversion *self);
-gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation);
-
#endif /* GSR_COLOR_CONVERSION_H */
diff --git a/include/egl.h b/include/egl.h
index 643ab30..0d08270 100644
--- a/include/egl.h
+++ b/include/egl.h
@@ -98,7 +98,7 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_TEXTURE_EXTERNAL_OES 0x8D65
#define GL_RED 0x1903
#define GL_GREEN 0x1904
-#define GL_BLUE 0x1905
+#define GL_BLUE 0x1905
#define GL_ALPHA 0x1906
#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46
#define GL_RG 0x8227
@@ -111,7 +111,6 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_R16 0x822A
#define GL_RG16 0x822C
#define GL_RGB16 0x8054
-#define GL_RGBA32F 0x8814
#define GL_UNSIGNED_BYTE 0x1401
#define GL_COLOR_BUFFER_BIT 0x00004000
#define GL_TEXTURE_WRAP_S 0x2802
@@ -135,10 +134,6 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_SCISSOR_TEST 0x0C11
#define GL_PACK_ALIGNMENT 0x0D05
#define GL_UNPACK_ALIGNMENT 0x0CF5
-#define GL_READ_ONLY 0x88B8
-#define GL_WRITE_ONLY 0x88B9
-#define GL_READ_WRITE 0x88BA
-#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS 0x90EB
#define GL_VENDOR 0x1F00
#define GL_RENDERER 0x1F01
@@ -148,7 +143,6 @@ typedef void(*__GLXextFuncPtr)(void);
#define GL_INFO_LOG_LENGTH 0x8B84
#define GL_FRAGMENT_SHADER 0x8B30
#define GL_VERTEX_SHADER 0x8B31
-#define GL_COMPUTE_SHADER 0x91B9
#define GL_COMPILE_STATUS 0x8B81
#define GL_LINK_STATUS 0x8B82
@@ -237,7 +231,6 @@ struct gsr_egl {
void (*glGenTextures)(int n, unsigned int *textures);
void (*glDeleteTextures)(int n, const unsigned int *texture);
void (*glBindTexture)(unsigned int target, unsigned int texture);
- void (*glBindImageTexture)(unsigned int unit, unsigned int texture, int level, unsigned char layered, int layer, unsigned int access, unsigned int format);
void (*glTexParameteri)(unsigned int target, unsigned int pname, int param);
void (*glTexParameteriv)(unsigned int target, unsigned int pname, const int *params);
void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params);
@@ -247,8 +240,6 @@ struct gsr_egl {
void (*glGenFramebuffers)(int n, unsigned int *framebuffers);
void (*glBindFramebuffer)(unsigned int target, unsigned int framebuffer);
void (*glDeleteFramebuffers)(int n, const unsigned int *framebuffers);
- void (*glDispatchCompute)(unsigned int num_groups_x, unsigned int num_groups_y, unsigned int num_groups_z);
- void (*glMemoryBarrier)(unsigned int barriers);
void (*glViewport)(int x, int y, int width, int height);
void (*glFramebufferTexture2D)(unsigned int target, unsigned int attachment, unsigned int textarget, unsigned int texture, int level);
void (*glDrawBuffers)(int n, const unsigned int *bufs);
@@ -285,14 +276,11 @@ struct gsr_egl {
int (*glGetUniformLocation)(unsigned int program, const char *name);
void (*glUniform1f)(int location, float v0);
void (*glUniform2f)(int location, float v0, float v1);
- void (*glUniform2i)(int location, int v0, int v1);
- void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value);
void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam);
void (*glScissor)(int x, int y, int width, int height);
void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels);
void* (*glMapBuffer)(unsigned int target, unsigned int access);
unsigned char (*glUnmapBuffer)(unsigned int target);
- void (*glGetIntegerv)(unsigned int pname, int *params);
};
bool gsr_egl_load(gsr_egl *self, gsr_window *window, bool is_monitor_capture, bool enable_debug);
diff --git a/include/image_writer.h b/include/image_writer.h
index 65e7497..400edd0 100644
--- a/include/image_writer.h
+++ b/include/image_writer.h
@@ -25,7 +25,7 @@ typedef struct {
} gsr_image_writer;
bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int width, int height);
-/* |memory| is taken as a reference. The data is expected to be in rgba8 format (8 bit rgba) */
+/* |memory| is taken as a reference */
bool gsr_image_writer_init_memory(gsr_image_writer *self, const void *memory, int width, int height);
void gsr_image_writer_deinit(gsr_image_writer *self);
diff --git a/include/shader.h b/include/shader.h
index 8bc1104..57d1096 100644
--- a/include/shader.h
+++ b/include/shader.h
@@ -9,7 +9,7 @@ typedef struct {
} gsr_shader;
/* |vertex_shader| or |fragment_shader| may be NULL */
-int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader);
+int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader);
void gsr_shader_deinit(gsr_shader *self);
int gsr_shader_bind_attribute_location(gsr_shader *self, const char *attribute, int location);
diff --git a/include/utils.h b/include/utils.h
index 873e6e4..fd340e8 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -64,6 +64,8 @@ int create_directory_recursive(char *path);
/* |img_attr| needs to be at least 44 in size */
void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes, bool use_modifier);
bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context);
+bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes);
+bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame);
vec2i scale_keep_aspect_ratio(vec2i from, vec2i to);
diff --git a/src/capture/kms.c b/src/capture/kms.c
index e98fab7..578fded 100644
--- a/src/capture/kms.c
+++ b/src/capture/kms.c
@@ -53,6 +53,10 @@ typedef struct {
bool is_x11;
gsr_cursor x11_cursor;
+ bool performance_error_shown;
+ bool fast_path_failed;
+ bool mesa_supports_compute_only_vaapi_copy;
+
//int drm_fd;
//uint64_t prev_sequence;
//bool damaged;
@@ -225,6 +229,17 @@ static int gsr_capture_kms_start(gsr_capture *cap, gsr_capture_metadata *capture
capture_metadata->height = self->capture_size.y;
}
+ self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
+ if(self->fast_path_failed)
+ fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
+
+ //if(self->params.hdr) {
+ // self->fast_path_failed = true;
+ // fprintf(stderr, "gsr warning: gsr_capture_kms_start: recording with hdr requires shader color conversion which might be slow. If this is an issue record with -w portal instead (which converts HDR to SDR)\n");
+ //}
+
+ self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6);
+
self->last_time_monitor_check = clock_get_monotonic_seconds();
return 0;
}
@@ -259,6 +274,16 @@ static void gsr_capture_kms_on_event(gsr_capture *cap, gsr_egl *egl) {
// }
// }
+static float monitor_rotation_to_radians(gsr_monitor_rotation rot) {
+ switch(rot) {
+ case GSR_MONITOR_ROT_0: return 0.0f;
+ case GSR_MONITOR_ROT_90: return M_PI_2;
+ case GSR_MONITOR_ROT_180: return M_PI;
+ case GSR_MONITOR_ROT_270: return M_PI + M_PI_2;
+ }
+ return 0.0f;
+}
+
static gsr_kms_response_item* find_drm_by_connector_id(gsr_kms_response *kms_response, uint32_t connector_id) {
for(int i = 0; i < kms_response->num_items; ++i) {
if(kms_response->items[i].connector_id == connector_id && !kms_response->items[i].is_cursor)
@@ -424,7 +449,7 @@ static gsr_kms_response_item* find_cursor_drm_if_on_monitor(gsr_capture_kms *sel
return cursor_drm_fd;
}
-static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, vec2i output_size, vec2i framebuffer_size) {
+static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, const gsr_kms_response_item *cursor_drm_fd, vec2i target_pos, float texture_rotation, vec2i output_size, vec2i framebuffer_size) {
const vec2d scale = {
self->capture_size.x == 0 ? 0 : (double)output_size.x / (double)self->capture_size.x,
self->capture_size.y == 0 ? 0 : (double)output_size.y / (double)self->capture_size.y
@@ -498,7 +523,7 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color
gsr_color_conversion_draw(color_conversion, self->cursor_texture_id,
cursor_pos, (vec2i){cursor_size.x * scale.x, cursor_size.y * scale.y},
(vec2i){0, 0}, cursor_size,
- gsr_monitor_rotation_to_rotation(self->monitor_rotation), cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB);
+ texture_rotation, cursor_texture_id_is_external, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
@@ -526,7 +551,7 @@ static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color
gsr_color_conversion_draw(color_conversion, self->x11_cursor.texture_id,
cursor_pos, (vec2i){self->x11_cursor.size.x * scale.x, self->x11_cursor.size.y * scale.y},
(vec2i){0, 0}, self->x11_cursor.size,
- GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
+ 0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
@@ -579,6 +604,16 @@ static void gsr_capture_kms_update_connector_ids(gsr_capture_kms *self) {
self->capture_size = rotate_capture_size_if_rotated(self, monitor.size);
}
+static void gsr_capture_kms_fail_fast_path_if_not_fast(gsr_capture_kms *self, uint32_t pixel_format) {
+ const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF;
+ if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) {
+ self->fast_path_failed = true;
+ fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used."
+ " If you experience performance problems in the video then record on a single window on X11 or use portal capture option instead or disable 10/12-bit color option in your desktop environment settings,"
+ " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n");
+ }
+}
+
static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) {
gsr_capture_kms *self = cap->priv;
@@ -610,6 +645,15 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata))
gsr_kms_set_hdr_metadata(self, drm_fd);
+ if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
+ self->performance_error_shown = true;
+ self->fast_path_failed = true;
+ fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used."
+ " If you experience performance problems in the video then record a single window on X11 or use portal capture option instead\n");
+ }
+
+ gsr_capture_kms_fail_fast_path_if_not_fast(self, drm_fd->pixel_format);
+
self->capture_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h });
if(self->params.region_size.x > 0 && self->params.region_size.y > 0)
self->capture_size = self->params.region_size;
@@ -618,6 +662,7 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size;
output_size = scale_keep_aspect_ratio(self->capture_size, output_size);
+ const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation);
const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) };
gsr_capture_kms_update_capture_size_change(self, color_conversion, target_pos, drm_fd);
@@ -628,19 +673,41 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
capture_pos.x += self->params.region_position.x;
capture_pos.y += self->params.region_position.y;
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
-
- EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
- if(image) {
- gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
- self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
+
+ /* Fast opengl free path */
+ if(!self->fast_path_failed && self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
+ int fds[4];
+ uint32_t offsets[4];
+ uint32_t pitches[4];
+ uint64_t modifiers[4];
+ for(int i = 0; i < drm_fd->num_dma_bufs; ++i) {
+ fds[i] = drm_fd->dma_buf[i].fd;
+ offsets[i] = drm_fd->dma_buf[i].offset;
+ pitches[i] = drm_fd->dma_buf[i].pitch;
+ modifiers[i] = drm_fd->modifier;
+ }
+ if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){capture_pos.x, capture_pos.y}, self->capture_size, target_pos, output_size, drm_fd->pixel_format, (vec2i){drm_fd->width, drm_fd->height}, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs)) {
+ fprintf(stderr, "gsr error: gsr_capture_kms_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
+ self->fast_path_failed = true;
+ }
+ } else {
+ self->fast_path_failed = true;
}
- gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
- target_pos, output_size,
- capture_pos, self->capture_size,
- gsr_monitor_rotation_to_rotation(self->monitor_rotation), self->external_texture_fallback, GSR_SOURCE_COLOR_RGB);
+ if(self->fast_path_failed) {
+ EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
+ if(image) {
+ gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
+ self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
+ }
+
+ gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
+ target_pos, output_size,
+ capture_pos, self->capture_size,
+ texture_rotation, self->external_texture_fallback, GSR_SOURCE_COLOR_RGB);
+ }
if(self->params.record_cursor) {
gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane);
@@ -655,12 +722,12 @@ static int gsr_capture_kms_capture(gsr_capture *cap, gsr_capture_metadata *captu
render_x11_cursor(self, color_conversion, cursor_monitor_offset, target_pos, output_size);
} else if(cursor_drm_fd) {
const vec2i framebuffer_size = rotate_capture_size_if_rotated(self, (vec2i){ drm_fd->src_w, drm_fd->src_h });
- render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, output_size, framebuffer_size);
+ render_drm_cursor(self, color_conversion, cursor_drm_fd, target_pos, texture_rotation, output_size, framebuffer_size);
}
}
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
gsr_capture_kms_cleanup_kms_fds(self);
diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c
index 5f47b00..b6a3671 100644
--- a/src/capture/nvfbc.c
+++ b/src/capture/nvfbc.c
@@ -390,16 +390,16 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, gsr_capture_metadata *cap
return 0;
}
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
gsr_color_conversion_draw(color_conversion, self->setup_params.dwTextures[grab_params.dwTextureIndex],
target_pos, (vec2i){output_size.x, output_size.y},
self->params.region_position, frame_size,
- GSR_ROT_0, false, GSR_SOURCE_COLOR_BGR);
+ 0.0f, false, GSR_SOURCE_COLOR_BGR);
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
return 0;
}
diff --git a/src/capture/portal.c b/src/capture/portal.c
index 56072d8..e065f02 100644
--- a/src/capture/portal.c
+++ b/src/capture/portal.c
@@ -23,6 +23,9 @@ typedef struct {
vec2i capture_size;
gsr_pipewire_video_dmabuf_data dmabuf_data[GSR_PIPEWIRE_VIDEO_DMABUF_MAX_PLANES];
int num_dmabuf_data;
+
+ bool fast_path_failed;
+ bool mesa_supports_compute_only_vaapi_copy;
} gsr_capture_portal;
static void gsr_capture_portal_cleanup_plane_fds(gsr_capture_portal *self) {
@@ -302,6 +305,12 @@ static int gsr_capture_portal_start(gsr_capture *cap, gsr_capture_metadata *capt
capture_metadata->height = self->params.output_resolution.y;
}
+ self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
+ if(self->fast_path_failed)
+ fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
+
+ self->mesa_supports_compute_only_vaapi_copy = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 3, 6);
+
return 0;
}
@@ -309,6 +318,16 @@ static int max_int(int a, int b) {
return a > b ? a : b;
}
+static void gsr_capture_portal_fail_fast_path_if_not_fast(gsr_capture_portal *self, uint32_t pixel_format) {
+ const uint8_t pixel_format_color_depth_1 = (pixel_format >> 16) & 0xFF;
+ if(!self->fast_path_failed && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !self->mesa_supports_compute_only_vaapi_copy && (pixel_format_color_depth_1 == '3' || pixel_format_color_depth_1 == '4')) {
+ self->fast_path_failed = true;
+ fprintf(stderr, "gsr warning: gsr_capture_kms_capture: the monitor you are recording is in 10/12-bit color format and your mesa version is <= 24.3.6, composition will be used."
+ " If you experience performance problems in the video then record on a single window on X11 instead or disable 10/12-bit color option in your desktop environment settings,"
+ " or try to record the monitor on X11 instead (if you aren't already doing that) or update your mesa version.\n");
+ }
+}
+
static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *capture_metadata, gsr_color_conversion *color_conversion) {
(void)color_conversion;
gsr_capture_portal *self = cap->priv;
@@ -329,21 +348,45 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca
return 0;
}
+ gsr_capture_portal_fail_fast_path_if_not_fast(self, pipewire_fourcc);
+
const bool is_scaled = self->params.output_resolution.x > 0 && self->params.output_resolution.y > 0;
vec2i output_size = is_scaled ? self->params.output_resolution : self->capture_size;
output_size = scale_keep_aspect_ratio(self->capture_size, output_size);
const vec2i target_pos = { max_int(0, capture_metadata->width / 2 - output_size.x / 2), max_int(0, capture_metadata->height / 2 - output_size.y / 2) };
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
// TODO: Handle region crop
- gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id,
- target_pos, output_size,
- (vec2i){region.x, region.y}, self->capture_size,
- GSR_ROT_0, using_external_image, GSR_SOURCE_COLOR_RGB);
+ /* Fast opengl free path */
+ if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metadata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
+ int fds[4];
+ uint32_t offsets[4];
+ uint32_t pitches[4];
+ uint64_t modifiers[4];
+ for(int i = 0; i < self->num_dmabuf_data; ++i) {
+ fds[i] = self->dmabuf_data[i].fd;
+ offsets[i] = self->dmabuf_data[i].offset;
+ pitches[i] = self->dmabuf_data[i].stride;
+ modifiers[i] = pipewire_modifiers;
+ }
+ if(!vaapi_copy_drm_planes_to_video_surface(capture_metadata->video_codec_context, capture_metadata->frame, (vec2i){region.x, region.y}, self->capture_size, target_pos, output_size, pipewire_fourcc, self->capture_size, fds, offsets, pitches, modifiers, self->num_dmabuf_data)) {
+ fprintf(stderr, "gsr error: gsr_capture_portal_capture: vaapi_copy_drm_planes_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
+ self->fast_path_failed = true;
+ }
+ } else {
+ self->fast_path_failed = true;
+ }
+
+ if(self->fast_path_failed) {
+ gsr_color_conversion_draw(color_conversion, using_external_image ? self->texture_map.external_texture_id : self->texture_map.texture_id,
+ target_pos, output_size,
+ (vec2i){region.x, region.y}, self->capture_size,
+ 0.0f, using_external_image, GSR_SOURCE_COLOR_RGB);
+ }
if(self->params.record_cursor && self->texture_map.cursor_texture_id > 0 && cursor_region.width > 0) {
const vec2d scale = {
@@ -361,12 +404,12 @@ static int gsr_capture_portal_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->texture_map.cursor_texture_id,
(vec2i){cursor_pos.x, cursor_pos.y}, (vec2i){cursor_region.width * scale.x, cursor_region.height * scale.y},
(vec2i){0, 0}, (vec2i){cursor_region.width, cursor_region.height},
- GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
+ 0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
gsr_capture_portal_cleanup_plane_fds(self);
diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c
index 16bc988..d8f4c27 100644
--- a/src/capture/xcomposite.c
+++ b/src/capture/xcomposite.c
@@ -34,6 +34,7 @@ typedef struct {
gsr_cursor cursor;
bool clear_background;
+ bool fast_path_failed;
} gsr_capture_xcomposite;
static void gsr_capture_xcomposite_stop(gsr_capture_xcomposite *self) {
@@ -116,6 +117,10 @@ static int gsr_capture_xcomposite_start(gsr_capture *cap, gsr_capture_metadata *
capture_metadata->height = self->params.output_resolution.y;
}
+ self->fast_path_failed = self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && !gl_driver_version_greater_than(&self->params.egl->gpu_info, 24, 0, 9);
+ if(self->fast_path_failed)
+ fprintf(stderr, "gsr warning: gsr_capture_kms_start: your amd driver (mesa) version is known to be buggy (<= version 24.0.9), falling back to opengl copy\n");
+
self->window_resize_timer = clock_get_monotonic_seconds();
return 0;
}
@@ -253,13 +258,25 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata
const vec2i target_pos = { max_int(0, capture_metdata->width / 2 - output_size.x / 2), max_int(0, capture_metdata->height / 2 - output_size.y / 2) };
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
+
+ /* Fast opengl free path */
+ if(!self->fast_path_failed && video_codec_context_is_vaapi(capture_metdata->video_codec_context) && self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD) {
+ if(!vaapi_copy_egl_image_to_video_surface(self->params.egl, self->window_texture.image, (vec2i){0, 0}, self->texture_size, target_pos, output_size, capture_metdata->video_codec_context, capture_metdata->frame)) {
+ fprintf(stderr, "gsr error: gsr_capture_xcomposite_capture: vaapi_copy_egl_image_to_video_surface failed, falling back to opengl copy. Please report this as an issue at https://github.com/dec05eba/gpu-screen-recorder-issues\n");
+ self->fast_path_failed = true;
+ }
+ } else {
+ self->fast_path_failed = true;
+ }
- gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture),
- target_pos, output_size,
- (vec2i){0, 0}, self->texture_size,
- GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
+ if(self->fast_path_failed) {
+ gsr_color_conversion_draw(color_conversion, window_texture_get_opengl_texture_id(&self->window_texture),
+ target_pos, output_size,
+ (vec2i){0, 0}, self->texture_size,
+ 0.0f, false, GSR_SOURCE_COLOR_RGB);
+ }
if(self->params.record_cursor && self->cursor.visible) {
const vec2d scale = {
@@ -280,13 +297,13 @@ static int gsr_capture_xcomposite_capture(gsr_capture *cap, gsr_capture_metadata
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id,
cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y},
(vec2i){0, 0}, self->cursor.size,
- GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
+ 0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
return 0;
}
diff --git a/src/capture/ximage.c b/src/capture/ximage.c
index ac00d72..259761d 100644
--- a/src/capture/ximage.c
+++ b/src/capture/ximage.c
@@ -160,7 +160,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->texture_id,
target_pos, output_size,
(vec2i){0, 0}, self->capture_size,
- GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
+ 0.0f, false, GSR_SOURCE_COLOR_RGB);
if(self->params.record_cursor && self->cursor.visible) {
const vec2d scale = {
@@ -181,7 +181,7 @@ static int gsr_capture_ximage_capture(gsr_capture *cap, gsr_capture_metadata *ca
gsr_color_conversion_draw(color_conversion, self->cursor.texture_id,
cursor_pos, (vec2i){self->cursor.size.x * scale.x, self->cursor.size.y * scale.y},
(vec2i){0, 0}, self->cursor.size,
- GSR_ROT_0, false, GSR_SOURCE_COLOR_RGB);
+ 0.0f, false, GSR_SOURCE_COLOR_RGB);
self->params.egl->glDisable(GL_SCISSOR_TEST);
}
diff --git a/src/color_conversion.c b/src/color_conversion.c
index 72390f5..c1fe894 100644
--- a/src/color_conversion.c
+++ b/src/color_conversion.c
@@ -5,17 +5,21 @@
#include <math.h>
#include <assert.h>
-// TODO: external texture
-// TODO: Scissor doesn't work with compute shader. In the compute shader this can be implemented with two step calls, and using the result
-// with a call to mix to choose source/output color.
-
-#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
-// TODO: Use the minimal barrier required and move this to egl.h
-#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
-
+#define MAX_SHADERS 4
#define MAX_FRAMEBUFFERS 2
#define EXTERNAL_TEXTURE_SHADER_OFFSET 2
+static float abs_f(float v) {
+ return v >= 0.0f ? v : -v;
+}
+
+#define ROTATE_Z "mat4 rotate_z(in float angle) {\n" \
+ " return mat4(cos(angle), -sin(angle), 0.0, 0.0,\n" \
+ " sin(angle), cos(angle), 0.0, 0.0,\n" \
+ " 0.0, 0.0, 1.0, 0.0,\n" \
+ " 0.0, 0.0, 0.0, 1.0);\n" \
+ "}\n"
+
/* https://en.wikipedia.org/wiki/YCbCr, see study/color_space_transform_matrix.png */
/* ITU-R BT2020, full */
@@ -44,10 +48,6 @@
" 0.060118, 0.429412, -0.038049, 0.000000,\n" \
" 0.062745, 0.500000, 0.500000, 1.000000);\n"
-static int max_int(int a, int b) {
- return a > b ? a : b;
-}
-
static const char* color_format_range_get_transform_matrix(gsr_destination_color color_format, gsr_color_range color_range) {
switch(color_format) {
case GSR_DESTINATION_COLOR_NV12: {
@@ -76,111 +76,187 @@ static const char* color_format_range_get_transform_matrix(gsr_destination_color
return NULL;
}
-// TODO: Make alpha blending optional
-// TODO: Optimize these shaders.
-static int load_compute_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) {
+static int load_shader_y(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) {
const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range);
- const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010;
-
- char compute_shader[2048];
- snprintf(compute_shader, sizeof(compute_shader),
- "#version 430 core\n"
- "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n"
- "uniform sampler2D imgInput;\n"
- "uniform ivec2 source_position;\n"
- "uniform ivec2 target_position;\n"
- "uniform vec2 scale;\n"
- "uniform mat2 rotation_matrix;\n"
- "layout(%s, binding = 0) uniform image2D imgOutput;\n"
- "%s"
- "void main() {\n"
- " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
- " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n"
- " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n"
- " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
- " vec4 source_color = texture(imgInput, texCoord);\n"
- " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n"
- " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n"
- " float y_color = mix(output_color_yuv.r, source_color_yuv.r, source_color.a);\n"
- " imageStore(imgOutput, texelCoord + target_position, vec4(y_color, 1.0, 1.0, 1.0));\n"
- "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "r16" : "r8", color_transform_matrix);
-
- if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0)
+
+ char vertex_shader[2048];
+ snprintf(vertex_shader, sizeof(vertex_shader),
+ "#version 300 es \n"
+ "in vec2 pos; \n"
+ "in vec2 texcoords; \n"
+ "out vec2 texcoords_out; \n"
+ "uniform vec2 offset; \n"
+ "uniform float rotation; \n"
+ ROTATE_Z
+ "void main() \n"
+ "{ \n"
+ " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
+ " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n"
+ "} \n");
+
+ const char *main_code =
+ main_code =
+ " vec4 pixel = texture(tex1, texcoords_out); \n"
+ " FragColor.x = (RGBtoYUV * vec4(pixel.rgb, 1.0)).x; \n"
+ " FragColor.w = pixel.a; \n";
+
+ char fragment_shader[2048];
+ if(external_texture) {
+ snprintf(fragment_shader, sizeof(fragment_shader),
+ "#version 300 es \n"
+ "#extension GL_OES_EGL_image_external : enable \n"
+ "#extension GL_OES_EGL_image_external_essl3 : require \n"
+ "precision highp float; \n"
+ "in vec2 texcoords_out; \n"
+ "uniform samplerExternalOES tex1; \n"
+ "out vec4 FragColor; \n"
+ "%s"
+ "void main() \n"
+ "{ \n"
+ "%s"
+ "} \n", color_transform_matrix, main_code);
+ } else {
+ snprintf(fragment_shader, sizeof(fragment_shader),
+ "#version 300 es \n"
+ "precision highp float; \n"
+ "in vec2 texcoords_out; \n"
+ "uniform sampler2D tex1; \n"
+ "out vec4 FragColor; \n"
+ "%s"
+ "void main() \n"
+ "{ \n"
+ "%s"
+ "} \n", color_transform_matrix, main_code);
+ }
+
+ if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
return -1;
- uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position");
- uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
- uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
- uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
+ gsr_shader_bind_attribute_location(shader, "pos", 0);
+ gsr_shader_bind_attribute_location(shader, "texcoords", 1);
+ uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
+ uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
return 0;
}
-static int load_compute_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim, gsr_destination_color color_format, gsr_color_range color_range) {
+static unsigned int load_shader_uv(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, gsr_destination_color color_format, gsr_color_range color_range, bool external_texture) {
const char *color_transform_matrix = color_format_range_get_transform_matrix(color_format, color_range);
- const bool use_16bit_colors = color_format == GSR_DESTINATION_COLOR_P010;
-
- char compute_shader[2048];
- snprintf(compute_shader, sizeof(compute_shader),
- "#version 430 core\n"
- "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n"
- "uniform sampler2D imgInput;\n"
- "uniform ivec2 source_position;\n"
- "uniform ivec2 target_position;\n"
- "uniform vec2 scale;\n"
- "uniform mat2 rotation_matrix;\n"
- "layout(%s, binding = 0) uniform image2D imgOutput;\n"
- "%s"
- "void main() {\n"
- " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
- " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n"
- " vec2 rotated_texel_coord = vec2(texelCoord - source_position/2 - size/4) * rotation_matrix + vec2(size/4) + 0.5;\n"
- " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
- " vec4 source_color = texture(imgInput, texCoord * 2.0);\n"
- " vec4 source_color_yuv = RGBtoYUV * vec4(source_color.rgb, 1.0);\n"
- " vec4 output_color_yuv = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position/2);\n"
- " vec2 uv_color = mix(output_color_yuv.rg, source_color_yuv.gb, source_color.a);\n"
- " imageStore(imgOutput, texelCoord + target_position/2, vec4(uv_color, 1.0, 1.0));\n"
- "}\n", max_local_size_dim, max_local_size_dim, use_16bit_colors ? "rg16" : "rg8", color_transform_matrix);
-
- if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0)
+
+ char vertex_shader[2048];
+ snprintf(vertex_shader, sizeof(vertex_shader),
+ "#version 300 es \n"
+ "in vec2 pos; \n"
+ "in vec2 texcoords; \n"
+ "out vec2 texcoords_out; \n"
+ "uniform vec2 offset; \n"
+ "uniform float rotation; \n"
+ ROTATE_Z
+ "void main() \n"
+ "{ \n"
+ " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
+ " gl_Position = (vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0)) * vec4(0.5, 0.5, 1.0, 1.0) - vec4(0.5, 0.5, 0.0, 0.0); \n"
+ "} \n");
+
+ const char *main_code =
+ main_code =
+ " vec4 pixel = texture(tex1, texcoords_out); \n"
+ " FragColor.xy = (RGBtoYUV * vec4(pixel.rgb, 1.0)).yz; \n"
+ " FragColor.w = pixel.a; \n";
+
+ char fragment_shader[2048];
+ if(external_texture) {
+ snprintf(fragment_shader, sizeof(fragment_shader),
+ "#version 300 es \n"
+ "#extension GL_OES_EGL_image_external : enable \n"
+ "#extension GL_OES_EGL_image_external_essl3 : require \n"
+ "precision highp float; \n"
+ "in vec2 texcoords_out; \n"
+ "uniform samplerExternalOES tex1; \n"
+ "out vec4 FragColor; \n"
+ "%s"
+ "void main() \n"
+ "{ \n"
+ "%s"
+ "} \n", color_transform_matrix, main_code);
+ } else {
+ snprintf(fragment_shader, sizeof(fragment_shader),
+ "#version 300 es \n"
+ "precision highp float; \n"
+ "in vec2 texcoords_out; \n"
+ "uniform sampler2D tex1; \n"
+ "out vec4 FragColor; \n"
+ "%s"
+ "void main() \n"
+ "{ \n"
+ "%s"
+ "} \n", color_transform_matrix, main_code);
+ }
+
+ if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
return -1;
- uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position");
- uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
- uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
- uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
+ gsr_shader_bind_attribute_location(shader, "pos", 0);
+ gsr_shader_bind_attribute_location(shader, "texcoords", 1);
+ uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
+ uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
return 0;
}
-static int load_compute_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, int max_local_size_dim) {
- char compute_shader[2048];
- snprintf(compute_shader, sizeof(compute_shader),
- "#version 430 core\n"
- "layout (local_size_x = %d, local_size_y = %d, local_size_z = 1) in;\n"
- "uniform sampler2D imgInput;\n"
- "uniform ivec2 source_position;\n"
- "uniform ivec2 target_position;\n"
- "uniform vec2 scale;\n"
- "uniform mat2 rotation_matrix;\n"
- "layout(rgba8, binding = 0) uniform image2D imgOutput;\n"
- "void main() {\n"
- " ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);\n"
- " ivec2 size = ivec2(vec2(textureSize(imgInput, 0)) * scale + 0.5);\n"
- " vec2 rotated_texel_coord = vec2(texelCoord - source_position - size/2) * rotation_matrix + vec2(size/2) + 0.5;\n"
- " vec2 texCoord = vec2(rotated_texel_coord)/vec2(size);\n"
- " vec4 source_color = texture(imgInput, texCoord);\n"
- //" vec4 output_color = imageLoad(imgOutput, ivec2(rotated_texel_coord) + target_position);\n"
- //" vec3 color = mix(output_color.rgb, source_color.rgb, source_color.a);\n"
- " imageStore(imgOutput, texelCoord + target_position, source_color);\n"
- "}\n", max_local_size_dim, max_local_size_dim);
-
- if(gsr_shader_init(shader, egl, NULL, NULL, compute_shader) != 0)
+static unsigned int load_shader_rgb(gsr_shader *shader, gsr_egl *egl, gsr_color_uniforms *uniforms, bool external_texture) {
+ char vertex_shader[2048];
+ snprintf(vertex_shader, sizeof(vertex_shader),
+ "#version 300 es \n"
+ "in vec2 pos; \n"
+ "in vec2 texcoords; \n"
+ "out vec2 texcoords_out; \n"
+ "uniform vec2 offset; \n"
+ "uniform float rotation; \n"
+ ROTATE_Z
+ "void main() \n"
+ "{ \n"
+ " texcoords_out = (vec4(texcoords.x - 0.5, texcoords.y - 0.5, 0.0, 0.0) * rotate_z(rotation)).xy + vec2(0.5, 0.5); \n"
+ " gl_Position = vec4(offset.x, offset.y, 0.0, 0.0) + vec4(pos.x, pos.y, 0.0, 1.0); \n"
+ "} \n");
+
+ const char *main_code =
+ main_code =
+ " vec4 pixel = texture(tex1, texcoords_out); \n"
+ " FragColor = pixel; \n";
+
+ char fragment_shader[2048];
+ if(external_texture) {
+ snprintf(fragment_shader, sizeof(fragment_shader),
+ "#version 300 es \n"
+ "#extension GL_OES_EGL_image_external : enable \n"
+ "#extension GL_OES_EGL_image_external_essl3 : require \n"
+ "precision highp float; \n"
+ "in vec2 texcoords_out; \n"
+ "uniform samplerExternalOES tex1; \n"
+ "out vec4 FragColor; \n"
+ "void main() \n"
+ "{ \n"
+ "%s"
+ "} \n", main_code);
+ } else {
+ snprintf(fragment_shader, sizeof(fragment_shader),
+ "#version 300 es \n"
+ "precision highp float; \n"
+ "in vec2 texcoords_out; \n"
+ "uniform sampler2D tex1; \n"
+ "out vec4 FragColor; \n"
+ "void main() \n"
+ "{ \n"
+ "%s"
+ "} \n", main_code);
+ }
+
+ if(gsr_shader_init(shader, egl, vertex_shader, fragment_shader) != 0)
return -1;
- uniforms->source_position = egl->glGetUniformLocation(shader->program_id, "source_position");
- uniforms->target_position = egl->glGetUniformLocation(shader->program_id, "target_position");
- uniforms->rotation_matrix = egl->glGetUniformLocation(shader->program_id, "rotation_matrix");
- uniforms->scale = egl->glGetUniformLocation(shader->program_id, "scale");
+ gsr_shader_bind_attribute_location(shader, "pos", 0);
+ gsr_shader_bind_attribute_location(shader, "texcoords", 1);
+ uniforms->offset = egl->glGetUniformLocation(shader->program_id, "offset");
+ uniforms->rotation = egl->glGetUniformLocation(shader->program_id, "rotation");
return 0;
}
@@ -239,11 +315,6 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
memset(self, 0, sizeof(*self));
self->params.egl = params->egl;
self->params = *params;
-
- int max_compute_work_group_invocations = 256;
- self->params.egl->glGetIntegerv(GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS, &max_compute_work_group_invocations);
- self->max_local_size_dim = sqrt(max_compute_work_group_invocations);
- fprintf(stderr, "max local size: %d, max_local_size_dim: %d\n", max_compute_work_group_invocations, self->max_local_size_dim);
switch(params->destination_color) {
case GSR_DESTINATION_COLOR_NV12:
@@ -253,15 +324,27 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
return -1;
}
- if(load_compute_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], self->max_local_size_dim, params->destination_color, params->color_range) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n");
+ if(load_shader_y(&self->shaders[0], self->params.egl, &self->uniforms[0], params->destination_color, params->color_range, false) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
goto err;
}
- if(load_compute_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], self->max_local_size_dim, params->destination_color, params->color_range) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV compute shader\n");
+ if(load_shader_uv(&self->shaders[1], self->params.egl, &self->uniforms[1], params->destination_color, params->color_range, false) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n");
goto err;
}
+
+ if(self->params.load_external_image_shader) {
+ if(load_shader_y(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], params->destination_color, params->color_range, true) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
+ goto err;
+ }
+
+ if(load_shader_uv(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET + 1], params->destination_color, params->color_range, true) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load UV shader\n");
+ goto err;
+ }
+ }
break;
}
case GSR_DESTINATION_COLOR_RGB8: {
@@ -270,10 +353,17 @@ int gsr_color_conversion_init(gsr_color_conversion *self, const gsr_color_conver
return -1;
}
- if(load_compute_shader_rgb(&self->shaders[2], self->params.egl, &self->uniforms[2], self->max_local_size_dim) != 0) {
- fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y compute shader\n");
+ if(load_shader_rgb(&self->shaders[0], self->params.egl, &self->uniforms[0], false) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
goto err;
}
+
+ if(self->params.load_external_image_shader) {
+ if(load_shader_rgb(&self->shaders[EXTERNAL_TEXTURE_SHADER_OFFSET], self->params.egl, &self->uniforms[EXTERNAL_TEXTURE_SHADER_OFFSET], true) != 0) {
+ fprintf(stderr, "gsr error: gsr_color_conversion_init: failed to load Y shader\n");
+ goto err;
+ }
+ }
break;
}
}
@@ -310,119 +400,127 @@ void gsr_color_conversion_deinit(gsr_color_conversion *self) {
self->framebuffers[i] = 0;
}
- for(int i = 0; i < GSR_COLOR_CONVERSION_MAX_SHADERS; ++i) {
+ for(int i = 0; i < MAX_SHADERS; ++i) {
gsr_shader_deinit(&self->shaders[i]);
}
self->params.egl = NULL;
}
-static void gsr_color_conversion_apply_rotation(gsr_rotation rotation, float rotation_matrix[2][2], vec2i *source_position, vec2i texture_size, vec2f scale) {
- /*
- rotation_matrix[0][0] = cos(angle);
- rotation_matrix[0][1] = -sin(angle);
- rotation_matrix[1][0] = sin(angle);
- rotation_matrix[1][1] = cos(angle);
- The manual matrix code below is the same as this code above, but without floating-point errors.
- This is done to remove any blurring caused by these floating-point errors.
- */
- switch(rotation) {
- case GSR_ROT_0:
- rotation_matrix[0][0] = 1.0f;
- rotation_matrix[0][1] = 0.0f;
- rotation_matrix[1][0] = 0.0f;
- rotation_matrix[1][1] = 1.0f;
- break;
- case GSR_ROT_90:
- rotation_matrix[0][0] = 0.0f;
- rotation_matrix[0][1] = -1.0f;
- rotation_matrix[1][0] = 1.0f;
- rotation_matrix[1][1] = 0.0f;
- source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5);
- source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5);
- break;
- case GSR_ROT_180:
- rotation_matrix[0][0] = -1.0f;
- rotation_matrix[0][1] = 0.0f;
- rotation_matrix[1][0] = 0.0f;
- rotation_matrix[1][1] = -1.0f;
- break;
- case GSR_ROT_270:
- rotation_matrix[0][0] = 0.0f;
- rotation_matrix[0][1] = 1.0f;
- rotation_matrix[1][0] = -1.0f;
- rotation_matrix[1][1] = 0.0f;
- source_position->x += (((double)texture_size.x*0.5 - (double)texture_size.y*0.5) * scale.x + 0.5);
- source_position->y += (((double)texture_size.y*0.5 - (double)texture_size.x*0.5) * scale.y + 0.5);
- break;
+static void gsr_color_conversion_swizzle_texture_source(gsr_color_conversion *self, gsr_source_color source_color) {
+ if(source_color == GSR_SOURCE_COLOR_BGR) {
+ const int swizzle_mask[] = { GL_BLUE, GL_GREEN, GL_RED, 1 };
+ self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask);
}
}
-// TODO: Handle source_color
-void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i destination_pos, vec2i destination_size, vec2i texture_pos, vec2i texture_size, gsr_rotation rotation, bool external_texture, gsr_source_color source_color) {
- vec2f scale = {0.0f, 0.0f};
- if(texture_size.x > 0 && texture_size.y > 0)
- scale = (vec2f){ (double)destination_size.x/(double)texture_size.x, (double)destination_size.y/(double)texture_size.y };
+static void gsr_color_conversion_swizzle_reset(gsr_color_conversion *self, gsr_source_color source_color) {
+ if(source_color == GSR_SOURCE_COLOR_BGR) {
+ const int swizzle_mask[] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA };
+ self->params.egl->glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzle_mask);
+ }
+}
- vec2i source_position = {0, 0};
- float rotation_matrix[2][2] = {{0, 0}, {0, 0}};
- gsr_color_conversion_apply_rotation(rotation, rotation_matrix, &source_position, texture_size, scale);
+/* |source_pos| is in pixel coordinates and |source_size| */
+void gsr_color_conversion_draw(gsr_color_conversion *self, unsigned int texture_id, vec2i source_pos, vec2i source_size, vec2i texture_pos, vec2i texture_size, float rotation, bool external_texture, gsr_source_color source_color) {
+ // TODO: Remove this crap
+ rotation = M_PI*2.0f - rotation;
- source_position.x += texture_pos.x;
- source_position.y += texture_pos.y;
+ /* TODO: Do not call this every frame? */
+ vec2i dest_texture_size = {0, 0};
+ self->params.egl->glBindTexture(GL_TEXTURE_2D, self->params.destination_textures[0]);
+ self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &dest_texture_size.x);
+ self->params.egl->glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &dest_texture_size.y);
+ self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
const int texture_target = external_texture ? GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D;
+
self->params.egl->glBindTexture(texture_target, texture_id);
- switch(self->params.destination_color) {
- case GSR_DESTINATION_COLOR_NV12:
- case GSR_DESTINATION_COLOR_P010: {
- const bool use_16bit_colors = self->params.destination_color == GSR_DESTINATION_COLOR_P010;
- // Y
- {
- gsr_shader_use(&self->shaders[0]);
- self->params.egl->glUniformMatrix2fv(self->uniforms[0].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
- self->params.egl->glUniform2i(self->uniforms[0].source_position, source_position.x, source_position.y);
- self->params.egl->glUniform2i(self->uniforms[0].target_position, destination_pos.x, destination_pos.y);
- self->params.egl->glUniform2f(self->uniforms[0].scale, scale.x, scale.y);
- self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_R16 : GL_R8);
- const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5;
- const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5;
- self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
- }
+ vec2i source_texture_size = {0, 0};
+ if(external_texture) {
+ assert(self->params.load_external_image_shader);
+ source_texture_size = source_size;
+ } else {
+ /* TODO: Do not call this every frame? */
+ self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_WIDTH, &source_texture_size.x);
+ self->params.egl->glGetTexLevelParameteriv(texture_target, 0, GL_TEXTURE_HEIGHT, &source_texture_size.y);
+ }
- // UV
- {
- gsr_shader_use(&self->shaders[1]);
- self->params.egl->glUniformMatrix2fv(self->uniforms[1].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
- self->params.egl->glUniform2i(self->uniforms[1].source_position, source_position.x, source_position.y);
- self->params.egl->glUniform2i(self->uniforms[1].target_position, destination_pos.x, destination_pos.y);
- self->params.egl->glUniform2f(self->uniforms[1].scale, scale.x, scale.y);
- self->params.egl->glBindImageTexture(0, self->params.destination_textures[1], 0, GL_FALSE, 0, GL_READ_WRITE, use_16bit_colors ? GL_RG16 : GL_RG8);
- const double num_groups_x = (double)texture_size.x*0.5/(double)self->max_local_size_dim + 0.5;
- const double num_groups_y = (double)texture_size.y*0.5/(double)self->max_local_size_dim + 0.5;
- self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
- }
- break;
- }
- case GSR_DESTINATION_COLOR_RGB8: {
- gsr_shader_use(&self->shaders[2]);
- self->params.egl->glUniformMatrix2fv(self->uniforms[2].rotation_matrix, 1, GL_TRUE, (const float*)rotation_matrix);
- self->params.egl->glUniform2i(self->uniforms[2].source_position, source_position.x, source_position.y);
- self->params.egl->glUniform2i(self->uniforms[2].target_position, destination_pos.x, destination_pos.y);
- self->params.egl->glUniform2f(self->uniforms[2].scale, scale.x, scale.y);
- self->params.egl->glBindImageTexture(0, self->params.destination_textures[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8);
- const double num_groups_x = (double)texture_size.x/(double)self->max_local_size_dim + 0.5;
- const double num_groups_y = (double)texture_size.y/(double)self->max_local_size_dim + 0.5;
- self->params.egl->glDispatchCompute(max_int(1, num_groups_x), max_int(1, num_groups_y), 1);
- break;
- }
+ // TODO: Remove this crap
+ if(abs_f(M_PI * 0.5f - rotation) <= 0.001f || abs_f(M_PI * 1.5f - rotation) <= 0.001f) {
+ float tmp = source_texture_size.x;
+ source_texture_size.x = source_texture_size.y;
+ source_texture_size.y = tmp;
+ }
+
+ const vec2f pos_norm = {
+ ((float)source_pos.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f,
+ ((float)source_pos.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f,
+ };
+
+ const vec2f size_norm = {
+ ((float)source_size.x / (dest_texture_size.x == 0 ? 1.0f : (float)dest_texture_size.x)) * 2.0f,
+ ((float)source_size.y / (dest_texture_size.y == 0 ? 1.0f : (float)dest_texture_size.y)) * 2.0f,
+ };
+
+ const vec2f texture_pos_norm = {
+ (float)texture_pos.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x),
+ (float)texture_pos.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y),
+ };
+
+ const vec2f texture_size_norm = {
+ (float)texture_size.x / (source_texture_size.x == 0 ? 1.0f : (float)source_texture_size.x),
+ (float)texture_size.y / (source_texture_size.y == 0 ? 1.0f : (float)source_texture_size.y),
+ };
+
+ const float vertices[] = {
+ -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y,
+ -1.0f + 0.0f, -1.0f + 0.0f, texture_pos_norm.x, texture_pos_norm.y,
+ -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y,
+
+ -1.0f + 0.0f, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x, texture_pos_norm.y + texture_size_norm.y,
+ -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y,
+ -1.0f + 0.0f + size_norm.x, -1.0f + 0.0f + size_norm.y, texture_pos_norm.x + texture_size_norm.x, texture_pos_norm.y + texture_size_norm.y
+ };
+
+ gsr_color_conversion_swizzle_texture_source(self, source_color);
+
+ self->params.egl->glBindVertexArray(self->vertex_array_object_id);
+ self->params.egl->glViewport(0, 0, dest_texture_size.x, dest_texture_size.y);
+
+ /* TODO: this, also cleanup */
+ //self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id);
+ self->params.egl->glBufferSubData(GL_ARRAY_BUFFER, 0, 24 * sizeof(float), vertices);
+
+ {
+ self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[0]);
+ //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT); // TODO: Do this in a separate clear_ function. We want to do that when using multiple drm to create the final image (multiple monitors for example)
+
+ const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET : 0;
+ gsr_shader_use(&self->shaders[shader_index]);
+ self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation);
+ self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y);
+ self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6);
}
- self->params.egl->glMemoryBarrier(GL_ALL_BARRIER_BITS); // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
- self->params.egl->glUseProgram(0);
+ if(self->params.num_destination_textures > 1) {
+ self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, self->framebuffers[1]);
+ //cap_xcomp->params.egl->glClear(GL_COLOR_BUFFER_BIT);
+ const int shader_index = external_texture ? EXTERNAL_TEXTURE_SHADER_OFFSET + 1 : 1;
+ gsr_shader_use(&self->shaders[shader_index]);
+ self->params.egl->glUniform1f(self->uniforms[shader_index].rotation, rotation);
+ self->params.egl->glUniform2f(self->uniforms[shader_index].offset, pos_norm.x, pos_norm.y);
+ self->params.egl->glDrawArrays(GL_TRIANGLES, 0, 6);
+ }
+
+ self->params.egl->glBindVertexArray(0);
+ gsr_shader_use_none(&self->shaders[0]);
self->params.egl->glBindTexture(texture_target, 0);
+ self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
+
+ gsr_color_conversion_swizzle_reset(self, source_color);
}
void gsr_color_conversion_clear(gsr_color_conversion *self) {
@@ -459,7 +557,3 @@ void gsr_color_conversion_clear(gsr_color_conversion *self) {
self->params.egl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
-
-gsr_rotation gsr_monitor_rotation_to_rotation(gsr_monitor_rotation monitor_rotation) {
- return (gsr_rotation)monitor_rotation;
-}
diff --git a/src/egl.c b/src/egl.c
index 1138ae7..7c9a877 100644
--- a/src/egl.c
+++ b/src/egl.c
@@ -284,7 +284,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGenTextures, "glGenTextures" },
{ (void**)&self->glDeleteTextures, "glDeleteTextures" },
{ (void**)&self->glBindTexture, "glBindTexture" },
- { (void**)&self->glBindImageTexture, "glBindImageTexture" },
{ (void**)&self->glTexParameteri, "glTexParameteri" },
{ (void**)&self->glTexParameteriv, "glTexParameteriv" },
{ (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" },
@@ -294,8 +293,6 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGenFramebuffers, "glGenFramebuffers" },
{ (void**)&self->glBindFramebuffer, "glBindFramebuffer" },
{ (void**)&self->glDeleteFramebuffers, "glDeleteFramebuffers" },
- { (void**)&self->glDispatchCompute, "glDispatchCompute" },
- { (void**)&self->glMemoryBarrier, "glMemoryBarrier" },
{ (void**)&self->glViewport, "glViewport" },
{ (void**)&self->glFramebufferTexture2D, "glFramebufferTexture2D" },
{ (void**)&self->glDrawBuffers, "glDrawBuffers" },
@@ -332,14 +329,11 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) {
{ (void**)&self->glGetUniformLocation, "glGetUniformLocation" },
{ (void**)&self->glUniform1f, "glUniform1f" },
{ (void**)&self->glUniform2f, "glUniform2f" },
- { (void**)&self->glUniform2i, "glUniform2i" },
- { (void**)&self->glUniformMatrix2fv, "glUniformMatrix2fv" },
{ (void**)&self->glDebugMessageCallback, "glDebugMessageCallback" },
{ (void**)&self->glScissor, "glScissor" },
{ (void**)&self->glReadPixels, "glReadPixels" },
{ (void**)&self->glMapBuffer, "glMapBuffer" },
{ (void**)&self->glUnmapBuffer, "glUnmapBuffer" },
- { (void**)&self->glGetIntegerv, "glGetIntegerv" },
{ NULL, NULL }
};
diff --git a/src/encoder/video/software.c b/src/encoder/video/software.c
index 627cdea..3649ff1 100644
--- a/src/encoder/video/software.c
+++ b/src/encoder/video/software.c
@@ -83,8 +83,8 @@ static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder
self->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
// cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface);
- //self->params.egl->glFlush();
- //self->params.egl->glFinish();
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
}
static void gsr_video_encoder_software_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) {
diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c
index 1d5dae0..8bb2f08 100644
--- a/src/encoder/video/vaapi.c
+++ b/src/encoder/video/vaapi.c
@@ -123,8 +123,8 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self
self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[i]);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
while(self->params.egl->glGetError()) {}
while(self->params.egl->eglGetError() != EGL_SUCCESS){}
diff --git a/src/image_writer.c b/src/image_writer.c
index e153a34..c4d66f4 100644
--- a/src/image_writer.c
+++ b/src/image_writer.c
@@ -17,7 +17,7 @@ bool gsr_image_writer_init_opengl(gsr_image_writer *self, gsr_egl *egl, int widt
self->egl = egl;
self->width = width;
self->height = height;
- self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGBA8, GL_RGBA, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */
+ self->texture = gl_create_texture(self->egl, self->width, self->height, GL_RGB8, GL_RGB, GL_NEAREST); /* TODO: use GL_RGB16 instead of GL_RGB8 for hdr/10-bit */
if(self->texture == 0) {
fprintf(stderr, "gsr error: gsr_image_writer_init: failed to create texture\n");
return false;
@@ -50,10 +50,10 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const
bool success = false;
switch(image_format) {
case GSR_IMAGE_FORMAT_JPEG:
- success = stbi_write_jpg(filepath, self->width, self->height, 4, data, quality);
+ success = stbi_write_jpg(filepath, self->width, self->height, 3, data, quality);
break;
case GSR_IMAGE_FORMAT_PNG:
- success = stbi_write_png(filepath, self->width, self->height, 4, data, 0);
+ success = stbi_write_png(filepath, self->width, self->height, 3, data, 0);
break;
}
@@ -65,7 +65,7 @@ static bool gsr_image_writer_write_memory_to_file(gsr_image_writer *self, const
static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self, const char *filepath, gsr_image_format image_format, int quality) {
assert(self->source == GSR_IMAGE_WRITER_SOURCE_OPENGL);
- uint8_t *frame_data = malloc(self->width * self->height * 4);
+ uint8_t *frame_data = malloc(self->width * self->height * 3);
if(!frame_data) {
fprintf(stderr, "gsr error: gsr_image_writer_write_to_file: failed to allocate memory for image frame\n");
return false;
@@ -74,7 +74,7 @@ static bool gsr_image_writer_write_opengl_texture_to_file(gsr_image_writer *self
// TODO: hdr support
self->egl->glBindTexture(GL_TEXTURE_2D, self->texture);
// We could use glGetTexSubImage, but it's only available starting from opengl 4.5
- self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, frame_data);
+ self->egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, frame_data);
self->egl->glBindTexture(GL_TEXTURE_2D, 0);
self->egl->glFlush();
diff --git a/src/main.cpp b/src/main.cpp
index f0a3e06..6adeb05 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1072,9 +1072,8 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
// TODO: More quality options
if(low_power)
av_dict_set_int(&options, "low_power", 1, 0);
- // Improves performance but increases vram.
- // TODO: Might need a different async_depth for optimal performance on different amd/intel gpus
- //av_dict_set_int(&options, "async_depth", 3, 0);
+ // Improves performance but increases vram
+ //av_dict_set_int(&options, "async_depth", 8, 0);
if(codec_context->codec_id == AV_CODEC_ID_H264) {
// Removed because it causes stutter in games for some people
diff --git a/src/shader.c b/src/shader.c
index b9fbb62..dcb956b 100644
--- a/src/shader.c
+++ b/src/shader.c
@@ -36,36 +36,28 @@ static unsigned int loader_shader(gsr_egl *egl, unsigned int type, const char *s
return shader_id;
}
-static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) {
+static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) {
unsigned int vertex_shader_id = 0;
unsigned int fragment_shader_id = 0;
- unsigned int compute_shader_id = 0;
unsigned int program_id = 0;
int linked = 0;
- bool success = false;
if(vertex_shader) {
vertex_shader_id = loader_shader(egl, GL_VERTEX_SHADER, vertex_shader);
if(vertex_shader_id == 0)
- goto done;
+ goto err;
}
if(fragment_shader) {
fragment_shader_id = loader_shader(egl, GL_FRAGMENT_SHADER, fragment_shader);
if(fragment_shader_id == 0)
- goto done;
- }
-
- if(compute_shader) {
- compute_shader_id = loader_shader(egl, GL_COMPUTE_SHADER, compute_shader);
- if(compute_shader_id == 0)
- goto done;
+ goto err;
}
program_id = egl->glCreateProgram();
if(program_id == 0) {
fprintf(stderr, "gsr error: load_program: failed to create shader program, error: %d\n", egl->glGetError());
- goto done;
+ goto err;
}
if(vertex_shader_id)
@@ -74,9 +66,6 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const
if(fragment_shader_id)
egl->glAttachShader(program_id, fragment_shader_id);
- if(compute_shader_id)
- egl->glAttachShader(program_id, compute_shader_id);
-
egl->glLinkProgram(program_id);
egl->glGetProgramiv(program_id, GL_LINK_STATUS, &linked);
@@ -90,36 +79,37 @@ static unsigned int load_program(gsr_egl *egl, const char *vertex_shader, const
fprintf(stderr, "gsr error: load program: linking shader program failed, error:\n%s\n", info_log);
}
- goto done;
+ goto err;
}
- success = true;
- done:
-
- if(!success) {
- if(program_id)
- egl->glDeleteProgram(program_id);
- }
- if(compute_shader_id)
- egl->glDeleteShader(compute_shader_id);
if(fragment_shader_id)
egl->glDeleteShader(fragment_shader_id);
if(vertex_shader_id)
egl->glDeleteShader(vertex_shader_id);
+
return program_id;
+
+ err:
+ if(program_id)
+ egl->glDeleteProgram(program_id);
+ if(fragment_shader_id)
+ egl->glDeleteShader(fragment_shader_id);
+ if(vertex_shader_id)
+ egl->glDeleteShader(vertex_shader_id);
+ return 0;
}
-int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader, const char *compute_shader) {
+int gsr_shader_init(gsr_shader *self, gsr_egl *egl, const char *vertex_shader, const char *fragment_shader) {
assert(egl);
self->egl = egl;
self->program_id = 0;
- if(!vertex_shader && !fragment_shader && !compute_shader) {
- fprintf(stderr, "gsr error: gsr_shader_init: vertex, fragment shader and compute shaders can't be NULL at the same time\n");
+ if(!vertex_shader && !fragment_shader) {
+ fprintf(stderr, "gsr error: gsr_shader_init: vertex shader and fragment shader can't be NULL at the same time\n");
return -1;
}
- self->program_id = load_program(self->egl, vertex_shader, fragment_shader, compute_shader);
+ self->program_id = load_program(self->egl, vertex_shader, fragment_shader);
if(self->program_id == 0)
return -1;
diff --git a/src/utils.c b/src/utils.c
index 943fb2d..325f750 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -14,8 +14,10 @@
#include <xf86drmMode.h>
#include <xf86drm.h>
+#include <libdrm/drm_fourcc.h>
#include <X11/Xatom.h>
#include <X11/extensions/Xrandr.h>
+#include <va/va_drmcommon.h>
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext_vaapi.h>
@@ -661,6 +663,241 @@ bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context) {
return device_context->type == AV_HWDEVICE_TYPE_VAAPI;
}
+/*
+ * Maps a DRM fourcc code to the VA-API fourcc used for it when importing a buffer.
+ * Only the eight 8888 RGB variants listed in the table are translated;
+ * every other code is handed back unchanged.
+ */
+static uint32_t drm_fourcc_to_va_fourcc(uint32_t drm_fourcc) {
+    static const struct {
+        uint32_t drm;
+        uint32_t va;
+    } fourcc_pairs[] = {
+        { DRM_FORMAT_XRGB8888, VA_FOURCC_BGRX },
+        { DRM_FORMAT_XBGR8888, VA_FOURCC_RGBX },
+        { DRM_FORMAT_RGBX8888, VA_FOURCC_XBGR },
+        { DRM_FORMAT_BGRX8888, VA_FOURCC_XRGB },
+        { DRM_FORMAT_ARGB8888, VA_FOURCC_BGRA },
+        { DRM_FORMAT_ABGR8888, VA_FOURCC_RGBA },
+        { DRM_FORMAT_RGBA8888, VA_FOURCC_ABGR },
+        { DRM_FORMAT_BGRA8888, VA_FOURCC_ARGB },
+    };
+    const unsigned int num_pairs = sizeof(fourcc_pairs) / sizeof(fourcc_pairs[0]);
+
+    for(unsigned int i = 0; i < num_pairs; ++i) {
+        if(fourcc_pairs[i].drm == drm_fourcc)
+            return fourcc_pairs[i].va;
+    }
+
+    /* No table entry: pass the code through as-is */
+    return drm_fourcc;
+}
+
+/*
+ * Imports a DRM PRIME buffer as a temporary VA surface and runs it through the VA-API
+ * video processing pipeline into the VA surface that backs |video_frame|.
+ *
+ * video_codec_context: codec context the VADisplay is obtained from.
+ * video_frame: destination frame; data[3] is read as the output VASurfaceID and
+ *   color_range selects full/reduced range for both input and output.
+ * source_pos, source_size: region of the imported buffer that is read.
+ * dest_pos, dest_size: region of the output surface that is written.
+ * format: DRM fourcc of the imported buffer.
+ * size: dimensions used for the processing context and the imported surface.
+ * fds, offsets, pitches, modifiers: per-plane arrays with |num_planes| entries each.
+ *   This function does not close the fds.
+ * num_planes: number of planes; must be at least 1 and fit in VADRMPRIMESurfaceDescriptor.
+ *
+ * Returns true when every step succeeded. Every VA object created here is destroyed
+ * before returning, on success as well as on failure.
+ */
+bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, uint32_t format, vec2i size, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) {
+    // VA_INVALID_ID is libva's "no object" value. 0 is not reserved and can be a real
+    // handle on some drivers, so it can't be used to decide what needs to be destroyed.
+    VAConfigID config_id = VA_INVALID_ID;
+    VAContextID context_id = VA_INVALID_ID;
+    VASurfaceID input_surface_id = VA_INVALID_SURFACE;
+    VABufferID buffer_id = VA_INVALID_ID;
+    VADRMPRIMESurfaceDescriptor buf = {0};
+    VADisplay va_dpy = NULL;
+    bool success = false;
+
+    // The descriptor stores planes in fixed-size arrays, reject counts that would overflow them
+    const int max_objects = (int)(sizeof(buf.objects) / sizeof(buf.objects[0]));
+    const int max_layer_planes = (int)(sizeof(buf.layers[0].pitch) / sizeof(buf.layers[0].pitch[0]));
+    const int max_planes = max_objects < max_layer_planes ? max_objects : max_layer_planes;
+    if(num_planes <= 0 || num_planes > max_planes) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: expected planes size to be 0<planes<=%d, got %d planes\n", max_planes, num_planes);
+        goto done;
+    }
+
+    va_dpy = video_codec_context_get_vaapi_display(video_codec_context);
+    if(!va_dpy)
+        goto done;
+
+    VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %s\n", vaErrorStr(va_status));
+        config_id = VA_INVALID_ID;
+        goto done;
+    }
+
+    VASurfaceID output_surface_id = (uintptr_t)video_frame->data[3];
+    va_status = vaCreateContext(va_dpy, config_id, size.x, size.y, VA_PROGRESSIVE, &output_surface_id, 1, &context_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %s\n", vaErrorStr(va_status));
+        context_id = VA_INVALID_ID;
+        goto done;
+    }
+
+    // Describe the dma-buf: one layer, one object per plane
+    buf.fourcc = drm_fourcc_to_va_fourcc(format);//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10
+    buf.width = size.x;
+    buf.height = size.y;
+    buf.num_objects = num_planes;
+    buf.num_layers = 1;
+    buf.layers[0].drm_format = format;
+    buf.layers[0].num_planes = buf.num_objects;
+    for(int i = 0; i < num_planes; ++i) {
+        buf.objects[i].fd = fds[i];
+        buf.objects[i].size = size.y * pitches[i]; // TODO:
+        buf.objects[i].drm_format_modifier = modifiers[i];
+
+        buf.layers[0].object_index[i] = i;
+        buf.layers[0].offset[i] = offsets[i];
+        buf.layers[0].pitch[i] = pitches[i];
+    }
+
+    VASurfaceAttrib attribs[2] = {0};
+    attribs[0].type = VASurfaceAttribMemoryType;
+    attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
+    attribs[0].value.type = VAGenericValueTypeInteger;
+    attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
+    attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
+    attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
+    attribs[1].value.type = VAGenericValueTypePointer;
+    attribs[1].value.value.p = &buf;
+
+    // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10
+    // TODO: Max size same as source_size
+    va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, size.x, size.y, &input_surface_id, 1, attribs, 2);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %s\n", vaErrorStr(va_status));
+        input_surface_id = VA_INVALID_SURFACE;
+        goto done;
+    }
+
+    const VARectangle source_region = {
+        .x = source_pos.x,
+        .y = source_pos.y,
+        .width = source_size.x,
+        .height = source_size.y
+    };
+
+    const VARectangle output_region = {
+        .x = dest_pos.x,
+        .y = dest_pos.y,
+        .width = dest_size.x,
+        .height = dest_size.y
+    };
+
+    const bool scaled = dest_size.x != source_size.x || dest_size.y != source_size.y;
+
+    // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi!
+    VAProcPipelineParameterBuffer params = {0};
+    params.surface = input_surface_id;
+    params.surface_region = &source_region;
+    params.output_region = &output_region;
+    params.output_background_color = 0;
+    params.filter_flags = scaled ? (VA_FILTER_SCALING_HQ | VA_FILTER_INTERPOLATION_BILINEAR) : 0;
+    params.pipeline_flags = VA_PROC_PIPELINE_FAST;
+
+    params.input_color_properties.colour_primaries = 1;
+    params.input_color_properties.transfer_characteristics = 1;
+    params.input_color_properties.matrix_coefficients = 1;
+    params.surface_color_standard = VAProcColorStandardBT709; // TODO:
+    params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
+
+    params.output_color_properties.colour_primaries = 1;
+    params.output_color_properties.transfer_characteristics = 1;
+    params.output_color_properties.matrix_coefficients = 1;
+    params.output_color_standard = VAProcColorStandardBT709; // TODO:
+    params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
+
+    params.processing_mode = VAProcPerformanceMode;
+
+    // TODO: Query vaQueryVideoProcPipelineCaps for rotation/blend support
+    // TODO: params.output_hdr_metadata
+
+    // TODO:
+    // if (first surface to render)
+    //     pipeline_param->output_background_color = 0xff000000; // black
+
+    va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status);
+        buffer_id = VA_INVALID_ID;
+        goto done;
+    }
+
+    va_status = vaBeginPicture(va_dpy, context_id, output_surface_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status);
+        goto done;
+    }
+
+    va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1);
+    if(va_status != VA_STATUS_SUCCESS) {
+        // The picture was begun, so it's ended before bailing out
+        vaEndPicture(va_dpy, context_id);
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status);
+        goto done;
+    }
+
+    va_status = vaEndPicture(va_dpy, context_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status);
+        goto done;
+    }
+
+    // Note: no vaSyncBuffer/vaSyncSurface call is made before the temporary objects are destroyed
+    success = true;
+
+    done:
+    if(buffer_id != VA_INVALID_ID)
+        vaDestroyBuffer(va_dpy, buffer_id);
+
+    if(input_surface_id != VA_INVALID_SURFACE)
+        vaDestroySurfaces(va_dpy, &input_surface_id, 1);
+
+    if(context_id != VA_INVALID_ID)
+        vaDestroyContext(va_dpy, context_id);
+
+    if(config_id != VA_INVALID_ID)
+        vaDestroyConfig(va_dpy, config_id);
+
+    return success;
+}
+
+/*
+ * Exports |image| as dma-buf planes and copies the requested region into the VA surface
+ * behind |video_frame| via vaapi_copy_drm_planes_to_video_surface.
+ *
+ * egl: provides the EGL display and the MESA dma-buf export entry points.
+ * image: EGL image to export; a null image makes the function return false.
+ * source_pos, source_size: region of the image that is read. source_size is also
+ *   passed on as the size of the imported buffer.
+ * dest_pos, dest_size: region of the output surface that is written.
+ * video_codec_context, video_frame: forwarded to vaapi_copy_drm_planes_to_video_surface.
+ *
+ * Returns true when the export and the copy both succeeded. Every file descriptor
+ * obtained from the export is closed before returning.
+ */
+bool vaapi_copy_egl_image_to_video_surface(gsr_egl *egl, EGLImage image, vec2i source_pos, vec2i source_size, vec2i dest_pos, vec2i dest_size, AVCodecContext *video_codec_context, AVFrame *video_frame) {
+    if(!image)
+        return false;
+
+    enum { MAX_PLANES = 8 };
+
+    int texture_fourcc = 0;
+    int texture_num_planes = 0;
+    // The query is expected to write one modifier per plane (Mesa fills num_planes
+    // entries), so it needs an array; a single uint64_t would be overrun by multi-planar images.
+    uint64_t texture_modifiers[MAX_PLANES] = {0};
+    if(!egl->eglExportDMABUFImageQueryMESA(egl->egl_display, image, &texture_fourcc, &texture_num_planes, texture_modifiers)) {
+        fprintf(stderr, "gsr error: vaapi_copy_egl_image_to_video_surface: eglExportDMABUFImageQueryMESA failed\n");
+        return false;
+    }
+
+    if(texture_num_planes <= 0 || texture_num_planes > MAX_PLANES) {
+        fprintf(stderr, "gsr error: vaapi_copy_egl_image_to_video_surface: expected planes size to be 0<planes<=8 for drm buf, got %d planes\n", texture_num_planes);
+        return false;
+    }
+
+    // Start every slot at -1 so the cleanup loop only closes descriptors the export really wrote
+    int texture_fds[MAX_PLANES];
+    int32_t texture_strides[MAX_PLANES] = {0};
+    int32_t texture_offsets[MAX_PLANES] = {0};
+    for(int i = 0; i < MAX_PLANES; ++i)
+        texture_fds[i] = -1;
+
+    bool success = false;
+
+    // Drain stale errors so the eglGetError below reports the export's own failure
+    while(egl->eglGetError() != EGL_SUCCESS){}
+    if(!egl->eglExportDMABUFImageMESA(egl->egl_display, image, texture_fds, texture_strides, texture_offsets)) {
+        fprintf(stderr, "gsr error: vaapi_copy_egl_image_to_video_surface: eglExportDMABUFImageMESA failed, error: %d\n", egl->eglGetError());
+        goto done;
+    }
+
+    int fds[MAX_PLANES];
+    uint32_t offsets[MAX_PLANES];
+    uint32_t pitches[MAX_PLANES];
+    uint64_t modifiers[MAX_PLANES];
+    int num_planes = 0;
+    for(; num_planes < texture_num_planes; ++num_planes) {
+        // Planes are only used up to the first one that has no fd of its own
+        if(texture_fds[num_planes] == -1)
+            break;
+
+        fds[num_planes] = texture_fds[num_planes];
+        offsets[num_planes] = texture_offsets[num_planes];
+        pitches[num_planes] = texture_strides[num_planes];
+        modifiers[num_planes] = texture_modifiers[num_planes];
+    }
+
+    success = num_planes > 0 && vaapi_copy_drm_planes_to_video_surface(video_codec_context, video_frame, source_pos, source_size, dest_pos, dest_size, texture_fourcc, source_size, fds, offsets, pitches, modifiers, num_planes);
+
+    done:
+    // 0 is a valid file descriptor, so everything that isn't negative gets closed
+    for(int i = 0; i < MAX_PLANES; ++i) {
+        if(texture_fds[i] >= 0) {
+            close(texture_fds[i]);
+            texture_fds[i] = -1;
+        }
+    }
+
+    return success;
+}
+
vec2i scale_keep_aspect_ratio(vec2i from, vec2i to) {
if(from.x == 0 || from.y == 0)
return (vec2i){0, 0};