author    dec05eba <dec05eba@protonmail.com>    2024-09-16 02:17:29 +0200
committer dec05eba <dec05eba@protonmail.com>    2024-09-16 23:31:53 +0200
commit    4bcf976a71c3cb7660912b4e888a2197f81eb4b1 (patch)
tree      3dacba98efcffd832736c5a9a3fa059253e6abe8 /src
parent    4a8544eeb321d37cdc3058305268f4c2ee177ca7 (diff)
Improve capture performance on amd/intel when using monitor capture: capture display frame using purely vaapi unless the monitor is rotated
Diffstat (limited to 'src')
-rw-r--r--  src/capture/kms.c           50
-rw-r--r--  src/egl.c                   12
-rw-r--r--  src/encoder/video/vaapi.c    5
-rw-r--r--  src/main.cpp                15
-rw-r--r--  src/utils.c                177
5 files changed, 238 insertions, 21 deletions
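
The change in a nutshell, as a minimal standalone sketch: the OpenGL-free path is only taken when the monitor is unrotated and the encoder is VAAPI; rotation still requires compositing with OpenGL. The enum constant and the condition come from the kms.c hunk below; the enum type and helper name here are stand-ins, not code from the repository.

/* Hedged sketch of the capture-path decision described in the commit message.
 * gsr_monitor_rotation below is a stand-in for the repository's rotation enum;
 * only GSR_MONITOR_ROT_0 appears in the hunks that follow. */
#include <stdbool.h>

typedef enum {
    GSR_MONITOR_ROT_0,
    GSR_MONITOR_ROT_90,
    GSR_MONITOR_ROT_180,
    GSR_MONITOR_ROT_270
} gsr_monitor_rotation;

static bool can_use_pure_vaapi_capture(gsr_monitor_rotation monitor_rotation, bool encoder_is_vaapi) {
    /* Rotation requires compositing the frame with OpenGL before encoding, so the
     * direct dma-buf -> VAAPI surface copy only applies to unrotated monitors. */
    return monitor_rotation == GSR_MONITOR_ROT_0 && encoder_is_vaapi;
}
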
diff --git a/src/capture/kms.c b/src/capture/kms.c
index 9287a8b..c36add4 100644
--- a/src/capture/kms.c
+++ b/src/capture/kms.c
@@ -49,6 +49,9 @@ typedef struct {
bool is_x11;
gsr_cursor x11_cursor;
+
+ AVCodecContext *video_codec_context;
+ bool performance_error_shown;
} gsr_capture_kms;
static void gsr_capture_kms_cleanup_kms_fds(gsr_capture_kms *self) {
@@ -194,6 +197,8 @@ static int gsr_capture_kms_start(gsr_capture *cap, AVCodecContext *video_codec_c
frame->width = video_codec_context->width;
frame->height = video_codec_context->height;
+
+ self->video_codec_context = video_codec_context;
return 0;
}
@@ -495,12 +500,6 @@ static int gsr_capture_kms_capture(gsr_capture *cap, AVFrame *frame, gsr_color_c
if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata))
gsr_kms_set_hdr_metadata(self, drm_fd);
- EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
- if(image) {
- gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
- self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
- }
-
const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation);
const int target_x = max_int(0, frame->width / 2 - self->capture_size.x / 2);
const int target_y = max_int(0, frame->height / 2 - self->capture_size.y / 2);
@@ -509,13 +508,40 @@ static int gsr_capture_kms_capture(gsr_capture *cap, AVFrame *frame, gsr_color_c
if(!capture_is_combined_plane)
capture_pos = (vec2i){drm_fd->x, drm_fd->y};
- self->params.egl->glFlush();
- self->params.egl->glFinish();
+ if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(self->video_codec_context)) {
+ self->performance_error_shown = true;
+ fprintf(stderr,"gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used."
+ " If you are experience performance problems in the video then record a single window on X11 or use portal capture option instead\n");
+ }
- gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
- (vec2i){target_x, target_y}, self->capture_size,
- capture_pos, self->capture_size,
- texture_rotation, self->external_texture_fallback);
+ /* Fast OpenGL-free path */
+ if(self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(self->video_codec_context)) {
+ int fds[4];
+ uint32_t offsets[4];
+ uint32_t pitches[4];
+ uint64_t modifiers[4];
+ for(int i = 0; i < drm_fd->num_dma_bufs; ++i) {
+ fds[i] = drm_fd->dma_buf[i].fd;
+ offsets[i] = drm_fd->dma_buf[i].offset;
+ pitches[i] = drm_fd->dma_buf[i].pitch;
+ modifiers[i] = drm_fd->modifier;
+ }
+ vaapi_copy_drm_planes_to_video_surface(self->video_codec_context, frame, capture_pos.x, capture_pos.y, drm_fd->pixel_format, drm_fd->width, drm_fd->height, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs);
+ } else {
+ EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
+ if(image) {
+ gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
+ self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
+ }
+
+ self->params.egl->glFlush();
+ self->params.egl->glFinish();
+
+ gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
+ (vec2i){target_x, target_y}, self->capture_size,
+ capture_pos, self->capture_size,
+ texture_rotation, self->external_texture_fallback);
+ }
if(self->params.record_cursor) {
gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane);
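
The fast path above builds on two FFmpeg/VAAPI facts that the new utils.c helpers (further down in this patch) rely on. A self-contained sketch, assuming only the public FFmpeg and libva headers; the function names here are illustrative, not the repository's:

#include <stdbool.h>
#include <stdint.h>
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>
#include <va/va.h>

/* An AVCodecContext is VAAPI-backed when its hardware frames context points at
 * a VAAPI device context. */
static bool codec_context_is_vaapi(const AVCodecContext *codec_context) {
    if(!codec_context->hw_frames_ctx)
        return false; /* software encoder: no hardware frames context */
    const AVHWFramesContext *frames = (const AVHWFramesContext*)codec_context->hw_frames_ctx->data;
    return frames->device_ctx->type == AV_HWDEVICE_TYPE_VAAPI;
}

/* For AV_PIX_FMT_VAAPI frames FFmpeg stores the VASurfaceID in data[3], which is
 * how the copy function later in this patch finds its target surface. */
static VASurfaceID frame_to_va_surface(const AVFrame *video_frame) {
    return (VASurfaceID)(uintptr_t)video_frame->data[3];
}
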
diff --git a/src/egl.c b/src/egl.c
index b4e3902..05d6680 100644
--- a/src/egl.c
+++ b/src/egl.c
@@ -388,11 +388,23 @@ static bool gsr_egl_load_egl(gsr_egl *self, void *library) {
}
static bool gsr_egl_proc_load_egl(gsr_egl *self) {
+ self->eglExportDMABUFImageQueryMESA = (FUNC_eglExportDMABUFImageQueryMESA)self->eglGetProcAddress("eglExportDMABUFImageQueryMESA");
+ self->eglExportDMABUFImageMESA = (FUNC_eglExportDMABUFImageMESA)self->eglGetProcAddress("eglExportDMABUFImageMESA");
self->glEGLImageTargetTexture2DOES = (FUNC_glEGLImageTargetTexture2DOES)self->eglGetProcAddress("glEGLImageTargetTexture2DOES");
self->eglQueryDisplayAttribEXT = (FUNC_eglQueryDisplayAttribEXT)self->eglGetProcAddress("eglQueryDisplayAttribEXT");
self->eglQueryDeviceStringEXT = (FUNC_eglQueryDeviceStringEXT)self->eglGetProcAddress("eglQueryDeviceStringEXT");
self->eglQueryDmaBufModifiersEXT = (FUNC_eglQueryDmaBufModifiersEXT)self->eglGetProcAddress("eglQueryDmaBufModifiersEXT");
+ if(!self->eglExportDMABUFImageQueryMESA) {
+ fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageQueryMESA\n");
+ return false;
+ }
+
+ if(!self->eglExportDMABUFImageMESA) {
+ fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageMESA\n");
+ return false;
+ }
+
if(!self->glEGLImageTargetTexture2DOES) {
fprintf(stderr, "gsr error: gsr_egl_load failed: could not find glEGLImageTargetTexture2DOES\n");
return false;
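
This hunk makes the EGL_MESA_image_dma_buf_export entry points mandatory. For context, a hedged sketch of the usual call sequence for the two functions; the display, image and loaded function pointers are assumed to come from the caller, and the prototypes follow the extension as mirrored in EGL/eglext.h:

#include <stdbool.h>
#include <stdio.h>
#include <EGL/egl.h>
#include <EGL/eglext.h>

static bool export_egl_image_to_dmabuf(EGLDisplay egl_display, EGLImage image,
    PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC export_query, PFNEGLEXPORTDMABUFIMAGEMESAPROC export_image)
{
    int fourcc = 0;
    int num_planes = 0;
    EGLuint64KHR modifiers[4] = {0};
    /* First query the fourcc, plane count and modifiers of the EGLImage... */
    if(!export_query(egl_display, image, &fourcc, &num_planes, modifiers))
        return false;

    int fds[4] = {-1, -1, -1, -1};
    EGLint strides[4] = {0};
    EGLint offsets[4] = {0};
    /* ...then export one dma-buf fd (with stride and offset) per plane. */
    if(!export_image(egl_display, image, fds, strides, offsets))
        return false;

    fprintf(stderr, "exported %d plane(s), fourcc 0x%x\n", num_planes, (unsigned int)fourcc);
    return true;
}
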
diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c
index 7a2abfc..03218cb 100644
--- a/src/encoder/video/vaapi.c
+++ b/src/encoder/video/vaapi.c
@@ -41,8 +41,7 @@ static bool gsr_video_encoder_vaapi_setup_context(gsr_video_encoder_vaapi *self,
return false;
}
- AVHWFramesContext *hw_frame_context =
- (AVHWFramesContext *)frame_context->data;
+ AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data;
hw_frame_context->width = video_codec_context->width;
hw_frame_context->height = video_codec_context->height;
hw_frame_context->sw_format = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? AV_PIX_FMT_P010LE : AV_PIX_FMT_NV12;
@@ -51,7 +50,7 @@ static bool gsr_video_encoder_vaapi_setup_context(gsr_video_encoder_vaapi *self,
//hw_frame_context->initial_pool_size = 20;
- AVVAAPIDeviceContext *vactx =((AVHWDeviceContext*)self->device_ctx->data)->hwctx;
+ AVVAAPIDeviceContext *vactx = ((AVHWDeviceContext*)self->device_ctx->data)->hwctx;
self->va_dpy = vactx->display;
if (av_hwframe_ctx_init(frame_context) < 0) {
diff --git a/src/main.cpp b/src/main.cpp
index 7687b07..f2526f8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -519,13 +519,13 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt,
if(codec_context->codec_id == AV_CODEC_ID_AV1 || codec_context->codec_id == AV_CODEC_ID_H264 || codec_context->codec_id == AV_CODEC_ID_HEVC) {
switch(video_quality) {
case VideoQuality::MEDIUM:
- codec_context->global_quality = 160 * quality_multiply;
+ codec_context->global_quality = 150 * quality_multiply;
break;
case VideoQuality::HIGH:
- codec_context->global_quality = 130 * quality_multiply;
+ codec_context->global_quality = 120 * quality_multiply;
break;
case VideoQuality::VERY_HIGH:
- codec_context->global_quality = 110 * quality_multiply;
+ codec_context->global_quality = 100 * quality_multiply;
break;
case VideoQuality::ULTRA:
codec_context->global_quality = 90 * quality_multiply;
@@ -2753,6 +2753,11 @@ int main(int argc, char **argv) {
}
}
+ if(wayland && is_monitor_capture) {
+ fprintf(stderr, "gsr warning: it's not possible to sync video to recorded monitor exactly on wayland when recording a monitor."
+ " If you experience stutter in the video then record with portal capture option instead (-w portal) or use X11 instead\n");
+ }
+
// TODO: Fix constant framerate not working properly on amd/intel because capture framerate gets locked to the same framerate as
// game framerate, which doesn't work well when you need to encode multiple duplicate frames (AMD/Intel is slow at encoding!).
// It also appears to skip audio frames on nvidia wayland? why? that should be fine, but it causes video stuttering because of audio/video sync.
@@ -3442,8 +3447,8 @@ int main(int argc, char **argv) {
const double frame_sleep_fps = 1.0 / update_fps;
const double sleep_time = frame_sleep_fps - (frame_end - frame_start);
if(sleep_time > 0.0) {
- if(damaged)
- av_usleep(sleep_time * 1000.0 * 1000.0);
+ if(damaged)
+ av_usleep(sleep_time * 1000.0 * 1000.0);
else
av_usleep(2 * 1000.0); // 2 milliseconds
}
diff --git a/src/utils.c b/src/utils.c
index 3e4138a..2b1b07c 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -12,9 +12,11 @@
#include <xf86drmMode.h>
#include <xf86drm.h>
-
#include <X11/Xatom.h>
#include <X11/extensions/Xrandr.h>
+#include <va/va_drmcommon.h>
+#include <libavcodec/avcodec.h>
+#include <libavutil/hwcontext_vaapi.h>
double clock_get_monotonic_seconds(void) {
struct timespec ts;
@@ -624,3 +626,176 @@ void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, ui
img_attr[img_attr_index++] = EGL_NONE;
assert(img_attr_index <= 44);
}
+
+static VADisplay video_codec_context_get_vaapi_display(AVCodecContext *video_codec_context) {
+ AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)video_codec_context->hw_frames_ctx->data;
+ AVHWDeviceContext *device_context = (AVHWDeviceContext*)hw_frame_context->device_ctx;
+ if(device_context->type == AV_HWDEVICE_TYPE_VAAPI) {
+ AVVAAPIDeviceContext *vactx = device_context->hwctx;
+ return vactx->display;
+ }
+ return NULL;
+}
+
+bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context) {
+ AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)video_codec_context->hw_frames_ctx->data;
+ AVHWDeviceContext *device_context = (AVHWDeviceContext*)hw_frame_context->device_ctx;
+ return device_context->type == AV_HWDEVICE_TYPE_VAAPI;
+}
+
+bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, int x, int y, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) {
+ VAConfigID config_id = 0;
+ VAContextID context_id = 0;
+ VASurfaceID input_surface = 0;
+ VABufferID buffer_id = 0;
+ bool success = true;
+
+ VADisplay va_dpy = video_codec_context_get_vaapi_display(video_codec_context);
+ if(!va_dpy) {
+ success = false;
+ goto done;
+ }
+
+ VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id);
+ if(va_status != VA_STATUS_SUCCESS) {
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ VASurfaceID target_surface_id = (uintptr_t)video_frame->data[3];
+ va_status = vaCreateContext(va_dpy, config_id, width, height, VA_PROGRESSIVE, &target_surface_id, 1, &context_id);
+ if(va_status != VA_STATUS_SUCCESS) {
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ VADRMPRIMESurfaceDescriptor buf = {0};
+ buf.fourcc = format;//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10
+ buf.width = width;
+ buf.height = height;
+ buf.num_objects = num_planes;
+ buf.num_layers = 1;
+ buf.layers[0].drm_format = format;
+ buf.layers[0].num_planes = buf.num_objects;
+ for(int i = 0; i < num_planes; ++i) {
+ buf.objects[i].fd = fds[i];
+ buf.objects[i].size = height * pitches[i]; // TODO:
+ buf.objects[i].drm_format_modifier = modifiers[i];
+
+ buf.layers[0].object_index[i] = i;
+ buf.layers[0].offset[i] = offsets[i];
+ buf.layers[0].pitch[i] = pitches[i];
+ }
+
+ VASurfaceAttrib attribs[2] = {0};
+ attribs[0].type = VASurfaceAttribMemoryType;
+ attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[0].value.type = VAGenericValueTypeInteger;
+ attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
+ attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
+ attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[1].value.type = VAGenericValueTypePointer;
+ attribs[1].value.value.p = &buf;
+
+ // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10
+ va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, width, height, &input_surface, 1, attribs, 2);
+ if(va_status != VA_STATUS_SUCCESS) {
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ // TODO:
+ const VARectangle output_region = {
+ .x = x,
+ .y = y,
+ .width = width,
+ .height = height
+ };
+
+ // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi!
+ VAProcPipelineParameterBuffer params = {0};
+ params.surface = input_surface;
+ params.surface_region = NULL;
+ params.output_region = &output_region;
+ params.output_background_color = 0;
+ params.filter_flags = VA_FRAME_PICTURE;
+ params.pipeline_flags = VA_PROC_PIPELINE_FAST;
+
+ params.input_color_properties.colour_primaries = 1;
+ params.input_color_properties.transfer_characteristics = 1;
+ params.input_color_properties.matrix_coefficients = 1;
+ params.surface_color_standard = VAProcColorStandardBT709; // TODO:
+ params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
+
+ params.output_color_properties.colour_primaries = 1;
+ params.output_color_properties.transfer_characteristics = 1;
+ params.output_color_properties.matrix_coefficients = 1;
+ params.output_color_standard = VAProcColorStandardBT709; // TODO:
+ params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
+
+ params.processing_mode = VAProcPerformanceMode;
+
+ // VAProcPipelineCaps pipeline_caps = {0};
+ // va_status = vaQueryVideoProcPipelineCaps(self->va_dpy,
+ // self->context_id,
+ // NULL, 0,
+ // &pipeline_caps);
+ // if(va_status == VA_STATUS_SUCCESS) {
+ // fprintf(stderr, "pipeline_caps: %u, %u\n", (unsigned int)pipeline_caps.rotation_flags, pipeline_caps.blend_flags);
+ // }
+
+ // TODO: params.output_hdr_metadata
+
+ // TODO:
+ // if (first surface to render)
+ // pipeline_param->output_background_color = 0xff000000; // black
+
+ va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id);
+ if(va_status != VA_STATUS_SUCCESS) {
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ va_status = vaBeginPicture(va_dpy, context_id, target_surface_id);
+ if(va_status != VA_STATUS_SUCCESS) {
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1);
+ if(va_status != VA_STATUS_SUCCESS) {
+ vaEndPicture(va_dpy, context_id);
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ va_status = vaEndPicture(va_dpy, context_id);
+ if(va_status != VA_STATUS_SUCCESS) {
+ fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status);
+ success = false;
+ goto done;
+ }
+
+ //vaSyncBuffer(self->va_dpy, self->buffer_id, 1000 * 1000);
+
+ done:
+ if(buffer_id)
+ vaDestroyBuffer(va_dpy, buffer_id);
+
+ if(input_surface)
+ vaDestroySurfaces(va_dpy, &input_surface, 1);
+
+ if(context_id)
+ vaDestroyContext(va_dpy, context_id);
+
+ if(config_id)
+ vaDestroyConfig(va_dpy, config_id);
+
+ return success;
+}
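
A usage note on the new helper, as a hedged sketch: the kms.c hunk earlier in this patch passes the DRM pixel format and the per-plane fd/offset/pitch/modifier arrays straight from the captured KMS plane. The standalone example below shows the call shape for a single-plane BGRX buffer; the fourcc follows the VA_FOURCC_BGRX hint in the TODO comment above, and the prototype is re-declared only to keep the sketch self-contained (in the repository it presumably lives in a utils header).

#include <stdbool.h>
#include <stdint.h>
#include <libavcodec/avcodec.h>
#include <va/va.h>

/* Prototype of the function added above, re-declared for this standalone sketch. */
bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame,
    int x, int y, uint32_t format, uint32_t width, uint32_t height,
    const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes);

static bool copy_single_plane_bgrx_to_frame(AVCodecContext *video_codec_context, AVFrame *video_frame,
    int dmabuf_fd, uint32_t offset, uint32_t pitch, uint64_t modifier, uint32_t width, uint32_t height)
{
    const int fds[1] = { dmabuf_fd };
    const uint32_t offsets[1] = { offset };
    const uint32_t pitches[1] = { pitch };
    const uint64_t modifiers[1] = { modifier };
    /* Copy the whole plane to the top-left corner of the encoder's VAAPI surface;
     * the video processing pipeline performs the RGB -> NV12/P010 conversion. */
    return vaapi_copy_drm_planes_to_video_surface(video_codec_context, video_frame, 0, 0,
                                                  VA_FOURCC_BGRX, width, height,
                                                  fds, offsets, pitches, modifiers, 1);
}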