From 4bcf976a71c3cb7660912b4e888a2197f81eb4b1 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Mon, 16 Sep 2024 02:17:29 +0200
Subject: Improve capture performance on amd/intel when using monitor capture: capture display frame using purely vaapi unless the monitor is rotated
---
 src/capture/kms.c         |  50 +++++++++----
 src/egl.c                 |  12 ++++
 src/encoder/video/vaapi.c |   5 +-
 src/main.cpp              |  15 ++--
 src/utils.c               | 177 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 238 insertions(+), 21 deletions(-)

diff --git a/src/capture/kms.c b/src/capture/kms.c
index 9287a8b..c36add4 100644
--- a/src/capture/kms.c
+++ b/src/capture/kms.c
@@ -49,6 +49,9 @@ typedef struct {
     bool is_x11;
     gsr_cursor x11_cursor;
+
+    AVCodecContext *video_codec_context;
+    bool performance_error_shown;
 } gsr_capture_kms;
 
 static void gsr_capture_kms_cleanup_kms_fds(gsr_capture_kms *self) {
@@ -194,6 +197,8 @@ static int gsr_capture_kms_start(gsr_capture *cap, AVCodecContext *video_codec_c
     frame->width = video_codec_context->width;
     frame->height = video_codec_context->height;
+
+    self->video_codec_context = video_codec_context;
 
     return 0;
 }
@@ -495,12 +500,6 @@ static int gsr_capture_kms_capture(gsr_capture *cap, AVFrame *frame, gsr_color_c
     if(drm_fd->has_hdr_metadata && self->params.hdr && hdr_metadata_is_supported_format(&drm_fd->hdr_metadata))
         gsr_kms_set_hdr_metadata(self, drm_fd);
 
-    EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
-    if(image) {
-        gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
-        self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
-    }
-
     const float texture_rotation = monitor_rotation_to_radians(self->monitor_rotation);
     const int target_x = max_int(0, frame->width / 2 - self->capture_size.x / 2);
     const int target_y = max_int(0, frame->height / 2 - self->capture_size.y / 2);
@@ -509,13 +508,40 @@ static int gsr_capture_kms_capture(gsr_capture *cap, AVFrame *frame, gsr_color_c
     if(!capture_is_combined_plane)
         capture_pos = (vec2i){drm_fd->x, drm_fd->y};
 
-    self->params.egl->glFlush();
-    self->params.egl->glFinish();
+    if(!self->performance_error_shown && self->monitor_rotation != GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(self->video_codec_context)) {
+        self->performance_error_shown = true;
+        fprintf(stderr,"gsr warning: gsr_capture_kms_capture: the monitor you are recording is rotated, composition will have to be used."
+            " If you experience performance problems in the video then record a single window on X11 or use the portal capture option instead\n");
+    }
 
-    gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
-                              (vec2i){target_x, target_y}, self->capture_size,
-                              capture_pos, self->capture_size,
-                              texture_rotation, self->external_texture_fallback);
+    /* Fast opengl free path */
+    if(self->monitor_rotation == GSR_MONITOR_ROT_0 && video_codec_context_is_vaapi(self->video_codec_context)) {
+        int fds[4];
+        uint32_t offsets[4];
+        uint32_t pitches[4];
+        uint64_t modifiers[4];
+        for(int i = 0; i < drm_fd->num_dma_bufs; ++i) {
+            fds[i] = drm_fd->dma_buf[i].fd;
+            offsets[i] = drm_fd->dma_buf[i].offset;
+            pitches[i] = drm_fd->dma_buf[i].pitch;
+            modifiers[i] = drm_fd->modifier;
+        }
+        vaapi_copy_drm_planes_to_video_surface(self->video_codec_context, frame, capture_pos.x, capture_pos.y, drm_fd->pixel_format, drm_fd->width, drm_fd->height, fds, offsets, pitches, modifiers, drm_fd->num_dma_bufs);
+    } else {
+        EGLImage image = gsr_capture_kms_create_egl_image_with_fallback(self, drm_fd);
+        if(image) {
+            gsr_capture_kms_bind_image_to_input_texture_with_fallback(self, image);
+            self->params.egl->eglDestroyImage(self->params.egl->egl_display, image);
+        }
+
+        self->params.egl->glFlush();
+        self->params.egl->glFinish();
+
+        gsr_color_conversion_draw(color_conversion, self->external_texture_fallback ? self->external_input_texture_id : self->input_texture_id,
+                                  (vec2i){target_x, target_y}, self->capture_size,
+                                  capture_pos, self->capture_size,
+                                  texture_rotation, self->external_texture_fallback);
+    }
 
     if(self->params.record_cursor) {
         gsr_kms_response_item *cursor_drm_fd = find_cursor_drm_if_on_monitor(self, drm_fd->connector_id, capture_is_combined_plane);

diff --git a/src/egl.c b/src/egl.c
index b4e3902..05d6680 100644
--- a/src/egl.c
+++ b/src/egl.c
@@ -388,11 +388,23 @@ static bool gsr_egl_load_egl(gsr_egl *self, void *library) {
 }
 
 static bool gsr_egl_proc_load_egl(gsr_egl *self) {
+    self->eglExportDMABUFImageQueryMESA = (FUNC_eglExportDMABUFImageQueryMESA)self->eglGetProcAddress("eglExportDMABUFImageQueryMESA");
+    self->eglExportDMABUFImageMESA = (FUNC_eglExportDMABUFImageMESA)self->eglGetProcAddress("eglExportDMABUFImageMESA");
     self->glEGLImageTargetTexture2DOES = (FUNC_glEGLImageTargetTexture2DOES)self->eglGetProcAddress("glEGLImageTargetTexture2DOES");
     self->eglQueryDisplayAttribEXT = (FUNC_eglQueryDisplayAttribEXT)self->eglGetProcAddress("eglQueryDisplayAttribEXT");
     self->eglQueryDeviceStringEXT = (FUNC_eglQueryDeviceStringEXT)self->eglGetProcAddress("eglQueryDeviceStringEXT");
     self->eglQueryDmaBufModifiersEXT = (FUNC_eglQueryDmaBufModifiersEXT)self->eglGetProcAddress("eglQueryDmaBufModifiersEXT");
 
+    if(!self->eglExportDMABUFImageQueryMESA) {
+        fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageQueryMESA\n");
+        return false;
+    }
+
+    if(!self->eglExportDMABUFImageMESA) {
+        fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageMESA\n");
+        return false;
+    }
+
     if(!self->glEGLImageTargetTexture2DOES) {
         fprintf(stderr, "gsr error: gsr_egl_load failed: could not find glEGLImageTargetTexture2DOES\n");
         return false;

diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c
index 7a2abfc..03218cb 100644
--- a/src/encoder/video/vaapi.c
+++ b/src/encoder/video/vaapi.c
@@ -41,8 +41,7 @@ static bool gsr_video_encoder_vaapi_setup_context(gsr_video_encoder_vaapi *self,
         return false;
     }
 
-    AVHWFramesContext *hw_frame_context =
-        (AVHWFramesContext *)frame_context->data;
+    AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data;
     hw_frame_context->width = video_codec_context->width;
     hw_frame_context->height = video_codec_context->height;
     hw_frame_context->sw_format = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? AV_PIX_FMT_P010LE : AV_PIX_FMT_NV12;
@@ -51,7 +50,7 @@ static bool gsr_video_encoder_vaapi_setup_context(gsr_video_encoder_vaapi *self,
     //hw_frame_context->initial_pool_size = 20;
 
-    AVVAAPIDeviceContext *vactx =((AVHWDeviceContext*)self->device_ctx->data)->hwctx;
+    AVVAAPIDeviceContext *vactx = ((AVHWDeviceContext*)self->device_ctx->data)->hwctx;
     self->va_dpy = vactx->display;
 
     if (av_hwframe_ctx_init(frame_context) < 0) {

diff --git a/src/main.cpp b/src/main.cpp
index 7687b07..f2526f8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -519,13 +519,13 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt,
     if(codec_context->codec_id == AV_CODEC_ID_AV1 || codec_context->codec_id == AV_CODEC_ID_H264 || codec_context->codec_id == AV_CODEC_ID_HEVC) {
         switch(video_quality) {
             case VideoQuality::MEDIUM:
-                codec_context->global_quality = 160 * quality_multiply;
+                codec_context->global_quality = 150 * quality_multiply;
                 break;
             case VideoQuality::HIGH:
-                codec_context->global_quality = 130 * quality_multiply;
+                codec_context->global_quality = 120 * quality_multiply;
                 break;
            case VideoQuality::VERY_HIGH:
-                codec_context->global_quality = 110 * quality_multiply;
+                codec_context->global_quality = 100 * quality_multiply;
                 break;
            case VideoQuality::ULTRA:
                codec_context->global_quality = 90 * quality_multiply;
@@ -2753,6 +2753,11 @@ int main(int argc, char **argv) {
         }
     }
+    if(wayland && is_monitor_capture) {
+        fprintf(stderr, "gsr warning: it's not possible to sync video to the recorded monitor exactly on wayland."
+            " If you experience stutter in the video then record with the portal capture option (-w portal) or use X11 instead\n");
+    }
+
     // TODO: Fix constant framerate not working properly on amd/intel because capture framerate gets locked to the same framerate as
     // game framerate, which doesn't work well when you need to encode multiple duplicate frames (AMD/Intel is slow at encoding!).
     // It also appears to skip audio frames on nvidia wayland? why? that should be fine, but it causes video stuttering because of audio/video sync.
@@ -3442,8 +3447,8 @@ int main(int argc, char **argv) {
         const double frame_sleep_fps = 1.0 / update_fps;
         const double sleep_time = frame_sleep_fps - (frame_end - frame_start);
         if(sleep_time > 0.0) {
-        if(damaged)
-            av_usleep(sleep_time * 1000.0 * 1000.0);
+            if(damaged)
+                av_usleep(sleep_time * 1000.0 * 1000.0);
             else
                 av_usleep(2 * 1000.0); // 2 milliseconds
         }

diff --git a/src/utils.c b/src/utils.c
index 3e4138a..2b1b07c 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -12,9 +12,11 @@
 #include
 #include
-
 #include
 #include
+#include
+#include
+#include
 
 double clock_get_monotonic_seconds(void) {
     struct timespec ts;
@@ -624,3 +626,176 @@ void setup_dma_buf_attrs(intptr_t *img_attr, uint32_t format, uint32_t width, ui
     img_attr[img_attr_index++] = EGL_NONE;
     assert(img_attr_index <= 44);
 }
+
+static VADisplay video_codec_context_get_vaapi_display(AVCodecContext *video_codec_context) {
+    AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)video_codec_context->hw_frames_ctx->data;
+    AVHWDeviceContext *device_context = (AVHWDeviceContext*)hw_frame_context->device_ctx;
+    if(device_context->type == AV_HWDEVICE_TYPE_VAAPI) {
+        AVVAAPIDeviceContext *vactx = device_context->hwctx;
+        return vactx->display;
+    }
+    return NULL;
+}
+
+bool video_codec_context_is_vaapi(AVCodecContext *video_codec_context) {
+    AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)video_codec_context->hw_frames_ctx->data;
+    AVHWDeviceContext *device_context = (AVHWDeviceContext*)hw_frame_context->device_ctx;
+    return device_context->type == AV_HWDEVICE_TYPE_VAAPI;
+}
+
+bool vaapi_copy_drm_planes_to_video_surface(AVCodecContext *video_codec_context, AVFrame *video_frame, int x, int y, uint32_t format, uint32_t width, uint32_t height, const int *fds, const uint32_t *offsets, const uint32_t *pitches, const uint64_t *modifiers, int num_planes) {
+    VAConfigID config_id = 0;
+    VAContextID context_id = 0;
+    VASurfaceID input_surface = 0;
+    VABufferID buffer_id = 0;
+    bool success = true;
+
+    VADisplay va_dpy = video_codec_context_get_vaapi_display(video_codec_context);
+    if(!va_dpy) {
+        success = false;
+        goto done;
+    }
+
+    VAStatus va_status = vaCreateConfig(va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &config_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateConfig failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    VASurfaceID target_surface_id = (uintptr_t)video_frame->data[3];
+    va_status = vaCreateContext(va_dpy, config_id, width, height, VA_PROGRESSIVE, &target_surface_id, 1, &context_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateContext failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    VADRMPRIMESurfaceDescriptor buf = {0};
+    buf.fourcc = format;//VA_FOURCC_BGRX; // TODO: VA_FOURCC_BGRA, VA_FOURCC_X2R10G10B10
+    buf.width = width;
+    buf.height = height;
+    buf.num_objects = num_planes;
+    buf.num_layers = 1;
+    buf.layers[0].drm_format = format;
+    buf.layers[0].num_planes = buf.num_objects;
+    for(int i = 0; i < num_planes; ++i) {
+        buf.objects[i].fd = fds[i];
+        buf.objects[i].size = height * pitches[i]; // TODO:
+        buf.objects[i].drm_format_modifier = modifiers[i];
+
+        buf.layers[0].object_index[i] = i;
+        buf.layers[0].offset[i] = offsets[i];
+        buf.layers[0].pitch[i] = pitches[i];
+    }
+
+    VASurfaceAttrib attribs[2] = {0};
+    attribs[0].type = VASurfaceAttribMemoryType;
+    attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
+    attribs[0].value.type = VAGenericValueTypeInteger;
+    attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
+    attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
+    attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
+    attribs[1].value.type = VAGenericValueTypePointer;
+    attribs[1].value.value.p = &buf;
+
+    // TODO: RT_FORMAT with 10 bit/hdr, VA_RT_FORMAT_RGB32_10
+    va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_RGB32, width, height, &input_surface, 1, attribs, 2);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateSurfaces failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    // TODO:
+    const VARectangle output_region = {
+        .x = x,
+        .y = y,
+        .width = width,
+        .height = height
+    };
+
+    // Copying a surface to another surface will automatically perform the color conversion. Thanks vaapi!
+    VAProcPipelineParameterBuffer params = {0};
+    params.surface = input_surface;
+    params.surface_region = NULL;
+    params.output_region = &output_region;
+    params.output_background_color = 0;
+    params.filter_flags = VA_FRAME_PICTURE;
+    params.pipeline_flags = VA_PROC_PIPELINE_FAST;
+
+    params.input_color_properties.colour_primaries = 1;
+    params.input_color_properties.transfer_characteristics = 1;
+    params.input_color_properties.matrix_coefficients = 1;
+    params.surface_color_standard = VAProcColorStandardBT709; // TODO:
+    params.input_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
+
+    params.output_color_properties.colour_primaries = 1;
+    params.output_color_properties.transfer_characteristics = 1;
+    params.output_color_properties.matrix_coefficients = 1;
+    params.output_color_standard = VAProcColorStandardBT709; // TODO:
+    params.output_color_properties.color_range = video_frame->color_range == AVCOL_RANGE_JPEG ? VA_SOURCE_RANGE_FULL : VA_SOURCE_RANGE_REDUCED;
+
+    params.processing_mode = VAProcPerformanceMode;
+
+    // VAProcPipelineCaps pipeline_caps = {0};
+    // va_status = vaQueryVideoProcPipelineCaps(self->va_dpy,
+    //                                          self->context_id,
+    //                                          NULL, 0,
+    //                                          &pipeline_caps);
+    // if(va_status == VA_STATUS_SUCCESS) {
+    //     fprintf(stderr, "pipeline_caps: %u, %u\n", (unsigned int)pipeline_caps.rotation_flags, pipeline_caps.blend_flags);
+    // }
+
+    // TODO: params.output_hdr_metadata
+
+    // TODO:
+    // if (first surface to render)
+    //     pipeline_param->output_background_color = 0xff000000; // black
+
+    va_status = vaCreateBuffer(va_dpy, context_id, VAProcPipelineParameterBufferType, sizeof(params), 1, &params, &buffer_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaCreateBuffer failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    va_status = vaBeginPicture(va_dpy, context_id, target_surface_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaBeginPicture failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    va_status = vaRenderPicture(va_dpy, context_id, &buffer_id, 1);
+    if(va_status != VA_STATUS_SUCCESS) {
+        vaEndPicture(va_dpy, context_id);
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaRenderPicture failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    va_status = vaEndPicture(va_dpy, context_id);
+    if(va_status != VA_STATUS_SUCCESS) {
+        fprintf(stderr, "gsr error: vaapi_copy_drm_planes_to_video_surface: vaEndPicture failed, error: %d\n", va_status);
+        success = false;
+        goto done;
+    }
+
+    //vaSyncBuffer(self->va_dpy, self->buffer_id, 1000 * 1000);
+
+    done:
+    if(buffer_id)
+        vaDestroyBuffer(va_dpy, buffer_id);
+
+    if(input_surface)
+        vaDestroySurfaces(va_dpy, &input_surface, 1);
+
+    if(context_id)
+        vaDestroyContext(va_dpy, context_id);
+
+    if(config_id)
+        vaDestroyConfig(va_dpy, config_id);
+
+    return success;
+}
-- cgit v1.2.3
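
Editor's note: the zero-copy path this patch adds rests on one libva mechanism: the KMS DMA-BUF planes are wrapped in a VA surface using the DRM_PRIME_2 memory type, and the video-processing pipeline then blits (and color converts) that surface into the encoder's target surface. The sketch below illustrates only the import step in isolation. It is not code from the patch; the function name import_bgrx_dmabuf and the single-plane BGRX/XRGB8888 buffer parameters (fd, pitch, offset, modifier) are assumed example values.

#include <stdint.h>
#include <stdio.h>
#include <va/va.h>
#include <va/va_drmcommon.h>
#include <libdrm/drm_fourcc.h>

// Minimal sketch (not from the patch): import a single-plane BGRX/XRGB8888 DMA-BUF
// as a VA surface via VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, the same mechanism
// vaapi_copy_drm_planes_to_video_surface uses before its vaRenderPicture blit.
static VASurfaceID import_bgrx_dmabuf(VADisplay dpy, int dmabuf_fd, uint32_t width, uint32_t height,
                                      uint32_t pitch, uint32_t offset, uint64_t modifier) {
    VADRMPRIMESurfaceDescriptor desc = {0};
    desc.fourcc = VA_FOURCC_BGRX;                   // VA view of a typical XRGB8888 scanout buffer
    desc.width = width;
    desc.height = height;
    desc.num_objects = 1;                           // one DMA-BUF backs the whole surface
    desc.objects[0].fd = dmabuf_fd;
    desc.objects[0].size = pitch * height;
    desc.objects[0].drm_format_modifier = modifier; // e.g. DRM_FORMAT_MOD_LINEAR or a vendor tiling modifier
    desc.num_layers = 1;
    desc.layers[0].drm_format = DRM_FORMAT_XRGB8888;
    desc.layers[0].num_planes = 1;
    desc.layers[0].object_index[0] = 0;
    desc.layers[0].offset[0] = offset;
    desc.layers[0].pitch[0] = pitch;

    VASurfaceAttrib attribs[2] = {0};
    attribs[0].type = VASurfaceAttribMemoryType;
    attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
    attribs[0].value.type = VAGenericValueTypeInteger;
    attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
    attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
    attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
    attribs[1].value.type = VAGenericValueTypePointer;
    attribs[1].value.value.p = &desc;

    VASurfaceID surface = VA_INVALID_SURFACE;
    VAStatus status = vaCreateSurfaces(dpy, VA_RT_FORMAT_RGB32, width, height, &surface, 1, attribs, 2);
    if(status != VA_STATUS_SUCCESS) {
        fprintf(stderr, "vaCreateSurfaces failed, error: %d\n", status);
        return VA_INVALID_SURFACE;
    }
    return surface; // usable as the input surface of a VAProcPipeline blit into the encoder surface
}

DRM_PRIME_2 is used rather than the older external-buffer import because its descriptor carries a format modifier per object, which appears to be why the patch forwards drm_fd->modifier for every plane it wraps.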