From 41bd6cee631a16076435984891f2134443ecec31 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Wed, 18 Sep 2024 00:11:23 +0200 Subject: Use async_depth option on vaapi for much better encoding performance --- TODO | 7 +++++++ include/damage.h | 3 ++- src/capture/kms.c | 3 +++ src/damage.c | 27 +++++++++++++++------------ src/main.cpp | 4 +++- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/TODO b/TODO index de303e3..0505b1d 100644 --- a/TODO +++ b/TODO @@ -157,3 +157,10 @@ Add cbr option. Restart replay/update video resolution if monitor resolution changes. Fix pure vaapi copy on intel. + +ffmpeg supports vulkan encoding now (h264!). Doesn't work on amd yet because mesa is missing VK_KHR_video_maintenance1, see https://gitlab.freedesktop.org/mesa/mesa/-/issues/11857. Test on nvidia! + +Test vaapi low latency mode (setenv("AMD_DEBUG", "lowlatencyenc", true);), added in mesa 24.1.4, released on July 17, 2024. Note that this forces gpu power usage to max at all times, even when recording at 2 fps. +Use nvidia low latency options for better encoding times. + +Test to find the ideal async_depth value. Increasing async_depth also increases gpu memory usage a lot (from 100MB to 500MB when moving from async_depth 2 to 16) at 4k resolution. Setting it to 8 increases it by 200MB which might be ok. 
diff --git a/include/damage.h b/include/damage.h index 2bf43d7..7229418 100644 --- a/include/damage.h +++ b/include/damage.h @@ -1,6 +1,7 @@ #ifndef GSR_DAMAGE_H #define GSR_DAMAGE_H +#include "cursor.h" #include "utils.h" #include #include @@ -31,7 +32,7 @@ typedef struct { //vec2i window_pos; vec2i window_size; - vec2i cursor_position; /* Relative to |window| */ + gsr_cursor cursor; /* Relative to |window| */ gsr_monitor monitor; char monitor_name[32]; } gsr_damage; diff --git a/src/capture/kms.c b/src/capture/kms.c index ba8510d..030d609 100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -497,6 +497,9 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color } static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, vec2i capture_pos, vec2i target_pos) { + if(!self->x11_cursor.visible) + return; + gsr_cursor_tick(&self->x11_cursor, DefaultRootWindow(self->params.egl->x11.dpy)); const vec2i cursor_pos = { diff --git a/src/damage.c b/src/damage.c index 868a2e6..8e62762 100644 --- a/src/damage.c +++ b/src/damage.c @@ -53,6 +53,9 @@ bool gsr_damage_init(gsr_damage *self, gsr_egl *egl, bool track_cursor) { return false; } + if(self->track_cursor) + self->track_cursor = gsr_cursor_init(&self->cursor, self->egl, self->egl->x11.dpy) == 0; + XRRSelectInput(self->egl->x11.dpy, DefaultRootWindow(self->egl->x11.dpy), RRScreenChangeNotifyMask | RRCrtcChangeNotifyMask | RROutputChangeNotifyMask); self->damaged = true; @@ -65,6 +68,8 @@ void gsr_damage_deinit(gsr_damage *self) { self->damage = None; } + gsr_cursor_deinit(&self->cursor); + self->damage_event = 0; self->damage_error = 0; @@ -245,16 +250,11 @@ static void gsr_damage_on_damage_event(gsr_damage *self, XEvent *xev) { XFlush(self->egl->x11.dpy); } -static void gsr_damage_on_event_cursor(gsr_damage *self) { - Window root_return = None; - Window child_return = None; - int dummy_i; - unsigned int dummy_u; - vec2i cursor_position = {0, 0}; - 
XQueryPointer(self->egl->x11.dpy, self->window, &root_return, &child_return, &dummy_i, &dummy_i, &cursor_position.x, &cursor_position.y, &dummy_u); - if(cursor_position.x != self->cursor_position.x || cursor_position.y != self->cursor_position.y) { - self->cursor_position = cursor_position; - const gsr_rectangle cursor_region = { self->cursor_position, {64, 64} }; // TODO: Track cursor size +static void gsr_damage_on_tick_cursor(gsr_damage *self) { + vec2i prev_cursor_pos = self->cursor.position; + gsr_cursor_tick(&self->cursor, self->window); + if(self->cursor.position.x != prev_cursor_pos.x || self->cursor.position.y != prev_cursor_pos.y) { + const gsr_rectangle cursor_region = { self->cursor.position, self->cursor.size }; switch(self->track_type) { case GSR_DAMAGE_TRACK_NONE: { self->damaged = true; @@ -302,14 +302,17 @@ void gsr_damage_on_event(gsr_damage *self, XEvent *xev) { if(self->damage_event && xev->type == self->damage_event + XDamageNotify) gsr_damage_on_damage_event(self, xev); + + if(self->track_cursor) + gsr_cursor_on_event(&self->cursor, xev); } void gsr_damage_tick(gsr_damage *self) { if(self->damage_event == 0 || self->track_type == GSR_DAMAGE_TRACK_NONE) return; - if(self->track_cursor && !self->damaged) - gsr_damage_on_event_cursor(self); + if(self->track_cursor && self->cursor.visible && !self->damaged) + gsr_damage_on_tick_cursor(self); } bool gsr_damage_is_damaged(gsr_damage *self) { diff --git a/src/main.cpp b/src/main.cpp index ddbf95b..fab768d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -887,6 +887,8 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide } else { // TODO: More quality options //av_dict_set_int(&options, "low_power", 1, 0); + // Improves performance but increases vram + av_dict_set_int(&options, "async_depth", 8, 0); if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: @@ -3389,7 +3391,7 @@ int main(int argc, char **argv) { const int64_t expected_frames = 
std::round((this_video_frame_time - record_start_time) / target_fps); int num_frames = std::max((int64_t)0LL, expected_frames - video_pts_counter); const double num_frames_seconds = num_frames * target_fps; - if((damaged || num_frames_seconds >= damage_timeout_seconds) && !paused/* && fps_counter < fps + 100*/) { + if((damaged || (framerate_mode == FramerateMode::CONSTANT && num_frames > 0) || (framerate_mode != FramerateMode::CONSTANT && num_frames_seconds >= damage_timeout_seconds)) && !paused) { gsr_damage_clear(&damage); if(capture->clear_damage) capture->clear_damage(capture); -- cgit v1.2.3