aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2024-09-18 00:11:23 +0200
committerdec05eba <dec05eba@protonmail.com>2024-09-18 11:15:10 +0200
commit41bd6cee631a16076435984891f2134443ecec31 (patch)
tree18e89b80a663a6b7980bb7f6f7fe453d5aa75684
parent702c681a9566a0b49e66168f91ceac97e44cef97 (diff)
Use async_depth option on vaapi for much better encoding performance
-rw-r--r--TODO7
-rw-r--r--include/damage.h3
-rw-r--r--src/capture/kms.c3
-rw-r--r--src/damage.c27
-rw-r--r--src/main.cpp4
5 files changed, 30 insertions, 14 deletions
diff --git a/TODO b/TODO
index de303e3..0505b1d 100644
--- a/TODO
+++ b/TODO
@@ -157,3 +157,10 @@ Add cbr option.
Restart replay/update video resolution if monitor resolution changes.
Fix pure vaapi copy on intel.
+
+FFmpeg supports Vulkan encoding now (H.264!). It doesn't work on AMD yet because Mesa is missing VK_KHR_video_maintenance1; see https://gitlab.freedesktop.org/mesa/mesa/-/issues/11857. Test on NVIDIA!
+
+Test VAAPI low latency mode (setenv("AMD_DEBUG", "lowlatencyenc", true);), added in Mesa 24.1.4, released on July 17, 2024. Note that this forces GPU power usage to max at all times, even when recording at 2 fps.
+Use nvidia low latency options for better encoding times.
+
+Test to find the ideal async_depth value. Increasing async_depth also increases GPU memory usage a lot (from 100 MB to 500 MB when moving from async_depth 2 to 16) at 4K resolution. Setting it to 8 increases it by 200 MB, which might be OK.
diff --git a/include/damage.h b/include/damage.h
index 2bf43d7..7229418 100644
--- a/include/damage.h
+++ b/include/damage.h
@@ -1,6 +1,7 @@
#ifndef GSR_DAMAGE_H
#define GSR_DAMAGE_H
+#include "cursor.h"
#include "utils.h"
#include <stdbool.h>
#include <stdint.h>
@@ -31,7 +32,7 @@ typedef struct {
//vec2i window_pos;
vec2i window_size;
- vec2i cursor_position; /* Relative to |window| */
+ gsr_cursor cursor; /* Relative to |window| */
gsr_monitor monitor;
char monitor_name[32];
} gsr_damage;
diff --git a/src/capture/kms.c b/src/capture/kms.c
index ba8510d..030d609 100644
--- a/src/capture/kms.c
+++ b/src/capture/kms.c
@@ -497,6 +497,9 @@ static void render_drm_cursor(gsr_capture_kms *self, gsr_color_conversion *color
}
static void render_x11_cursor(gsr_capture_kms *self, gsr_color_conversion *color_conversion, vec2i capture_pos, vec2i target_pos) {
+ if(!self->x11_cursor.visible)
+ return;
+
gsr_cursor_tick(&self->x11_cursor, DefaultRootWindow(self->params.egl->x11.dpy));
const vec2i cursor_pos = {
diff --git a/src/damage.c b/src/damage.c
index 868a2e6..8e62762 100644
--- a/src/damage.c
+++ b/src/damage.c
@@ -53,6 +53,9 @@ bool gsr_damage_init(gsr_damage *self, gsr_egl *egl, bool track_cursor) {
return false;
}
+ if(self->track_cursor)
+ self->track_cursor = gsr_cursor_init(&self->cursor, self->egl, self->egl->x11.dpy) == 0;
+
XRRSelectInput(self->egl->x11.dpy, DefaultRootWindow(self->egl->x11.dpy), RRScreenChangeNotifyMask | RRCrtcChangeNotifyMask | RROutputChangeNotifyMask);
self->damaged = true;
@@ -65,6 +68,8 @@ void gsr_damage_deinit(gsr_damage *self) {
self->damage = None;
}
+ gsr_cursor_deinit(&self->cursor);
+
self->damage_event = 0;
self->damage_error = 0;
@@ -245,16 +250,11 @@ static void gsr_damage_on_damage_event(gsr_damage *self, XEvent *xev) {
XFlush(self->egl->x11.dpy);
}
-static void gsr_damage_on_event_cursor(gsr_damage *self) {
- Window root_return = None;
- Window child_return = None;
- int dummy_i;
- unsigned int dummy_u;
- vec2i cursor_position = {0, 0};
- XQueryPointer(self->egl->x11.dpy, self->window, &root_return, &child_return, &dummy_i, &dummy_i, &cursor_position.x, &cursor_position.y, &dummy_u);
- if(cursor_position.x != self->cursor_position.x || cursor_position.y != self->cursor_position.y) {
- self->cursor_position = cursor_position;
- const gsr_rectangle cursor_region = { self->cursor_position, {64, 64} }; // TODO: Track cursor size
+static void gsr_damage_on_tick_cursor(gsr_damage *self) {
+ vec2i prev_cursor_pos = self->cursor.position;
+ gsr_cursor_tick(&self->cursor, self->window);
+ if(self->cursor.position.x != prev_cursor_pos.x || self->cursor.position.y != prev_cursor_pos.y) {
+ const gsr_rectangle cursor_region = { self->cursor.position, self->cursor.size };
switch(self->track_type) {
case GSR_DAMAGE_TRACK_NONE: {
self->damaged = true;
@@ -302,14 +302,17 @@ void gsr_damage_on_event(gsr_damage *self, XEvent *xev) {
if(self->damage_event && xev->type == self->damage_event + XDamageNotify)
gsr_damage_on_damage_event(self, xev);
+
+ if(self->track_cursor)
+ gsr_cursor_on_event(&self->cursor, xev);
}
void gsr_damage_tick(gsr_damage *self) {
if(self->damage_event == 0 || self->track_type == GSR_DAMAGE_TRACK_NONE)
return;
- if(self->track_cursor && !self->damaged)
- gsr_damage_on_event_cursor(self);
+ if(self->track_cursor && self->cursor.visible && !self->damaged)
+ gsr_damage_on_tick_cursor(self);
}
bool gsr_damage_is_damaged(gsr_damage *self) {
diff --git a/src/main.cpp b/src/main.cpp
index ddbf95b..fab768d 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -887,6 +887,8 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide
} else {
// TODO: More quality options
//av_dict_set_int(&options, "low_power", 1, 0);
+ // Improves performance but increases vram
+ av_dict_set_int(&options, "async_depth", 8, 0);
if(codec_context->codec_id == AV_CODEC_ID_H264) {
// TODO:
@@ -3389,7 +3391,7 @@ int main(int argc, char **argv) {
const int64_t expected_frames = std::round((this_video_frame_time - record_start_time) / target_fps);
int num_frames = std::max((int64_t)0LL, expected_frames - video_pts_counter);
const double num_frames_seconds = num_frames * target_fps;
- if((damaged || num_frames_seconds >= damage_timeout_seconds) && !paused/* && fps_counter < fps + 100*/) {
+ if((damaged || (framerate_mode == FramerateMode::CONSTANT && num_frames > 0) || (framerate_mode != FramerateMode::CONSTANT && num_frames_seconds >= damage_timeout_seconds)) && !paused) {
gsr_damage_clear(&damage);
if(capture->clear_damage)
capture->clear_damage(capture);