about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 4
-rw-r--r-- include/NvFBCLibrary.hpp 63
-rw-r--r-- src/main.cpp 180
3 files changed, 137 insertions, 110 deletions
diff --git a/README.md b/README.md
index 78545cf..c4e7a3b 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,6 @@ If you are using a variable refresh rate monitor, then choose to record "screen-
For screen capture to work with PRIME (laptops with a nvidia gpu), you must set the primary GPU to use your dedicated nvidia graphics card. You can do this by selecting "NVIDIA (Performance Mode) in nvidia settings:\
![](https://dec05eba.com/images/nvidia-settings-prime.png)\
and then rebooting your laptop.
-### TEMPORARY ISSUE ###
-screen-direct capture has been temporary disabled as it causes issues with stuttering. This might be a nvfbc bug.
# Performance
When recording Legend of Zelda Breath of the Wild at 4k, fps drops from 30 to 7 when using OBS Studio + nvenc, however when using this screen recorder the fps remains at 30.\
@@ -59,6 +57,6 @@ FFMPEG only uses the GPU with CUDA when doing transcoding from an input video to
libraries at compile-time.
* Clean up the code!
* Dynamically change bitrate/resolution to match desired fps. This would be helpful when streaming for example, where the encode output speed also depends on upload speed to the streaming service.
-* Show cursor when recording. Currently the cursor is not visible when recording a window.
+* Show cursor when recording. Currently the cursor is not visible when recording a window or screen-direct.
* Implement opengl injection to capture texture. This fixes composition issues and (VRR) without having to use NvFBC direct capture.
* Always use direct capture with NvFBC once the capture issue in mpv fullscreen has been resolved (maybe detect if direct capture fails in nvfbc and switch to non-direct recording. NvFBC says if direct capture fails).
diff --git a/include/NvFBCLibrary.hpp b/include/NvFBCLibrary.hpp
index 19b9bcc..dc7db1f 100644
--- a/include/NvFBCLibrary.hpp
+++ b/include/NvFBCLibrary.hpp
@@ -61,8 +61,32 @@ public:
if(!library || !display_to_capture || !display_width || !display_height || fbc_handle_created)
return false;
+ this->fps = fps;
const bool capture_region = (x > 0 || y > 0 || width > 0 || height > 0);
+ bool supports_direct_cursor = false;
+ int driver_major_version = 0;
+ int driver_minor_version = 0;
+ if(direct_capture && get_driver_version(&driver_major_version, &driver_minor_version)) {
+ fprintf(stderr, "Info: detected nvidia version: %d.%d\n", driver_major_version, driver_minor_version);
+
+ if(version_at_least(driver_major_version, driver_minor_version, 515, 57) && version_less_than(driver_major_version, driver_minor_version, 520, 56)) {
+ direct_capture = false;
+ fprintf(stderr, "Warning: \"screen-direct\" has temporary been disabled as it causes stuttering with driver versions >= 515.57 and < 520.56. Please update your driver if possible. Capturing \"screen\" instead.\n");
+ }
+
+ // TODO:
+ // Cursor capture disabled because moving the cursor doesn't update capture rate to monitor hz and instead captures at 10-30 hz
+ /*
+ if(direct_capture) {
+ if(version_at_least(driver_major_version, driver_minor_version, 515, 57))
+ supports_direct_cursor = true;
+ else
+ fprintf(stderr, "Info: capturing \"screen-direct\" but driver version appears to be less than 515.57. Disabling capture of cursor. Please update your driver if you want to capture your cursor or record \"screen\" instead.\n");
+ }
+ */
+ }
+
NVFBCSTATUS status;
NVFBC_TRACKING_TYPE tracking_type;
bool capture_session_created = false;
@@ -129,14 +153,14 @@ public:
memset(&create_capture_params, 0, sizeof(create_capture_params));
create_capture_params.dwVersion = NVFBC_CREATE_CAPTURE_SESSION_PARAMS_VER;
create_capture_params.eCaptureType = NVFBC_CAPTURE_SHARED_CUDA;
- create_capture_params.bWithCursor = (!direct_capture || driver_supports_direct_capture_cursor()) ? NVFBC_TRUE : NVFBC_FALSE;
+ create_capture_params.bWithCursor = (!direct_capture || supports_direct_cursor) ? NVFBC_TRUE : NVFBC_FALSE;
if(capture_region) {
create_capture_params.captureBox = { x, y, width, height };
*display_width = width;
*display_height = height;
}
create_capture_params.eTrackingType = tracking_type;
- create_capture_params.dwSamplingRateMs = 1000 / fps;
+ create_capture_params.dwSamplingRateMs = 1000 / (fps + 1);
create_capture_params.bAllowDirectCapture = direct_capture ? NVFBC_TRUE : NVFBC_FALSE;
create_capture_params.bPushModel = direct_capture ? NVFBC_TRUE : NVFBC_FALSE;
if(tracking_type == NVFBC_TRACKING_OUTPUT)
@@ -192,13 +216,14 @@ public:
NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab_params;
memset(&grab_params, 0, sizeof(grab_params));
grab_params.dwVersion = NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER;
- grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT | NVFBC_TOCUDA_GRAB_FLAGS_FORCE_REFRESH;
+ grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT;// | NVFBC_TOCUDA_GRAB_FLAGS_FORCE_REFRESH;//NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY;
grab_params.pFrameGrabInfo = &frame_info;
grab_params.pCUDADeviceBuffer = cu_device_ptr;
+ grab_params.dwTimeoutMs = 0;//1000 / (fps + 10);
status = nv_fbc_function_list.nvFBCToCudaGrabFrame(nv_fbc_handle, &grab_params);
if(status != NVFBC_SUCCESS) {
- fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
+ fprintf(stderr, "Error: capture: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle));
return false;
}
@@ -246,28 +271,45 @@ private:
}
// TODO: Test with optimus and open kernel modules
- static bool driver_supports_direct_capture_cursor() {
+ static bool get_driver_version(int *major, int *minor) { // Parses the driver version out of /proc/driver/nvidia/version into *major / *minor; returns true only when both numbers were read.
+ *major = 0; // Outputs are zeroed up front so they hold 0 on every failure path.
+ *minor = 0;
+
FILE *f = fopen("/proc/driver/nvidia/version", "rb");
- if(!f)
+ if(!f) {
+ fprintf(stderr, "Warning: failed to get nvidia driver version (failed to read /proc/driver/nvidia/version)\n");
return false;
+ }
char buffer[2048];
size_t bytes_read = fread(buffer, 1, sizeof(buffer) - 1, f);
buffer[bytes_read] = '\0';
- bool supports_cursor = false;
+ bool success = false; // Flipped to true only after sscanf below matches both version fields.
const char *p = strstr(buffer, "Kernel Module");
if(p) {
p += 13;
int driver_major_version = 0, driver_minor_version = 0;
if(sscanf(p, "%d.%d", &driver_major_version, &driver_minor_version) == 2) {
- if(driver_major_version > 515 || (driver_major_version == 515 && driver_minor_version >= 57))
- supports_cursor = true;
+ *major = driver_major_version; // Publish the parsed values to the caller.
+ *minor = driver_minor_version;
+ success = true;
}
}
+ if(!success) // Reached when the "Kernel Module" marker is missing or the text after it is not in "<int>.<int>" form.
+ fprintf(stderr, "Warning: failed to get nvidia driver version\n");
+
fclose(f);
- return supports_cursor;
+ return success;
+ }
+
+ static bool version_at_least(int major, int minor, int expected_major, int expected_minor) { // True when (major, minor) >= (expected_major, expected_minor).
+ return major > expected_major || (major == expected_major && minor >= expected_minor); // Major decides first; minor is compared only when the majors are equal.
+ }
+
+ static bool version_less_than(int major, int minor, int expected_major, int expected_minor) { // True when (major, minor) < (expected_major, expected_minor).
+ return major < expected_major || (major == expected_major && minor < expected_minor); // Major decides first; minor is compared only when the majors are equal.
}
private:
void *library = nullptr;
@@ -275,4 +317,5 @@ private:
NVFBC_API_FUNCTION_LIST nv_fbc_function_list;
NVFBC_SESSION_HANDLE nv_fbc_handle;
bool fbc_handle_created = false;
+ int fps = 0;
};
diff --git a/src/main.cpp b/src/main.cpp
index 021d6e3..05f062d 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -882,10 +882,13 @@ static void open_video(AVCodecContext *codec_context,
// with pretty good performance but you now have to choose p1-p7, which are gpu agnostic and on
// older gpus p5-p7 slow the gpu down to a crawl...
// "hq" is now just an alias for p7 in ffmpeg :(
+ // TODO: Temporary disable because of stuttering?
+ /*
if(very_old_gpu)
av_dict_set(&options, "preset", supports_p4 ? "p4" : "medium", 0);
else
av_dict_set(&options, "preset", supports_p7 ? "p7" : "slow", 0);
+ */
av_dict_set(&options, "tune", "hq", 0);
av_dict_set(&options, "rc", "constqp", 0);
@@ -948,16 +951,8 @@ static void usage() {
exit(1);
}
-static sig_atomic_t started = 0;
static sig_atomic_t running = 1;
static sig_atomic_t save_replay = 0;
-static const char *pid_file = "/tmp/gpu-screen-recorder";
-
-static void term_handler(int) {
- if(started)
- unlink(pid_file);
- exit(0);
-}
static void int_handler(int) {
running = 0;
@@ -1172,7 +1167,6 @@ static bool is_livestream_path(const char *str) {
}
int main(int argc, char **argv) {
- signal(SIGTERM, term_handler);
signal(SIGINT, int_handler);
signal(SIGUSR1, save_replay_handler);
@@ -1213,6 +1207,8 @@ int main(int argc, char **argv) {
VideoCodec video_codec;
const char *codec_to_use = args["-k"].value();
+ fprintf(stderr, "Info: forcing codec to h264 to investigate stuttering with some configs\n");
+ codec_to_use = "h264";
if(!codec_to_use)
codec_to_use = "auto";
@@ -1681,6 +1677,8 @@ int main(int argc, char **argv) {
frame->extended_data = frame->data;
}
+ frame->color_range = AVCOL_RANGE_JPEG;
+
if(window_pixmap.texture_width < record_width)
frame->width = window_pixmap.texture_width & ~1;
else
@@ -1818,21 +1816,16 @@ int main(int argc, char **argv) {
}, av_format_context, &write_output_mutex);
}
- started = 1;
-
// Set update_fps to 24 to test if duplicate/delayed frames cause video/audio desync or too fast/slow video.
const double update_fps = fps + 190;
int64_t video_pts_counter = 0;
- bool redraw = true;
XEvent e;
while (running) {
double frame_start = clock_get_monotonic_seconds();
if(window)
gl.glClear(GL_COLOR_BUFFER_BIT);
- redraw = true;
-
if(src_window_id) {
if (XCheckTypedWindowEvent(dpy, src_window_id, DestroyNotify, &e)) {
running = 0;
@@ -1936,108 +1929,102 @@ int main(int argc, char **argv) {
if (frame_time_overflow >= 0.0) {
frame_timer_start = time_now - frame_time_overflow;
- bool frame_captured = true;
- if(redraw) {
- redraw = false;
- if(src_window_id) {
- // TODO: Use a framebuffer instead. glCopyImageSubData requires
- // opengl 4.2
- int source_x = 0;
- int source_y = 0;
+ if(src_window_id) {
+ // TODO: Use a framebuffer instead. glCopyImageSubData requires
+ // opengl 4.2
+ int source_x = 0;
+ int source_y = 0;
- int source_width = window_pixmap.texture_width;
- int source_height = window_pixmap.texture_height;
+ int source_width = window_pixmap.texture_width;
+ int source_height = window_pixmap.texture_height;
- bool clamped = false;
+ bool clamped = false;
- if(window_pixmap.composite_window) {
- source_x = window_x;
- source_y = window_y;
+ if(window_pixmap.composite_window) {
+ source_x = window_x;
+ source_y = window_y;
- int underflow_x = 0;
- int underflow_y = 0;
+ int underflow_x = 0;
+ int underflow_y = 0;
- if(source_x < 0) {
- underflow_x = -source_x;
- source_x = 0;
- source_width += source_x;
- }
+ if(source_x < 0) {
+ underflow_x = -source_x;
+ source_x = 0;
+ source_width += source_x;
+ }
- if(source_y < 0) {
- underflow_y = -source_y;
- source_y = 0;
- source_height += source_y;
- }
+ if(source_y < 0) {
+ underflow_y = -source_y;
+ source_y = 0;
+ source_height += source_y;
+ }
- const int clamped_source_width = std::max(0, window_pixmap.texture_real_width - source_x - underflow_x);
- const int clamped_source_height = std::max(0, window_pixmap.texture_real_height - source_y - underflow_y);
+ const int clamped_source_width = std::max(0, window_pixmap.texture_real_width - source_x - underflow_x);
+ const int clamped_source_height = std::max(0, window_pixmap.texture_real_height - source_y - underflow_y);
- if(clamped_source_width < source_width) {
- source_width = clamped_source_width;
- clamped = true;
- }
-
- if(clamped_source_height < source_height) {
- source_height = clamped_source_height;
- clamped = true;
- }
+ if(clamped_source_width < source_width) {
+ source_width = clamped_source_width;
+ clamped = true;
}
- if(clamped) {
- // Requires opengl 4.4... TODO: Replace with earlier opengl if opengl < 4.2
- if(gl.glClearTexImage)
- gl.glClearTexImage(window_pixmap.target_texture_id, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr);
+ if(clamped_source_height < source_height) {
+ source_height = clamped_source_height;
+ clamped = true;
}
+ }
- // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2
- gl.glCopyImageSubData(
- window_pixmap.texture_id, GL_TEXTURE_2D, 0, source_x, source_y, 0,
- window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0,
- source_width, source_height, 1);
- unsigned int err = gl.glGetError();
- if(err != 0) {
- static bool error_shown = false;
- if(!error_shown) {
- error_shown = true;
- fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err);
- }
- }
- gl.glXSwapBuffers(dpy, window);
- // int err = gl.glGetError();
- // fprintf(stderr, "error: %d\n", err);
+ if(clamped) {
+ // Requires opengl 4.4... TODO: Replace with earlier opengl if opengl < 4.2
+ if(gl.glClearTexImage)
+ gl.glClearTexImage(window_pixmap.target_texture_id, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr);
+ }
- // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id
+ // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2
+ gl.glCopyImageSubData(
+ window_pixmap.texture_id, GL_TEXTURE_2D, 0, source_x, source_y, 0,
+ window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0,
+ source_width, source_height, 1);
+ unsigned int err = gl.glGetError();
+ if(err != 0) {
+ static bool error_shown = false;
+ if(!error_shown) {
+ error_shown = true;
+ fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err);
+ }
+ }
+ gl.glXSwapBuffers(dpy, window);
+ // int err = gl.glGetError();
+ // fprintf(stderr, "error: %d\n", err);
- frame->linesize[0] = frame->width * 4;
+ // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id
- CUDA_MEMCPY2D memcpy_struct;
- memcpy_struct.srcXInBytes = 0;
- memcpy_struct.srcY = 0;
- memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
+ frame->linesize[0] = frame->width * 4;
- memcpy_struct.dstXInBytes = 0;
- memcpy_struct.dstY = 0;
- memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
+ CUDA_MEMCPY2D memcpy_struct;
+ memcpy_struct.srcXInBytes = 0;
+ memcpy_struct.srcY = 0;
+ memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
- memcpy_struct.srcArray = mapped_array;
- memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
- memcpy_struct.dstPitch = frame->linesize[0];
- memcpy_struct.WidthInBytes = frame->width * 4;
- memcpy_struct.Height = frame->height;
- cuda.cuMemcpy2D_v2(&memcpy_struct);
+ memcpy_struct.dstXInBytes = 0;
+ memcpy_struct.dstY = 0;
+ memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
- frame_captured = true;
- } else {
- // TODO: Check when src_cu_device_ptr changes and re-register resource
- frame->linesize[0] = frame->width * 4;
+ memcpy_struct.srcArray = mapped_array;
+ memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0];
+ memcpy_struct.dstPitch = frame->linesize[0];
+ memcpy_struct.WidthInBytes = frame->width * 4;
+ memcpy_struct.Height = frame->height;
+ cuda.cuMemcpy2D_v2(&memcpy_struct);
+ } else {
+ // TODO: Check when src_cu_device_ptr changes and re-register resource
+ frame->linesize[0] = frame->width * 4;
- uint32_t byte_size = 0;
- CUdeviceptr src_cu_device_ptr = 0;
- frame_captured = nv_fbc_library.capture(&src_cu_device_ptr, &byte_size);
- frame->data[0] = (uint8_t*)src_cu_device_ptr;
- }
- // res = cuda.cuCtxPopCurrent_v2(&old_ctx);
+ uint32_t byte_size = 0;
+ CUdeviceptr src_cu_device_ptr = 0;
+ nv_fbc_library.capture(&src_cu_device_ptr, &byte_size);
+ frame->data[0] = (uint8_t*)src_cu_device_ptr;
}
+ // res = cuda.cuCtxPopCurrent_v2(&old_ctx);
const double this_video_frame_time = clock_get_monotonic_seconds();
const int64_t expected_frames = std::round((this_video_frame_time - start_time_pts) / target_fps);
@@ -2097,6 +2084,5 @@ int main(int argc, char **argv) {
if(dpy)
XCloseDisplay(dpy);
- unlink(pid_file);
free(empty_audio);
}