From ce433d9b1e08ffd33b8eaff9fcecbfc41a5faf51 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 18 Oct 2022 09:02:24 +0200 Subject: Attempt to reduce stuttering of video --- README.md | 4 +- include/NvFBCLibrary.hpp | 63 ++++++++++++++--- src/main.cpp | 180 ++++++++++++++++++++++------------------------- 3 files changed, 137 insertions(+), 110 deletions(-) diff --git a/README.md b/README.md index 78545cf..c4e7a3b 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,6 @@ If you are using a variable refresh rate monitor, then choose to record "screen- For screen capture to work with PRIME (laptops with a nvidia gpu), you must set the primary GPU to use your dedicated nvidia graphics card. You can do this by selecting "NVIDIA (Performance Mode) in nvidia settings:\ ![](https://dec05eba.com/images/nvidia-settings-prime.png)\ and then rebooting your laptop. -### TEMPORARY ISSUE ### -screen-direct capture has been temporary disabled as it causes issues with stuttering. This might be a nvfbc bug. # Performance When recording Legend of Zelda Breath of the Wild at 4k, fps drops from 30 to 7 when using OBS Studio + nvenc, however when using this screen recorder the fps remains at 30.\ @@ -59,6 +57,6 @@ FFMPEG only uses the GPU with CUDA when doing transcoding from an input video to libraries at compile-time. * Clean up the code! * Dynamically change bitrate/resolution to match desired fps. This would be helpful when streaming for example, where the encode output speed also depends on upload speed to the streaming service. -* Show cursor when recording. Currently the cursor is not visible when recording a window. +* Show cursor when recording. Currently the cursor is not visible when recording a window or screen-direct. * Implement opengl injection to capture texture. This fixes composition issues and (VRR) without having to use NvFBC direct capture. * Always use direct capture with NvFBC once the capture issue in mpv fullscreen has been resolved (maybe detect if direct capture fails in nvfbc and switch to non-direct recording. NvFBC says if direct capture fails). diff --git a/include/NvFBCLibrary.hpp b/include/NvFBCLibrary.hpp index 19b9bcc..dc7db1f 100644 --- a/include/NvFBCLibrary.hpp +++ b/include/NvFBCLibrary.hpp @@ -61,8 +61,32 @@ public: if(!library || !display_to_capture || !display_width || !display_height || fbc_handle_created) return false; + this->fps = fps; const bool capture_region = (x > 0 || y > 0 || width > 0 || height > 0); + bool supports_direct_cursor = false; + int driver_major_version = 0; + int driver_minor_version = 0; + if(direct_capture && get_driver_version(&driver_major_version, &driver_minor_version)) { + fprintf(stderr, "Info: detected nvidia version: %d.%d\n", driver_major_version, driver_minor_version); + + if(version_at_least(driver_major_version, driver_minor_version, 515, 57) && version_less_than(driver_major_version, driver_minor_version, 520, 56)) { + direct_capture = false; + fprintf(stderr, "Warning: \"screen-direct\" has temporary been disabled as it causes stuttering with driver versions >= 515.57 and < 520.56. Please update your driver if possible. Capturing \"screen\" instead.\n"); + } + + // TODO: + // Cursor capture disabled because moving the cursor doesn't update capture rate to monitor hz and instead captures at 10-30 hz + /* + if(direct_capture) { + if(version_at_least(driver_major_version, driver_minor_version, 515, 57)) + supports_direct_cursor = true; + else + fprintf(stderr, "Info: capturing \"screen-direct\" but driver version appears to be less than 515.57. Disabling capture of cursor. Please update your driver if you want to capture your cursor or record \"screen\" instead.\n"); + } + */ + } + NVFBCSTATUS status; NVFBC_TRACKING_TYPE tracking_type; bool capture_session_created = false; @@ -129,14 +153,14 @@ public: memset(&create_capture_params, 0, sizeof(create_capture_params)); create_capture_params.dwVersion = NVFBC_CREATE_CAPTURE_SESSION_PARAMS_VER; create_capture_params.eCaptureType = NVFBC_CAPTURE_SHARED_CUDA; - create_capture_params.bWithCursor = (!direct_capture || driver_supports_direct_capture_cursor()) ? NVFBC_TRUE : NVFBC_FALSE; + create_capture_params.bWithCursor = (!direct_capture || supports_direct_cursor) ? NVFBC_TRUE : NVFBC_FALSE; if(capture_region) { create_capture_params.captureBox = { x, y, width, height }; *display_width = width; *display_height = height; } create_capture_params.eTrackingType = tracking_type; - create_capture_params.dwSamplingRateMs = 1000 / fps; + create_capture_params.dwSamplingRateMs = 1000 / (fps + 1); create_capture_params.bAllowDirectCapture = direct_capture ? NVFBC_TRUE : NVFBC_FALSE; create_capture_params.bPushModel = direct_capture ? NVFBC_TRUE : NVFBC_FALSE; if(tracking_type == NVFBC_TRACKING_OUTPUT) @@ -192,13 +216,14 @@ public: NVFBC_TOCUDA_GRAB_FRAME_PARAMS grab_params; memset(&grab_params, 0, sizeof(grab_params)); grab_params.dwVersion = NVFBC_TOCUDA_GRAB_FRAME_PARAMS_VER; - grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT | NVFBC_TOCUDA_GRAB_FLAGS_FORCE_REFRESH; + grab_params.dwFlags = NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT;// | NVFBC_TOCUDA_GRAB_FLAGS_FORCE_REFRESH;//NVFBC_TOCUDA_GRAB_FLAGS_NOWAIT_IF_NEW_FRAME_READY; grab_params.pFrameGrabInfo = &frame_info; grab_params.pCUDADeviceBuffer = cu_device_ptr; + grab_params.dwTimeoutMs = 0;//1000 / (fps + 10); status = nv_fbc_function_list.nvFBCToCudaGrabFrame(nv_fbc_handle, &grab_params); if(status != NVFBC_SUCCESS) { - fprintf(stderr, "Error: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle)); + fprintf(stderr, "Error: capture: %s\n", nv_fbc_function_list.nvFBCGetLastErrorStr(nv_fbc_handle)); return false; } @@ -246,28 +271,45 @@ private: } // TODO: Test with optimus and open kernel modules - static bool driver_supports_direct_capture_cursor() { + static bool get_driver_version(int *major, int *minor) { + *major = 0; + *minor = 0; + FILE *f = fopen("/proc/driver/nvidia/version", "rb"); - if(!f) + if(!f) { + fprintf(stderr, "Warning: failed to get nvidia driver version (failed to read /proc/driver/nvidia/version)\n"); return false; + } char buffer[2048]; size_t bytes_read = fread(buffer, 1, sizeof(buffer) - 1, f); buffer[bytes_read] = '\0'; - bool supports_cursor = false; + bool success = false; const char *p = strstr(buffer, "Kernel Module"); if(p) { p += 13; int driver_major_version = 0, driver_minor_version = 0; if(sscanf(p, "%d.%d", &driver_major_version, &driver_minor_version) == 2) { - if(driver_major_version > 515 || (driver_major_version == 515 && driver_minor_version >= 57)) - supports_cursor = true; + *major = driver_major_version; + *minor = driver_minor_version; + success = true; } } + if(!success) + fprintf(stderr, "Warning: failed to get nvidia driver version\n"); + fclose(f); - return supports_cursor; + return success; + } + + static bool version_at_least(int major, int minor, int expected_major, int expected_minor) { + return major > expected_major || (major == expected_major && minor >= expected_minor); + } + + static bool version_less_than(int major, int minor, int expected_major, int expected_minor) { + return major < expected_major || (major == expected_major && minor < expected_minor); } private: void *library = nullptr; @@ -275,4 +317,5 @@ private: NVFBC_API_FUNCTION_LIST nv_fbc_function_list; NVFBC_SESSION_HANDLE nv_fbc_handle; bool fbc_handle_created = false; + int fps = 0; }; diff --git a/src/main.cpp b/src/main.cpp index 021d6e3..05f062d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -882,10 +882,13 @@ static void open_video(AVCodecContext *codec_context, // with pretty good performance but you now have to choose p1-p7, which are gpu agnostic and on // older gpus p5-p7 slow the gpu down to a crawl... // "hq" is now just an alias for p7 in ffmpeg :( + // TODO: Temporary disable because of stuttering? + /* if(very_old_gpu) av_dict_set(&options, "preset", supports_p4 ? "p4" : "medium", 0); else av_dict_set(&options, "preset", supports_p7 ? "p7" : "slow", 0); + */ av_dict_set(&options, "tune", "hq", 0); av_dict_set(&options, "rc", "constqp", 0); @@ -948,16 +951,8 @@ static void usage() { exit(1); } -static sig_atomic_t started = 0; static sig_atomic_t running = 1; static sig_atomic_t save_replay = 0; -static const char *pid_file = "/tmp/gpu-screen-recorder"; - -static void term_handler(int) { - if(started) - unlink(pid_file); - exit(0); -} static void int_handler(int) { running = 0; @@ -1172,7 +1167,6 @@ static bool is_livestream_path(const char *str) { } int main(int argc, char **argv) { - signal(SIGTERM, term_handler); signal(SIGINT, int_handler); signal(SIGUSR1, save_replay_handler); @@ -1213,6 +1207,8 @@ int main(int argc, char **argv) { VideoCodec video_codec; const char *codec_to_use = args["-k"].value(); + fprintf(stderr, "Info: forcing codec to h264 to investigate stuttering with some configs\n"); + codec_to_use = "h264"; if(!codec_to_use) codec_to_use = "auto"; @@ -1681,6 +1677,8 @@ int main(int argc, char **argv) { frame->extended_data = frame->data; } + frame->color_range = AVCOL_RANGE_JPEG; + if(window_pixmap.texture_width < record_width) frame->width = window_pixmap.texture_width & ~1; else @@ -1818,21 +1816,16 @@ int main(int argc, char **argv) { }, av_format_context, &write_output_mutex); } - started = 1; - // Set update_fps to 24 to test if duplicate/delayed frames cause video/audio desync or too fast/slow video. const double update_fps = fps + 190; int64_t video_pts_counter = 0; - bool redraw = true; XEvent e; while (running) { double frame_start = clock_get_monotonic_seconds(); if(window) gl.glClear(GL_COLOR_BUFFER_BIT); - redraw = true; - if(src_window_id) { if (XCheckTypedWindowEvent(dpy, src_window_id, DestroyNotify, &e)) { running = 0; @@ -1936,108 +1929,102 @@ int main(int argc, char **argv) { if (frame_time_overflow >= 0.0) { frame_timer_start = time_now - frame_time_overflow; - bool frame_captured = true; - if(redraw) { - redraw = false; - if(src_window_id) { - // TODO: Use a framebuffer instead. glCopyImageSubData requires - // opengl 4.2 - int source_x = 0; - int source_y = 0; + if(src_window_id) { + // TODO: Use a framebuffer instead. glCopyImageSubData requires + // opengl 4.2 + int source_x = 0; + int source_y = 0; - int source_width = window_pixmap.texture_width; - int source_height = window_pixmap.texture_height; + int source_width = window_pixmap.texture_width; + int source_height = window_pixmap.texture_height; - bool clamped = false; + bool clamped = false; - if(window_pixmap.composite_window) { - source_x = window_x; - source_y = window_y; + if(window_pixmap.composite_window) { + source_x = window_x; + source_y = window_y; - int underflow_x = 0; - int underflow_y = 0; + int underflow_x = 0; + int underflow_y = 0; - if(source_x < 0) { - underflow_x = -source_x; - source_x = 0; - source_width += source_x; - } + if(source_x < 0) { + underflow_x = -source_x; + source_x = 0; + source_width += source_x; + } - if(source_y < 0) { - underflow_y = -source_y; - source_y = 0; - source_height += source_y; - } + if(source_y < 0) { + underflow_y = -source_y; + source_y = 0; + source_height += source_y; + } - const int clamped_source_width = std::max(0, window_pixmap.texture_real_width - source_x - underflow_x); - const int clamped_source_height = std::max(0, window_pixmap.texture_real_height - source_y - underflow_y); + const int clamped_source_width = std::max(0, window_pixmap.texture_real_width - source_x - underflow_x); + const int clamped_source_height = std::max(0, window_pixmap.texture_real_height - source_y - underflow_y); - if(clamped_source_width < source_width) { - source_width = clamped_source_width; - clamped = true; - } - - if(clamped_source_height < source_height) { - source_height = clamped_source_height; - clamped = true; - } + if(clamped_source_width < source_width) { + source_width = clamped_source_width; + clamped = true; } - if(clamped) { - // Requires opengl 4.4... TODO: Replace with earlier opengl if opengl < 4.2 - if(gl.glClearTexImage) - gl.glClearTexImage(window_pixmap.target_texture_id, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr); + if(clamped_source_height < source_height) { + source_height = clamped_source_height; + clamped = true; } + } - // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2 - gl.glCopyImageSubData( - window_pixmap.texture_id, GL_TEXTURE_2D, 0, source_x, source_y, 0, - window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, - source_width, source_height, 1); - unsigned int err = gl.glGetError(); - if(err != 0) { - static bool error_shown = false; - if(!error_shown) { - error_shown = true; - fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err); - } - } - gl.glXSwapBuffers(dpy, window); - // int err = gl.glGetError(); - // fprintf(stderr, "error: %d\n", err); + if(clamped) { + // Requires opengl 4.4... TODO: Replace with earlier opengl if opengl < 4.2 + if(gl.glClearTexImage) + gl.glClearTexImage(window_pixmap.target_texture_id, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr); + } - // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id + // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2 + gl.glCopyImageSubData( + window_pixmap.texture_id, GL_TEXTURE_2D, 0, source_x, source_y, 0, + window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, + source_width, source_height, 1); + unsigned int err = gl.glGetError(); + if(err != 0) { + static bool error_shown = false; + if(!error_shown) { + error_shown = true; + fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err); + } + } + gl.glXSwapBuffers(dpy, window); + // int err = gl.glGetError(); + // fprintf(stderr, "error: %d\n", err); - frame->linesize[0] = frame->width * 4; + // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id - CUDA_MEMCPY2D memcpy_struct; - memcpy_struct.srcXInBytes = 0; - memcpy_struct.srcY = 0; - memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY; + frame->linesize[0] = frame->width * 4; - memcpy_struct.dstXInBytes = 0; - memcpy_struct.dstY = 0; - memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE; + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY; - memcpy_struct.srcArray = mapped_array; - memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0]; - memcpy_struct.dstPitch = frame->linesize[0]; - memcpy_struct.WidthInBytes = frame->width * 4; - memcpy_struct.Height = frame->height; - cuda.cuMemcpy2D_v2(&memcpy_struct); + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE; - frame_captured = true; - } else { - // TODO: Check when src_cu_device_ptr changes and re-register resource - frame->linesize[0] = frame->width * 4; + memcpy_struct.srcArray = mapped_array; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0]; + memcpy_struct.dstPitch = frame->linesize[0]; + memcpy_struct.WidthInBytes = frame->width * 4; + memcpy_struct.Height = frame->height; + cuda.cuMemcpy2D_v2(&memcpy_struct); + } else { + // TODO: Check when src_cu_device_ptr changes and re-register resource + frame->linesize[0] = frame->width * 4; - uint32_t byte_size = 0; - CUdeviceptr src_cu_device_ptr = 0; - frame_captured = nv_fbc_library.capture(&src_cu_device_ptr, &byte_size); - frame->data[0] = (uint8_t*)src_cu_device_ptr; - } - // res = cuda.cuCtxPopCurrent_v2(&old_ctx); + uint32_t byte_size = 0; + CUdeviceptr src_cu_device_ptr = 0; + nv_fbc_library.capture(&src_cu_device_ptr, &byte_size); + frame->data[0] = (uint8_t*)src_cu_device_ptr; } + // res = cuda.cuCtxPopCurrent_v2(&old_ctx); const double this_video_frame_time = clock_get_monotonic_seconds(); const int64_t expected_frames = std::round((this_video_frame_time - start_time_pts) / target_fps); @@ -2097,6 +2084,5 @@ int main(int argc, char **argv) { if(dpy) XCloseDisplay(dpy); - unlink(pid_file); free(empty_audio); } -- cgit v1.2.3