From 62d61fda12e3774fee6b671e44fb89bd2ef8da8c Mon Sep 17 00:00:00 2001 From: dec05eba Date: Thu, 4 Jul 2024 01:35:03 +0200 Subject: Add support for software video encoding with '-encoder cpu' option, currently h264 only --- README.md | 1 + TODO | 13 ++ include/capture/capture.h | 3 +- include/capture/kms_software.h | 19 ++ include/capture/nvfbc.h | 1 + include/capture/xcomposite_software.h | 13 ++ include/egl.h | 3 +- meson.build | 5 +- src/capture/capture.c | 69 ++++++- src/capture/kms.c | 2 - src/capture/kms_cuda.c | 2 + src/capture/kms_software.c | 129 +++++++++++++ src/capture/kms_vaapi.c | 4 +- src/capture/nvfbc.c | 132 +++++++------ src/capture/xcomposite.c | 1 - src/capture/xcomposite_cuda.c | 2 + src/capture/xcomposite_software.c | 113 ++++++++++++ src/capture/xcomposite_vaapi.c | 6 +- src/color_conversion.c | 2 +- src/egl.c | 1 + src/main.cpp | 338 +++++++++++++++++++++++----------- 21 files changed, 687 insertions(+), 172 deletions(-) create mode 100644 include/capture/kms_software.h create mode 100644 include/capture/xcomposite_software.h create mode 100644 src/capture/kms_software.c create mode 100644 src/capture/xcomposite_software.c diff --git a/README.md b/README.md index 508460b..097a1c2 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ from one of the official sources before reporting it as an issue. If you install GPU Screen Recorder flatpak, which is the gtk gui version then you can still run GPU Screen Recorder command line by using the flatpak command option, for example `flatpak run --command=gpu-screen-recorder com.dec05eba.gpu_screen_recorder -w screen -f 60 -o video.mp4`. Note that if you want to record your monitor on AMD/Intel then you need to install the flatpak system-wide (like so: `flatpak install flathub --system com.dec05eba.gpu_screen_recorder`). # Dependencies +GPU Screen Recorder uses meson build system so you need to install `meson`. There are additional dependencies depending on your graphics card: ## AMD libglvnd (which provides libgl and libegl)\ mesa\ diff --git a/TODO b/TODO index 7301a03..1051253 100644 --- a/TODO +++ b/TODO @@ -139,3 +139,16 @@ Support selecting which gpu to use. This can be done in egl with eglQueryDevices Maybe on glx (nvidia x11 nvfbc) we need to use __NV_PRIME_RENDER_OFFLOAD_PROVIDER and __GLX_VENDOR_LIBRARY_NAME instead. Remove is_damaged and clear_damage and return a value from capture function instead that states if the image has been updated or not. + +Test install intel-hybrid-codec-driver-git for vp8 encoding on intel. +When adding support for steam deck, add option to send video to another computer. +New gpu screen recorder gui should have the option to cut the video directly, maybe running an ffmpeg command or implementing that ourselves. Only support gpu screen recorder video files. +Add hdr metadata to encoder settings metadata. + +Check if is software renderer by using eglQueryDisplayAttribEXT(egl_display, EGL_DEVICE_EXT..) eglQueryDeviceStringEXT(egl_device, EGL_EXTENSIONS) and check for "EGL_MESA_device_software". + +Use MapTexture2DINTEL for software encoding on intel. + +To test vulkan encode on amd set the environment variable RADV_PERFTEST=video_encode before running a program that uses vulkan encode (or queries for it, such as vulkaninfo). + +Support hevc/av1 for software encoder and hdr support at the same time. Need support for yuv420p shader for that. Use libx265 for hevc and libsvtav1 for av1 (libsvtav1 is the fastest software av1 video encoder). 
Also support vp8/vp9 since we are not limited by hardware. \ No newline at end of file diff --git a/include/capture/capture.h b/include/capture/capture.h index fbbe767..8c654f4 100644 --- a/include/capture/capture.h +++ b/include/capture/capture.h @@ -62,9 +62,10 @@ void gsr_capture_destroy(gsr_capture *cap, AVCodecContext *video_codec_context); bool gsr_capture_base_setup_vaapi_textures(gsr_capture_base *self, AVFrame *frame, VADisplay va_dpy, VADRMPRIMESurfaceDescriptor *prime, gsr_color_range color_range); bool gsr_capture_base_setup_cuda_textures(gsr_capture_base *self, AVFrame *frame, gsr_cuda_context *cuda_context, gsr_color_range color_range, gsr_source_color source_color, bool hdr); +bool gsr_capture_base_setup_textures(gsr_capture_base *self, AVFrame *frame, gsr_color_range color_range, gsr_source_color source_color, bool hdr, bool cursor_texture_is_external); void gsr_capture_base_stop(gsr_capture_base *self); -bool drm_create_codec_context(const char *card_path, AVCodecContext *video_codec_context, int width, int height, bool hdr, VADisplay *va_dpy); +bool vaapi_create_codec_context(const char *card_path, AVCodecContext *video_codec_context, int width, int height, bool hdr, VADisplay *va_dpy); bool cuda_create_codec_context(CUcontext cu_ctx, AVCodecContext *video_codec_context, int width, int height, bool hdr, CUstream *cuda_stream); #endif /* GSR_CAPTURE_CAPTURE_H */ diff --git a/include/capture/kms_software.h b/include/capture/kms_software.h new file mode 100644 index 0000000..f9b57f3 --- /dev/null +++ b/include/capture/kms_software.h @@ -0,0 +1,19 @@ +#ifndef GSR_CAPTURE_KMS_SOFTWARE_H +#define GSR_CAPTURE_KMS_SOFTWARE_H + +#include "../vec2.h" +#include "../utils.h" +#include "../color_conversion.h" +#include "capture.h" + +typedef struct { + gsr_egl *egl; + const char *display_to_capture; /* if this is "screen", then the first monitor is captured. 
A copy is made of this */ + bool hdr; + gsr_color_range color_range; + bool record_cursor; +} gsr_capture_kms_software_params; + +gsr_capture* gsr_capture_kms_software_create(const gsr_capture_kms_software_params *params); + +#endif /* GSR_CAPTURE_KMS_SOFTWARE_H */ diff --git a/include/capture/nvfbc.h b/include/capture/nvfbc.h index 36bc2b6..e24b712 100644 --- a/include/capture/nvfbc.h +++ b/include/capture/nvfbc.h @@ -15,6 +15,7 @@ typedef struct { bool hdr; gsr_color_range color_range; bool record_cursor; + bool use_software_video_encoder; } gsr_capture_nvfbc_params; gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params); diff --git a/include/capture/xcomposite_software.h b/include/capture/xcomposite_software.h new file mode 100644 index 0000000..d41083c --- /dev/null +++ b/include/capture/xcomposite_software.h @@ -0,0 +1,13 @@ +#ifndef GSR_CAPTURE_XCOMPOSITE_SOFTWARE_H +#define GSR_CAPTURE_XCOMPOSITE_SOFTWARE_H + +#include "capture.h" +#include "xcomposite.h" + +typedef struct { + gsr_capture_xcomposite_params base; +} gsr_capture_xcomposite_software_params; + +gsr_capture* gsr_capture_xcomposite_software_create(const gsr_capture_xcomposite_software_params *params); + +#endif /* GSR_CAPTURE_XCOMPOSITE_SOFTWARE_H */ diff --git a/include/egl.h b/include/egl.h index 64dd2c6..899c0d0 100644 --- a/include/egl.h +++ b/include/egl.h @@ -104,7 +104,7 @@ typedef void(*__GLXextFuncPtr)(void); #define GL_FRAMEBUFFER 0x8D40 #define GL_COLOR_ATTACHMENT0 0x8CE0 #define GL_FRAMEBUFFER_COMPLETE 0x8CD5 -#define GL_STREAM_DRAW 0x88E0 +#define GL_DYNAMIC_DRAW 0x88E8 #define GL_ARRAY_BUFFER 0x8892 #define GL_BLEND 0x0BE2 #define GL_SRC_ALPHA 0x0302 @@ -238,6 +238,7 @@ struct gsr_egl { void (*glTexImage2D)(unsigned int target, int level, int internalFormat, int width, int height, int border, unsigned int format, unsigned int type, const void *pixels); void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth); void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data); + void (*glGetTexImage)(unsigned int target, int level, unsigned int format, unsigned int type, void *pixels); void (*glGenFramebuffers)(int n, unsigned int *framebuffers); void (*glBindFramebuffer)(unsigned int target, unsigned int framebuffer); void (*glDeleteFramebuffers)(int n, const unsigned int *framebuffers); diff --git a/meson.build b/meson.build index a188f16..31e849f 100644 --- a/meson.build +++ b/meson.build @@ -14,8 +14,10 @@ src = [ 'src/capture/xcomposite.c', 'src/capture/xcomposite_cuda.c', 'src/capture/xcomposite_vaapi.c', - 'src/capture/kms_vaapi.c', + 'src/capture/xcomposite_software.c', 'src/capture/kms_cuda.c', + 'src/capture/kms_vaapi.c', + 'src/capture/kms_software.c', 'src/capture/kms.c', 'src/egl.c', 'src/cuda.c', @@ -30,6 +32,7 @@ src = [ 'src/sound.cpp', 'src/main.cpp', ] + dep = [ dependency('libavcodec'), dependency('libavformat'), diff --git a/src/capture/capture.c b/src/capture/capture.c index 5e1f546..a5735fb 100644 --- a/src/capture/capture.c +++ b/src/capture/capture.c @@ -281,6 +281,73 @@ bool gsr_capture_base_setup_cuda_textures(gsr_capture_base *self, AVFrame *frame return true; } +bool gsr_capture_base_setup_textures(gsr_capture_base *self, AVFrame *frame, gsr_color_range color_range, gsr_source_color source_color, bool hdr, bool 
cursor_texture_is_external) { + int res = av_frame_get_buffer(frame, 1); // TODO: Align? + if(res < 0) { + fprintf(stderr, "gsr error: gsr_capture_base_setup_textures: av_frame_get_buffer failed: %d\n", res); + return false; + } + + res = av_frame_make_writable(frame); + if(res < 0) { + fprintf(stderr, "gsr error: gsr_capture_base_setup_textures: av_frame_make_writable failed: %d\n", res); + return false; + } + + self->egl->glGenTextures(1, &self->input_texture); + self->egl->glBindTexture(GL_TEXTURE_2D, self->input_texture); + self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + self->egl->glBindTexture(GL_TEXTURE_2D, 0); + + const int target = cursor_texture_is_external ? GL_TEXTURE_EXTERNAL_OES : GL_TEXTURE_2D; + self->egl->glGenTextures(1, &self->cursor_texture); + self->egl->glBindTexture(target, self->cursor_texture); + self->egl->glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + self->egl->glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + self->egl->glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + self->egl->glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + self->egl->glBindTexture(target, 0); + + const unsigned int internal_formats_nv12[2] = { GL_R8, GL_RG8 }; + const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; + const unsigned int formats[2] = { GL_RED, GL_RG }; + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + + for(int i = 0; i < 2; ++i) { + self->target_textures[i] = gl_create_texture(self->egl, self->video_codec_context->width / div[i], self->video_codec_context->height / div[i], !hdr ? 
internal_formats_nv12[i] : internal_formats_p010[i], formats[i]); + if(self->target_textures[i] == 0) { + fprintf(stderr, "gsr error: gsr_capture_kms_setup_cuda_textures: failed to create opengl texture\n"); + return false; + } + } + + gsr_color_conversion_params color_conversion_params = {0}; + color_conversion_params.color_range = color_range; + color_conversion_params.egl = self->egl; + color_conversion_params.source_color = source_color; + if(!hdr) + color_conversion_params.destination_color = GSR_DESTINATION_COLOR_NV12; + else + color_conversion_params.destination_color = GSR_DESTINATION_COLOR_P010; + + color_conversion_params.destination_textures[0] = self->target_textures[0]; + color_conversion_params.destination_textures[1] = self->target_textures[1]; + color_conversion_params.num_destination_textures = 2; + color_conversion_params.load_external_image_shader = true; + + if(gsr_color_conversion_init(&self->color_conversion, &color_conversion_params) != 0) { + fprintf(stderr, "gsr error: gsr_capture_kms_setup_cuda_textures: failed to create color conversion\n"); + return false; + } + + gsr_color_conversion_clear(&self->color_conversion); + + return true; +} + void gsr_capture_base_stop(gsr_capture_base *self) { gsr_color_conversion_deinit(&self->color_conversion); @@ -306,7 +373,7 @@ void gsr_capture_base_stop(gsr_capture_base *self) { av_buffer_unref(&self->video_codec_context->hw_frames_ctx); } -bool drm_create_codec_context(const char *card_path, AVCodecContext *video_codec_context, int width, int height, bool hdr, VADisplay *va_dpy) { +bool vaapi_create_codec_context(const char *card_path, AVCodecContext *video_codec_context, int width, int height, bool hdr, VADisplay *va_dpy) { char render_path[128]; if(!gsr_card_path_get_render_path(card_path, render_path)) { fprintf(stderr, "gsr error: failed to get /dev/dri/renderDXXX file from %s\n", card_path); diff --git a/src/capture/kms.c b/src/capture/kms.c index ec83cab..0d9bab1 100644 --- a/src/capture/kms.c +++ b/src/capture/kms.c @@ -212,7 +212,6 @@ static vec2i swap_vec2i(vec2i value) { bool gsr_capture_kms_capture(gsr_capture_kms *self, AVFrame *frame, bool hdr, bool screen_plane_use_modifiers, bool cursor_texture_is_external, bool record_cursor) { //egl->glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - self->base.egl->glClear(0); gsr_capture_kms_cleanup_kms_fds(self); @@ -380,7 +379,6 @@ bool gsr_capture_kms_capture(gsr_capture_kms *self, AVFrame *frame, bool hdr, bo self->base.egl->glDisable(GL_SCISSOR_TEST); } - self->base.egl->eglSwapBuffers(self->base.egl->egl_display, self->base.egl->egl_surface); //self->base.egl->glFlush(); //self->base.egl->glFinish(); diff --git a/src/capture/kms_cuda.c b/src/capture/kms_cuda.c index a9f1f8e..a35e263 100644 --- a/src/capture/kms_cuda.c +++ b/src/capture/kms_cuda.c @@ -85,7 +85,9 @@ static void gsr_capture_kms_unload_cuda_graphics(gsr_capture_kms_cuda *cap_kms) static int gsr_capture_kms_cuda_capture(gsr_capture *cap, AVFrame *frame) { gsr_capture_kms_cuda *cap_kms = cap->priv; + cap_kms->kms.base.egl->glClear(0); gsr_capture_kms_capture(&cap_kms->kms, frame, cap_kms->params.hdr, true, true, cap_kms->params.record_cursor); + cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface); const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size for(int i = 0; i < 2; ++i) { diff --git a/src/capture/kms_software.c b/src/capture/kms_software.c new file mode 100644 index 0000000..1f4daf4 --- /dev/null +++ 
b/src/capture/kms_software.c @@ -0,0 +1,129 @@ +#include "../../include/capture/kms_software.h" +#include "../../include/capture/kms.h" +#include +#include +#include +#include +#include + +typedef struct { + gsr_capture_kms kms; + gsr_capture_kms_software_params params; +} gsr_capture_kms_software; + +static void gsr_capture_kms_software_stop(gsr_capture *cap, AVCodecContext *video_codec_context); + +#define GL_DYNAMIC_READ 0x88E9 + +static int gsr_capture_kms_software_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) { + gsr_capture_kms_software *cap_kms = cap->priv; + + const int res = gsr_capture_kms_start(&cap_kms->kms, cap_kms->params.display_to_capture, cap_kms->params.egl, video_codec_context, frame); + if(res != 0) { + gsr_capture_kms_software_stop(cap, video_codec_context); + return res; + } + + if(!gsr_capture_base_setup_textures(&cap_kms->kms.base, frame, cap_kms->params.color_range, GSR_SOURCE_COLOR_RGB, cap_kms->params.hdr, cap_kms->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_NVIDIA)) { + gsr_capture_kms_software_stop(cap, video_codec_context); + return -1; + } + + return 0; +} + +static bool gsr_capture_kms_software_should_stop(gsr_capture *cap, bool *err) { + gsr_capture_kms_software *cap_kms = cap->priv; + if(cap_kms->kms.should_stop) { + if(err) + *err = cap_kms->kms.stop_is_error; + return true; + } + + if(err) + *err = false; + return false; +} + +static int gsr_capture_kms_software_capture(gsr_capture *cap, AVFrame *frame) { + gsr_capture_kms_software *cap_kms = cap->priv; + + cap_kms->kms.base.egl->glClear(0); + gsr_capture_kms_capture(&cap_kms->kms, frame, cap_kms->params.hdr, cap_kms->params.egl->gpu_info.vendor != GSR_GPU_VENDOR_AMD, cap_kms->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_NVIDIA, cap_kms->params.record_cursor); + + // TODO: hdr support + const unsigned int formats[2] = { GL_RED, GL_RG }; + for(int i = 0; i < 2; ++i) { + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, cap_kms->kms.base.target_textures[i]); + cap_kms->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]); + } + cap_kms->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface); + + return 0; +} + +static void gsr_capture_kms_software_capture_end(gsr_capture *cap, AVFrame *frame) { + (void)frame; + gsr_capture_kms_software *cap_kms = cap->priv; + gsr_capture_kms_cleanup_kms_fds(&cap_kms->kms); +} + +static void gsr_capture_kms_software_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { + (void)video_codec_context; + gsr_capture_kms_software *cap_kms = cap->priv; + gsr_capture_kms_stop(&cap_kms->kms); +} + +static void gsr_capture_kms_software_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + (void)video_codec_context; + gsr_capture_kms_software *cap_kms = cap->priv; + if(cap->priv) { + gsr_capture_kms_software_stop(cap, video_codec_context); + free((void*)cap_kms->params.display_to_capture); + cap_kms->params.display_to_capture = NULL; + free(cap->priv); + cap->priv = NULL; + } + free(cap); +} + +gsr_capture* gsr_capture_kms_software_create(const gsr_capture_kms_software_params *params) { + if(!params) { + fprintf(stderr, "gsr error: gsr_capture_kms_software_create params is NULL\n"); + return NULL; + } + + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); + if(!cap) + return NULL; + + gsr_capture_kms_software *cap_kms = calloc(1, sizeof(gsr_capture_kms_software)); + if(!cap_kms) { + 
free(cap); + return NULL; + } + + const char *display_to_capture = strdup(params->display_to_capture); + if(!display_to_capture) { + free(cap); + free(cap_kms); + return NULL; + } + + cap_kms->params = *params; + cap_kms->params.display_to_capture = display_to_capture; + + *cap = (gsr_capture) { + .start = gsr_capture_kms_software_start, + .tick = NULL, + .should_stop = gsr_capture_kms_software_should_stop, + .capture = gsr_capture_kms_software_capture, + .capture_end = gsr_capture_kms_software_capture_end, + .destroy = gsr_capture_kms_software_destroy, + .priv = cap_kms + }; + + return cap; +} diff --git a/src/capture/kms_vaapi.c b/src/capture/kms_vaapi.c index b9c9ee5..9ca965d 100644 --- a/src/capture/kms_vaapi.c +++ b/src/capture/kms_vaapi.c @@ -29,7 +29,7 @@ static int gsr_capture_kms_vaapi_start(gsr_capture *cap, AVCodecContext *video_c return res; } - if(!drm_create_codec_context(cap_kms->params.egl->card_path, video_codec_context, video_codec_context->width, video_codec_context->height, cap_kms->params.hdr, &cap_kms->va_dpy)) { + if(!vaapi_create_codec_context(cap_kms->params.egl->card_path, video_codec_context, video_codec_context->width, video_codec_context->height, cap_kms->params.hdr, &cap_kms->va_dpy)) { gsr_capture_kms_vaapi_stop(cap, video_codec_context); return -1; } @@ -57,7 +57,9 @@ static bool gsr_capture_kms_vaapi_should_stop(gsr_capture *cap, bool *err) { static int gsr_capture_kms_vaapi_capture(gsr_capture *cap, AVFrame *frame) { gsr_capture_kms_vaapi *cap_kms = cap->priv; + cap_kms->kms.base.egl->glClear(0); gsr_capture_kms_capture(&cap_kms->kms, frame, cap_kms->params.hdr, cap_kms->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_INTEL, false, cap_kms->params.record_cursor); + cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface); return 0; } diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index 9eabb18..6608aef 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -43,13 +43,6 @@ typedef struct { double nvfbc_dead_start; } gsr_capture_nvfbc; -#if defined(_WIN64) || defined(__LP64__) -typedef unsigned long long CUdeviceptr_v2; -#else -typedef unsigned int CUdeviceptr_v2; -#endif -typedef CUdeviceptr_v2 CUdeviceptr; - static int max_int(int a, int b) { return a > b ? 
a : b; } @@ -301,14 +294,30 @@ static int gsr_capture_nvfbc_setup_session(gsr_capture_nvfbc *cap_nvfbc) { return 0; } +static void gsr_capture_nvfbc_stop(gsr_capture_nvfbc *cap_nvfbc) { + gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc); + gsr_capture_base_stop(&cap_nvfbc->base); + gsr_cuda_unload(&cap_nvfbc->cuda); + if(cap_nvfbc->library) { + dlclose(cap_nvfbc->library); + cap_nvfbc->library = NULL; + } + if(cap_nvfbc->params.display_to_capture) { + free((void*)cap_nvfbc->params.display_to_capture); + cap_nvfbc->params.display_to_capture = NULL; + } +} + static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; cap_nvfbc->base.video_codec_context = video_codec_context; cap_nvfbc->base.egl = cap_nvfbc->params.egl; - if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock)) - return -1; + if(!cap_nvfbc->params.use_software_video_encoder) { + if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock)) + return -1; + } if(!gsr_capture_nvfbc_load_library(cap)) { gsr_cuda_unload(&cap_nvfbc->cuda); @@ -366,18 +375,26 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec frame->width = video_codec_context->width; frame->height = video_codec_context->height; - if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream)) - goto error_cleanup; + if(cap_nvfbc->params.use_software_video_encoder) { + if(!gsr_capture_base_setup_textures(&cap_nvfbc->base, frame, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr, true)) { + goto error_cleanup; + } + } else { + if(!cap_nvfbc->params.use_software_video_encoder) { + if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream)) + goto error_cleanup; + } - gsr_cuda_context cuda_context = { - .cuda = &cap_nvfbc->cuda, - .cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources, - .mapped_arrays = cap_nvfbc->mapped_arrays - }; + gsr_cuda_context cuda_context = { + .cuda = &cap_nvfbc->cuda, + .cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources, + .mapped_arrays = cap_nvfbc->mapped_arrays + }; - // TODO: Remove this, it creates shit we dont need - if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) { - goto error_cleanup; + // TODO: Remove this, it creates shit we dont need + if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) { + goto error_cleanup; + } } /* Disable vsync */ set_vertical_sync_enabled(cap_nvfbc->params.egl, 0); @@ -385,9 +402,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec return 0; error_cleanup: - gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc); - gsr_capture_base_stop(&cap_nvfbc->base); - gsr_cuda_unload(&cap_nvfbc->cuda); + gsr_capture_nvfbc_stop(cap_nvfbc); return -1; } @@ -443,32 +458,44 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { (vec2i){0, 0}, (vec2i){frame->width, frame->height}, 0.0f, false); - cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window); - - // 
TODO: HDR is broken - const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size - for(int i = 0; i < 2; ++i) { - CUDA_MEMCPY2D memcpy_struct; - memcpy_struct.srcXInBytes = 0; - memcpy_struct.srcY = 0; - memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; - - memcpy_struct.dstXInBytes = 0; - memcpy_struct.dstY = 0; - memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; - - memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i]; - memcpy_struct.srcPitch = frame->width / div[i]; - memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; - memcpy_struct.dstPitch = frame->linesize[i]; - memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1); - memcpy_struct.Height = frame->height / div[i]; - // TODO: Remove this copy if possible - cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream); - } + if(cap_nvfbc->params.use_software_video_encoder) { + // TODO: Hdr? + const unsigned int formats[2] = { GL_RED, GL_RG }; + for(int i = 0; i < 2; ++i) { + cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, cap_nvfbc->base.target_textures[i]); + cap_nvfbc->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]); + } + cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window); + } else { + cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window); + + // TODO: HDR is broken + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + for(int i = 0; i < 2; ++i) { + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; + + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i]; + memcpy_struct.srcPitch = frame->width / div[i]; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; + memcpy_struct.dstPitch = frame->linesize[i]; + memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1); + memcpy_struct.Height = frame->height / div[i]; + // TODO: Remove this copy if possible + cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream); + } - // TODO: needed? - cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream); + // TODO: needed? 
+ cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream); + } return 0; } @@ -476,16 +503,7 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { (void)video_codec_context; gsr_capture_nvfbc *cap_nvfbc = cap->priv; - gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc); - if(cap_nvfbc) { - gsr_capture_base_stop(&cap_nvfbc->base); - gsr_cuda_unload(&cap_nvfbc->cuda); - dlclose(cap_nvfbc->library); - free((void*)cap_nvfbc->params.display_to_capture); - cap_nvfbc->params.display_to_capture = NULL; - free(cap->priv); - cap->priv = NULL; - } + gsr_capture_nvfbc_stop(cap_nvfbc); free(cap); } diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c index 3240ed8..eb17d79 100644 --- a/src/capture/xcomposite.c +++ b/src/capture/xcomposite.c @@ -343,7 +343,6 @@ int gsr_capture_xcomposite_capture(gsr_capture_xcomposite *self, AVFrame *frame) } } - self->params.egl->eglSwapBuffers(self->params.egl->egl_display, self->params.egl->egl_surface); //self->params.egl->glFlush(); //self->params.egl->glFinish(); diff --git a/src/capture/xcomposite_cuda.c b/src/capture/xcomposite_cuda.c index c436221..b661fb5 100644 --- a/src/capture/xcomposite_cuda.c +++ b/src/capture/xcomposite_cuda.c @@ -96,6 +96,8 @@ static int gsr_capture_xcomposite_cuda_capture(gsr_capture *cap, AVFrame *frame) gsr_capture_xcomposite_capture(&cap_xcomp->xcomposite, frame); + cap_xcomp->xcomposite.params.egl->eglSwapBuffers(cap_xcomp->xcomposite.params.egl->egl_display, cap_xcomp->xcomposite.params.egl->egl_surface); + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size for(int i = 0; i < 2; ++i) { CUDA_MEMCPY2D memcpy_struct; diff --git a/src/capture/xcomposite_software.c b/src/capture/xcomposite_software.c new file mode 100644 index 0000000..4284cb3 --- /dev/null +++ b/src/capture/xcomposite_software.c @@ -0,0 +1,113 @@ +#include "../../include/capture/xcomposite_software.h" +#include +#include +#include +#include + +typedef struct { + gsr_capture_xcomposite xcomposite; +} gsr_capture_xcomposite_software; + +static void gsr_capture_xcomposite_software_stop(gsr_capture *cap, AVCodecContext *video_codec_context); + +static int gsr_capture_xcomposite_software_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) { + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + + const int res = gsr_capture_xcomposite_start(&cap_xcomp->xcomposite, video_codec_context, frame); + if(res != 0) { + gsr_capture_xcomposite_software_stop(cap, video_codec_context); + return res; + } + + if(!gsr_capture_base_setup_textures(&cap_xcomp->xcomposite.base, frame, cap_xcomp->xcomposite.params.color_range, GSR_SOURCE_COLOR_RGB, false, false)) { + gsr_capture_xcomposite_software_stop(cap, video_codec_context); + return -1; + } + + return 0; +} + +static void gsr_capture_xcomposite_software_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { + (void)video_codec_context; + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + gsr_capture_xcomposite_stop(&cap_xcomp->xcomposite); +} + +static void gsr_capture_xcomposite_software_tick(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + gsr_capture_xcomposite_tick(&cap_xcomp->xcomposite, video_codec_context); +} + +static bool gsr_capture_xcomposite_software_is_damaged(gsr_capture *cap) { + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + 
return gsr_capture_xcomposite_is_damaged(&cap_xcomp->xcomposite); +} + +static void gsr_capture_xcomposite_software_clear_damage(gsr_capture *cap) { + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + gsr_capture_xcomposite_clear_damage(&cap_xcomp->xcomposite); +} + +static bool gsr_capture_xcomposite_software_should_stop(gsr_capture *cap, bool *err) { + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + return gsr_capture_xcomposite_should_stop(&cap_xcomp->xcomposite, err); +} + +static int gsr_capture_xcomposite_software_capture(gsr_capture *cap, AVFrame *frame) { + gsr_capture_xcomposite_software *cap_xcomp = cap->priv; + + gsr_capture_xcomposite_capture(&cap_xcomp->xcomposite, frame); + + const unsigned int formats[2] = { GL_RED, GL_RG }; + for(int i = 0; i < 2; ++i) { + cap_xcomp->xcomposite.params.egl->glBindTexture(GL_TEXTURE_2D, cap_xcomp->xcomposite.base.target_textures[i]); + cap_xcomp->xcomposite.params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]); + } + cap_xcomp->xcomposite.params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + cap_xcomp->xcomposite.params.egl->eglSwapBuffers(cap_xcomp->xcomposite.params.egl->egl_display, cap_xcomp->xcomposite.params.egl->egl_surface); + + return 0; +} + +static void gsr_capture_xcomposite_software_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + if(cap->priv) { + gsr_capture_xcomposite_software_stop(cap, video_codec_context); + free(cap->priv); + cap->priv = NULL; + } + free(cap); +} + +gsr_capture* gsr_capture_xcomposite_software_create(const gsr_capture_xcomposite_software_params *params) { + if(!params) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_software_create params is NULL\n"); + return NULL; + } + + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); + if(!cap) + return NULL; + + gsr_capture_xcomposite_software *cap_xcomp = calloc(1, sizeof(gsr_capture_xcomposite_software)); + if(!cap_xcomp) { + free(cap); + return NULL; + } + + gsr_capture_xcomposite_init(&cap_xcomp->xcomposite, ¶ms->base); + + *cap = (gsr_capture) { + .start = gsr_capture_xcomposite_software_start, + .tick = gsr_capture_xcomposite_software_tick, + .is_damaged = gsr_capture_xcomposite_software_is_damaged, + .clear_damage = gsr_capture_xcomposite_software_clear_damage, + .should_stop = gsr_capture_xcomposite_software_should_stop, + .capture = gsr_capture_xcomposite_software_capture, + .capture_end = NULL, + .destroy = gsr_capture_xcomposite_software_destroy, + .priv = cap_xcomp + }; + + return cap; +} diff --git a/src/capture/xcomposite_vaapi.c b/src/capture/xcomposite_vaapi.c index 3f27014..3a14f30 100644 --- a/src/capture/xcomposite_vaapi.c +++ b/src/capture/xcomposite_vaapi.c @@ -25,7 +25,7 @@ static int gsr_capture_xcomposite_vaapi_start(gsr_capture *cap, AVCodecContext * return res; } - if(!drm_create_codec_context(cap_xcomp->xcomposite.params.egl->card_path, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_xcomp->va_dpy)) { + if(!vaapi_create_codec_context(cap_xcomp->xcomposite.params.egl->card_path, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_xcomp->va_dpy)) { gsr_capture_xcomposite_vaapi_stop(cap, video_codec_context); return -1; } @@ -60,7 +60,9 @@ static bool gsr_capture_xcomposite_vaapi_should_stop(gsr_capture *cap, bool *err static int gsr_capture_xcomposite_vaapi_capture(gsr_capture *cap, AVFrame *frame) { gsr_capture_xcomposite_vaapi *cap_xcomp = cap->priv; - return 
gsr_capture_xcomposite_capture(&cap_xcomp->xcomposite, frame); + gsr_capture_xcomposite_capture(&cap_xcomp->xcomposite, frame); + cap_xcomp->xcomposite.params.egl->eglSwapBuffers(cap_xcomp->xcomposite.params.egl->egl_display, cap_xcomp->xcomposite.params.egl->egl_surface); + return 0; } static void gsr_capture_xcomposite_vaapi_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { diff --git a/src/color_conversion.c b/src/color_conversion.c index cd0397e..f4298d6 100644 --- a/src/color_conversion.c +++ b/src/color_conversion.c @@ -234,7 +234,7 @@ static int create_vertices(gsr_color_conversion *self) { self->params.egl->glGenBuffers(1, &self->vertex_buffer_object_id); self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, self->vertex_buffer_object_id); - self->params.egl->glBufferData(GL_ARRAY_BUFFER, 24 * sizeof(float), NULL, GL_STREAM_DRAW); + self->params.egl->glBufferData(GL_ARRAY_BUFFER, 24 * sizeof(float), NULL, GL_DYNAMIC_DRAW); self->params.egl->glEnableVertexAttribArray(0); self->params.egl->glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); diff --git a/src/egl.c b/src/egl.c index 552d5f4..ec9ad07 100644 --- a/src/egl.c +++ b/src/egl.c @@ -418,6 +418,7 @@ static bool gsr_egl_load_gl(gsr_egl *self, void *library) { { (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" }, { (void**)&self->glTexImage2D, "glTexImage2D" }, { (void**)&self->glCopyImageSubData, "glCopyImageSubData" }, + { (void**)&self->glGetTexImage, "glGetTexImage" }, { (void**)&self->glClearTexImage, "glClearTexImage" }, { (void**)&self->glGenFramebuffers, "glGenFramebuffers" }, { (void**)&self->glBindFramebuffer, "glBindFramebuffer" }, diff --git a/src/main.cpp b/src/main.cpp index 1d479fe..9405dae 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,8 +2,10 @@ extern "C" { #include "../include/capture/nvfbc.h" #include "../include/capture/xcomposite_cuda.h" #include "../include/capture/xcomposite_vaapi.h" +#include "../include/capture/xcomposite_software.h" #include "../include/capture/kms_vaapi.h" #include "../include/capture/kms_cuda.h" +#include "../include/capture/kms_software.h" #include "../include/egl.h" #include "../include/utils.h" #include "../include/color_conversion.h" @@ -530,6 +532,10 @@ static bool check_if_codec_valid_for_hardware(const AVCodec *codec, gsr_gpu_vend return success; } +static const AVCodec* find_h264_software_encoder() { + return avcodec_find_encoder_by_name("libx264"); +} + static const AVCodec* find_h264_encoder(gsr_gpu_vendor vendor, const char *card_path) { const AVCodec *codec = avcodec_find_encoder_by_name(vendor == GSR_GPU_VENDOR_NVIDIA ? "h264_nvenc" : "h264_vaapi"); if(!codec) @@ -628,7 +634,81 @@ static AVFrame* create_audio_frame(AVCodecContext *audio_codec_context) { return frame; } -static void open_video(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gsr_gpu_vendor vendor, PixelFormat pixel_format, bool hdr) { +static void open_video_software(AVCodecContext *codec_context, VideoQuality video_quality, PixelFormat pixel_format, bool hdr) { + (void)pixel_format; // TODO: + AVDictionary *options = nullptr; + + const float qp_multiply = hdr ? 
8.0f/10.0f : 1.0f; + if(codec_context->codec_id == AV_CODEC_ID_AV1) { + switch(video_quality) { + case VideoQuality::MEDIUM: + av_dict_set_int(&options, "qp", 37 * qp_multiply, 0); + break; + case VideoQuality::HIGH: + av_dict_set_int(&options, "qp", 32 * qp_multiply, 0); + break; + case VideoQuality::VERY_HIGH: + av_dict_set_int(&options, "qp", 28 * qp_multiply, 0); + break; + case VideoQuality::ULTRA: + av_dict_set_int(&options, "qp", 24 * qp_multiply, 0); + break; + } + } else if(codec_context->codec_id == AV_CODEC_ID_H264) { + switch(video_quality) { + case VideoQuality::MEDIUM: + av_dict_set_int(&options, "qp", 34 * qp_multiply, 0); + break; + case VideoQuality::HIGH: + av_dict_set_int(&options, "qp", 30 * qp_multiply, 0); + break; + case VideoQuality::VERY_HIGH: + av_dict_set_int(&options, "qp", 26 * qp_multiply, 0); + break; + case VideoQuality::ULTRA: + av_dict_set_int(&options, "qp", 22 * qp_multiply, 0); + break; + } + } else { + switch(video_quality) { + case VideoQuality::MEDIUM: + av_dict_set_int(&options, "qp", 37 * qp_multiply, 0); + break; + case VideoQuality::HIGH: + av_dict_set_int(&options, "qp", 32 * qp_multiply, 0); + break; + case VideoQuality::VERY_HIGH: + av_dict_set_int(&options, "qp", 28 * qp_multiply, 0); + break; + case VideoQuality::ULTRA: + av_dict_set_int(&options, "qp", 24 * qp_multiply, 0); + break; + } + } + + av_dict_set(&options, "preset", "medium", 0); + if(hdr) { + av_dict_set(&options, "profile", "high10", 0); + } else { + av_dict_set(&options, "profile", "high", 0); + } + // TODO: If streaming or piping output set this to zerolatency + av_dict_set(&options, "tune", "fastdecode", 0); + + if(codec_context->codec_id == AV_CODEC_ID_H264) { + av_dict_set(&options, "coder", "cabac", 0); // TODO: cavlc is faster than cabac but worse compression. Which to use? + } + + av_dict_set(&options, "strict", "experimental", 0); + + int ret = avcodec_open2(codec_context, codec_context->codec, &options); + if (ret < 0) { + fprintf(stderr, "Error: Could not open video codec: %s\n", av_error_to_string(ret)); + _exit(1); + } +} + +static void open_video_hardware(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gsr_gpu_vendor vendor, PixelFormat pixel_format, bool hdr) { (void)very_old_gpu; AVDictionary *options = nullptr; // 8 bit / 10 bit = 80% @@ -725,6 +805,8 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality av_dict_set(&options, "tune", "hq", 0); av_dict_set(&options, "rc", "constqp", 0); + // TODO: Enable multipass + if(codec_context->codec_id == AV_CODEC_ID_H264) { switch(pixel_format) { case PixelFormat::YUV420: @@ -824,7 +906,7 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality static void usage_header() { const bool inside_flatpak = getenv("FLATPAK_ID") != NULL; const char *program_name = inside_flatpak ? 
"flatpak run --command=gpu-screen-recorder com.dec05eba.gpu_screen_recorder" : "gpu-screen-recorder"; - fprintf(stderr, "usage: %s -w [-c ] [-s WxH] -f [-a ] [-q ] [-r ] [-k h264|hevc|hevc_hdr|av1|av1_hdr] [-ac aac|opus|flac] [-ab ] [-oc yes|no] [-fm cfr|vfr|content] [-cr limited|full] [-mf yes|no] [-sc ] [-cursor yes|no] [-keyint ] [-o ] [-v yes|no] [-h|--help]\n", program_name); + fprintf(stderr, "usage: %s -w [-c ] [-s WxH] -f [-a ] [-q ] [-r ] [-k h264|hevc|hevc_hdr|av1|av1_hdr] [-ac aac|opus|flac] [-ab ] [-oc yes|no] [-fm cfr|vfr|content] [-cr limited|full] [-mf yes|no] [-sc ] [-cursor yes|no] [-keyint ] [-encoder gpu|cpu] [-o ] [-v yes|no] [-h|--help]\n", program_name); } static void usage_full() { @@ -863,7 +945,7 @@ static void usage_full() { fprintf(stderr, " and the video will only be saved when the gpu-screen-recorder is closed. This feature is similar to Nvidia's instant replay feature.\n"); fprintf(stderr, " This option has be between 5 and 1200. Note that the replay buffer size will not always be precise, because of keyframes. Optional, disabled by default.\n"); fprintf(stderr, "\n"); - fprintf(stderr, " -k Video codec to use. Should be either 'auto', 'h264', 'hevc', 'av1', 'hevc_hdr' or 'av1_hdr'. Defaults to 'auto' which defaults to 'h264'.\n"); + fprintf(stderr, " -k Video codec to use. Should be either 'auto', 'h264', 'hevc', 'av1', 'hevc_hdr' or 'av1_hdr'. Optional, defaults to 'auto' which defaults to 'h264'.\n"); fprintf(stderr, " Forcefully set to 'h264' if the file container type is 'flv'.\n"); fprintf(stderr, " 'hevc_hdr' and 'av1_hdr' option is not available on X11.\n"); fprintf(stderr, " Note: hdr metadata is not included in the video when recording with 'hevc_hdr'/'av1_hdr' because of bugs in AMD, Intel and NVIDIA drivers (amazin', they are all bugged).\n"); @@ -872,8 +954,8 @@ static void usage_full() { fprintf(stderr, " 'opus' and 'flac' is only supported by .mp4/.mkv files. 'opus' is recommended for best performance and smallest audio size.\n"); fprintf(stderr, " Flac audio codec is option is disable at the moment because of a temporary issue.\n"); fprintf(stderr, "\n"); - fprintf(stderr, " -ab Audio bitrate to use. Optional, by default the bitrate is 128000 for opus and flac and 160000 for aac.\n"); - fprintf(stderr, " If this is set to 0 then it's the same as if it's absent, in which case the bitrate is determined automatically depending on the audio codec.\n"); + fprintf(stderr, " -ab Audio bitrate to use. If this is set to 0 then it's the same as if it's absent, in which case the bitrate is determined automatically depending on the audio codec.\n"); + fprintf(stderr, " Optional, by default the bitrate is 128000 for opus and flac and 160000 for aac.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -oc Overclock memory transfer rate to the maximum performance level. This only applies to NVIDIA on X11 and exists to overcome a bug in NVIDIA driver where performance level\n"); fprintf(stderr, " is dropped when you record a game. Only needed if you are recording a game that is bottlenecked by GPU. The same issue exists on Wayland but overclocking is not possible on Wayland.\n"); @@ -899,6 +981,9 @@ static void usage_full() { fprintf(stderr, " This also affects seeking in the video and may affect how the replay video is cut. If this is set to 10 for example then you can only seek in 10-second chunks in the video.\n"); fprintf(stderr, " Setting this to a higher value reduces the video file size if you are ok with the previously described downside. 
This option is expected to be a floating point number.\n"); fprintf(stderr, " By default this value is set to 2.0.\n"); + fprintf(stderr, " -encoder\n"); + fprintf(stderr, " Which device should be used for video encoding. Should either be 'gpu' or 'cpu'. Does currently only work with h264 codec option (-k).\n"); + fprintf(stderr, " Optional, set to 'gpu' by default.\n"); fprintf(stderr, "\n"); fprintf(stderr, " --list-supported-video-codecs\n"); fprintf(stderr, " List supported video codecs and exits. Prints h264, hevc, hevc_hdr, av1 and av1_hdr (if supported).\n"); @@ -1479,7 +1564,7 @@ static void list_supported_video_codecs() { XCloseDisplay(dpy); } -static gsr_capture* create_capture_impl(const char *window_str, const char *screen_region, bool wayland, gsr_egl &egl, int fps, bool overclock, VideoCodec video_codec, gsr_color_range color_range, bool record_cursor, bool track_damage) { +static gsr_capture* create_capture_impl(const char *window_str, const char *screen_region, bool wayland, gsr_egl &egl, int fps, bool overclock, VideoCodec video_codec, gsr_color_range color_range, bool record_cursor, bool track_damage, bool use_software_video_encoder) { vec2i region_size = { 0, 0 }; Window src_window_id = None; bool follow_focused = false; @@ -1545,57 +1630,70 @@ static gsr_capture* create_capture_impl(const char *window_str, const char *scre } } - if(egl.gpu_info.vendor == GSR_GPU_VENDOR_NVIDIA) { - if(wayland) { - gsr_capture_kms_cuda_params kms_params; + if(use_software_video_encoder && (wayland || egl.gpu_info.vendor != GSR_GPU_VENDOR_NVIDIA)) { + gsr_capture_kms_software_params kms_params; + kms_params.egl = &egl; + kms_params.display_to_capture = window_str; + kms_params.hdr = video_codec_is_hdr(video_codec); + kms_params.color_range = color_range; + kms_params.record_cursor = record_cursor; + capture = gsr_capture_kms_software_create(&kms_params); + if(!capture) + _exit(1); + } else { + if(egl.gpu_info.vendor == GSR_GPU_VENDOR_NVIDIA) { + if(wayland) { + gsr_capture_kms_cuda_params kms_params; + kms_params.egl = &egl; + kms_params.display_to_capture = window_str; + kms_params.hdr = video_codec_is_hdr(video_codec); + kms_params.color_range = color_range; + kms_params.record_cursor = record_cursor; + capture = gsr_capture_kms_cuda_create(&kms_params); + if(!capture) + _exit(1); + } else { + const char *capture_target = window_str; + bool direct_capture = strcmp(window_str, "screen-direct") == 0; + if(direct_capture) { + capture_target = "screen"; + // TODO: Temporary disable direct capture because push model causes stuttering when it's direct capturing. This might be a nvfbc bug. This does not happen when using a compositor. + direct_capture = false; + fprintf(stderr, "Warning: screen-direct has temporary been disabled as it causes stuttering. This is likely a NvFBC bug. 
Falling back to \"screen\".\n"); + } + + if(strcmp(window_str, "screen-direct-force") == 0) { + direct_capture = true; + capture_target = "screen"; + } + + gsr_capture_nvfbc_params nvfbc_params; + nvfbc_params.egl = &egl; + nvfbc_params.display_to_capture = capture_target; + nvfbc_params.fps = fps; + nvfbc_params.pos = { 0, 0 }; + nvfbc_params.size = { 0, 0 }; + nvfbc_params.direct_capture = direct_capture; + nvfbc_params.overclock = overclock; + nvfbc_params.hdr = video_codec_is_hdr(video_codec); + nvfbc_params.color_range = color_range; + nvfbc_params.record_cursor = record_cursor; + nvfbc_params.use_software_video_encoder = use_software_video_encoder; + capture = gsr_capture_nvfbc_create(&nvfbc_params); + if(!capture) + _exit(1); + } + } else { + gsr_capture_kms_vaapi_params kms_params; kms_params.egl = &egl; kms_params.display_to_capture = window_str; kms_params.hdr = video_codec_is_hdr(video_codec); kms_params.color_range = color_range; kms_params.record_cursor = record_cursor; - capture = gsr_capture_kms_cuda_create(&kms_params); - if(!capture) - _exit(1); - } else { - const char *capture_target = window_str; - bool direct_capture = strcmp(window_str, "screen-direct") == 0; - if(direct_capture) { - capture_target = "screen"; - // TODO: Temporary disable direct capture because push model causes stuttering when it's direct capturing. This might be a nvfbc bug. This does not happen when using a compositor. - direct_capture = false; - fprintf(stderr, "Warning: screen-direct has temporary been disabled as it causes stuttering. This is likely a NvFBC bug. Falling back to \"screen\".\n"); - } - - if(strcmp(window_str, "screen-direct-force") == 0) { - direct_capture = true; - capture_target = "screen"; - } - - gsr_capture_nvfbc_params nvfbc_params; - nvfbc_params.egl = &egl; - nvfbc_params.display_to_capture = capture_target; - nvfbc_params.fps = fps; - nvfbc_params.pos = { 0, 0 }; - nvfbc_params.size = { 0, 0 }; - nvfbc_params.direct_capture = direct_capture; - nvfbc_params.overclock = overclock; - nvfbc_params.hdr = video_codec_is_hdr(video_codec); - nvfbc_params.color_range = color_range; - nvfbc_params.record_cursor = record_cursor; - capture = gsr_capture_nvfbc_create(&nvfbc_params); + capture = gsr_capture_kms_vaapi_create(&kms_params); if(!capture) _exit(1); } - } else { - gsr_capture_kms_vaapi_params kms_params; - kms_params.egl = &egl; - kms_params.display_to_capture = window_str; - kms_params.hdr = video_codec_is_hdr(video_codec); - kms_params.color_range = color_range; - kms_params.record_cursor = record_cursor; - capture = gsr_capture_kms_vaapi_create(&kms_params); - if(!capture) - _exit(1); } } else { if(wayland) { @@ -1612,36 +1710,50 @@ static gsr_capture* create_capture_impl(const char *window_str, const char *scre } if(!capture) { - switch(egl.gpu_info.vendor) { - case GSR_GPU_VENDOR_AMD: - case GSR_GPU_VENDOR_INTEL: { - gsr_capture_xcomposite_vaapi_params xcomposite_params; - xcomposite_params.base.egl = &egl; - xcomposite_params.base.window = src_window_id; - xcomposite_params.base.follow_focused = follow_focused; - xcomposite_params.base.region_size = region_size; - xcomposite_params.base.color_range = color_range; - xcomposite_params.base.record_cursor = record_cursor; - xcomposite_params.base.track_damage = track_damage; - capture = gsr_capture_xcomposite_vaapi_create(&xcomposite_params); - if(!capture) - _exit(1); - break; - } - case GSR_GPU_VENDOR_NVIDIA: { - gsr_capture_xcomposite_cuda_params xcomposite_params; - xcomposite_params.base.egl = &egl; - 
xcomposite_params.base.window = src_window_id; - xcomposite_params.base.follow_focused = follow_focused; - xcomposite_params.base.region_size = region_size; - xcomposite_params.base.color_range = color_range; - xcomposite_params.base.record_cursor = record_cursor; - xcomposite_params.base.track_damage = track_damage; - xcomposite_params.overclock = overclock; - capture = gsr_capture_xcomposite_cuda_create(&xcomposite_params); - if(!capture) - _exit(1); - break; + if(use_software_video_encoder) { + gsr_capture_xcomposite_software_params xcomposite_params; + xcomposite_params.base.egl = &egl; + xcomposite_params.base.window = src_window_id; + xcomposite_params.base.follow_focused = follow_focused; + xcomposite_params.base.region_size = region_size; + xcomposite_params.base.color_range = color_range; + xcomposite_params.base.record_cursor = record_cursor; + xcomposite_params.base.track_damage = track_damage; + capture = gsr_capture_xcomposite_software_create(&xcomposite_params); + if(!capture) + _exit(1); + } else { + switch(egl.gpu_info.vendor) { + case GSR_GPU_VENDOR_AMD: + case GSR_GPU_VENDOR_INTEL: { + gsr_capture_xcomposite_vaapi_params xcomposite_params; + xcomposite_params.base.egl = &egl; + xcomposite_params.base.window = src_window_id; + xcomposite_params.base.follow_focused = follow_focused; + xcomposite_params.base.region_size = region_size; + xcomposite_params.base.color_range = color_range; + xcomposite_params.base.record_cursor = record_cursor; + xcomposite_params.base.track_damage = track_damage; + capture = gsr_capture_xcomposite_vaapi_create(&xcomposite_params); + if(!capture) + _exit(1); + break; + } + case GSR_GPU_VENDOR_NVIDIA: { + gsr_capture_xcomposite_cuda_params xcomposite_params; + xcomposite_params.base.egl = &egl; + xcomposite_params.base.window = src_window_id; + xcomposite_params.base.follow_focused = follow_focused; + xcomposite_params.base.region_size = region_size; + xcomposite_params.base.color_range = color_range; + xcomposite_params.base.record_cursor = record_cursor; + xcomposite_params.base.track_damage = track_damage; + xcomposite_params.overclock = overclock; + capture = gsr_capture_xcomposite_cuda_create(&xcomposite_params); + if(!capture) + _exit(1); + break; + } } } } @@ -1649,6 +1761,14 @@ static gsr_capture* create_capture_impl(const char *window_str, const char *scre return capture; } +static AVPixelFormat get_pixel_format(gsr_gpu_vendor vendor, bool use_software_video_encoder) { + if(use_software_video_encoder) { + return AV_PIX_FMT_NV12; + } else { + return vendor == GSR_GPU_VENDOR_NVIDIA ? 
AV_PIX_FMT_CUDA : AV_PIX_FMT_VAAPI; + } +} + struct Arg { std::vector values; bool optional = false; @@ -1715,6 +1835,7 @@ int main(int argc, char **argv) { { "-cursor", Arg { {}, true, false } }, { "-gopm", Arg { {}, true, false } }, // deprecated, used keyint instead { "-keyint", Arg { {}, true, false } }, + { "-encoder", Arg { {}, true, false } }, }; for(int i = 1; i < argc; i += 2) { @@ -1796,7 +1917,6 @@ int main(int argc, char **argv) { } float keyint = 2.0; - const char *gopm_str = args["-gopm"].value(); const char *keyint_str = args["-keyint"].value(); if(keyint_str) { if(sscanf(keyint_str, "%f", &keyint) != 1) { @@ -1808,18 +1928,19 @@ int main(int argc, char **argv) { fprintf(stderr, "Error: -keyint is expected to be 0 or larger\n"); usage(); } - } else if(gopm_str) { - if(sscanf(gopm_str, "%f", &keyint) != 1) { - fprintf(stderr, "Error: -gopm argument \"%s\" is not a floating point number\n", gopm_str); - usage(); - } + } - if(keyint < 0) { - fprintf(stderr, "Error: -gopm is expected to be 0 or larger\n"); + bool use_software_video_encoder = false; + const char *encoder_str = args["-encoder"].value(); + if(encoder_str) { + if(strcmp(encoder_str, "gpu") == 0) { + use_software_video_encoder = false; + } else if(strcmp(encoder_str, "cpu") == 0) { + use_software_video_encoder = true; + } else { + fprintf(stderr, "Error: -encoder is expected to be 'gpu' or 'cpu', was '%s'\n", encoder_str); usage(); } - - fprintf(stderr, "Warning: -gopm argument is deprecated, use -keyint instead\n"); } bool overclock = false; @@ -2198,16 +2319,9 @@ int main(int argc, char **argv) { const bool video_codec_auto = strcmp(video_codec_to_use, "auto") == 0; if(video_codec_auto) { - const AVCodec *h264_codec = find_h264_encoder(egl.gpu_info.vendor, egl.card_path); - if(!h264_codec) { - fprintf(stderr, "Info: using hevc encoder because a codec was not specified and your gpu does not support h264\n"); - video_codec_to_use = "hevc"; - video_codec = VideoCodec::HEVC; - } else { - fprintf(stderr, "Info: using h264 encoder because a codec was not specified\n"); - video_codec_to_use = "h264"; - video_codec = VideoCodec::H264; - } + fprintf(stderr, "Info: using h264 encoder because a codec was not specified\n"); + video_codec_to_use = "h264"; + video_codec = VideoCodec::H264; } // TODO: Allow hevc, vp9 and av1 in (enhanced) flv (supported since ffmpeg 6.1) @@ -2241,17 +2355,29 @@ int main(int argc, char **argv) { } } + if(use_software_video_encoder && video_codec != VideoCodec::H264) { + fprintf(stderr, "Error: \"-encoder cpu\" option is currently only available when using h264 codec option (-k)\n"); + usage(); + } + const AVCodec *video_codec_f = nullptr; switch(video_codec) { - case VideoCodec::H264: - video_codec_f = find_h264_encoder(egl.gpu_info.vendor, egl.card_path); + case VideoCodec::H264: { + if(use_software_video_encoder) { + video_codec_f = find_h264_software_encoder(); + } else { + video_codec_f = find_h264_encoder(egl.gpu_info.vendor, egl.card_path); + } break; + } case VideoCodec::HEVC: case VideoCodec::HEVC_HDR: + // TODO: software encoder video_codec_f = find_hevc_encoder(egl.gpu_info.vendor, egl.card_path); break; case VideoCodec::AV1: case VideoCodec::AV1_HDR: + // TODO: software encoder video_codec_f = find_av1_encoder(egl.gpu_info.vendor, egl.card_path); break; } @@ -2315,7 +2441,7 @@ int main(int argc, char **argv) { _exit(2); } - gsr_capture *capture = create_capture_impl(window_str, screen_region, wayland, egl, fps, overclock, video_codec, color_range, record_cursor, framerate_mode == 
FramerateMode::CONTENT);
+    gsr_capture *capture = create_capture_impl(window_str, screen_region, wayland, egl, fps, overclock, video_codec, color_range, record_cursor, framerate_mode == FramerateMode::CONTENT, use_software_video_encoder);
 
     // (Some?) livestreaming services require at least one audio track to work.
     // If not audio is provided then create one silent audio track.
@@ -2336,7 +2462,7 @@ int main(int argc, char **argv) {
     const bool hdr = video_codec_is_hdr(video_codec);
     const bool low_latency_recording = is_livestream || is_output_piped;
 
-    AVCodecContext *video_codec_context = create_video_codec_context(egl.gpu_info.vendor == GSR_GPU_VENDOR_NVIDIA ? AV_PIX_FMT_CUDA : AV_PIX_FMT_VAAPI, quality, fps, video_codec_f, low_latency_recording, egl.gpu_info.vendor, framerate_mode, hdr, color_range, keyint);
+    AVCodecContext *video_codec_context = create_video_codec_context(get_pixel_format(egl.gpu_info.vendor, use_software_video_encoder), quality, fps, video_codec_f, low_latency_recording, egl.gpu_info.vendor, framerate_mode, hdr, color_range, keyint);
 
     if(replay_buffer_size_secs == -1)
         video_stream = create_stream(av_format_context, video_codec_context);
@@ -2360,7 +2486,11 @@ int main(int argc, char **argv) {
         _exit(capture_result);
     }
 
-    open_video(video_codec_context, quality, very_old_gpu, egl.gpu_info.vendor, pixel_format, hdr);
+    if(use_software_video_encoder) {
+        open_video_software(video_codec_context, quality, pixel_format, hdr);
+    } else {
+        open_video_hardware(video_codec_context, quality, very_old_gpu, egl.gpu_info.vendor, pixel_format, hdr);
+    }
 
     if(video_stream)
         avcodec_parameters_from_context(video_stream->codecpar, video_codec_context);
-- 
cgit v1.2.3
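Example invocation using the software (CPU) video encoding path added by this patch (the recording parameters are illustrative; per the patch, `-encoder cpu` currently only works together with `-k h264`):

    gpu-screen-recorder -w screen -f 60 -k h264 -encoder cpu -o video.mp4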