aboutsummaryrefslogtreecommitdiff
path: root/src/capture/nvfbc.c
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2024-07-04 01:35:03 +0200
committerdec05eba <dec05eba@protonmail.com>2024-07-04 01:35:03 +0200
commit62d61fda12e3774fee6b671e44fb89bd2ef8da8c (patch)
treed5050689876d9adbe51bdf137475b3c3bc332c63 /src/capture/nvfbc.c
parentea7b58d7f92b179a4c2717237422e1200b5be673 (diff)
Add support for software video encoding with '-encoder cpu' option, currently h264 only
Diffstat (limited to 'src/capture/nvfbc.c')
-rw-r--r--src/capture/nvfbc.c132
1 files changed, 75 insertions, 57 deletions
diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c
index 9eabb18..6608aef 100644
--- a/src/capture/nvfbc.c
+++ b/src/capture/nvfbc.c
@@ -43,13 +43,6 @@ typedef struct {
double nvfbc_dead_start;
} gsr_capture_nvfbc;
-#if defined(_WIN64) || defined(__LP64__)
-typedef unsigned long long CUdeviceptr_v2;
-#else
-typedef unsigned int CUdeviceptr_v2;
-#endif
-typedef CUdeviceptr_v2 CUdeviceptr;
-
static int max_int(int a, int b) {
return a > b ? a : b;
}
@@ -301,14 +294,30 @@ static int gsr_capture_nvfbc_setup_session(gsr_capture_nvfbc *cap_nvfbc) {
return 0;
}
+static void gsr_capture_nvfbc_stop(gsr_capture_nvfbc *cap_nvfbc) {
+ gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc);
+ gsr_capture_base_stop(&cap_nvfbc->base);
+ gsr_cuda_unload(&cap_nvfbc->cuda);
+ if(cap_nvfbc->library) {
+ dlclose(cap_nvfbc->library);
+ cap_nvfbc->library = NULL;
+ }
+ if(cap_nvfbc->params.display_to_capture) {
+ free((void*)cap_nvfbc->params.display_to_capture);
+ cap_nvfbc->params.display_to_capture = NULL;
+ }
+}
+
static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) {
gsr_capture_nvfbc *cap_nvfbc = cap->priv;
cap_nvfbc->base.video_codec_context = video_codec_context;
cap_nvfbc->base.egl = cap_nvfbc->params.egl;
- if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock))
- return -1;
+ if(!cap_nvfbc->params.use_software_video_encoder) {
+ if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock))
+ return -1;
+ }
if(!gsr_capture_nvfbc_load_library(cap)) {
gsr_cuda_unload(&cap_nvfbc->cuda);
@@ -366,18 +375,26 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
frame->width = video_codec_context->width;
frame->height = video_codec_context->height;
- if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream))
- goto error_cleanup;
+ if(cap_nvfbc->params.use_software_video_encoder) {
+ if(!gsr_capture_base_setup_textures(&cap_nvfbc->base, frame, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr, true)) {
+ goto error_cleanup;
+ }
+ } else {
+ if(!cap_nvfbc->params.use_software_video_encoder) {
+ if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream))
+ goto error_cleanup;
+ }
- gsr_cuda_context cuda_context = {
- .cuda = &cap_nvfbc->cuda,
- .cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources,
- .mapped_arrays = cap_nvfbc->mapped_arrays
- };
+ gsr_cuda_context cuda_context = {
+ .cuda = &cap_nvfbc->cuda,
+ .cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources,
+ .mapped_arrays = cap_nvfbc->mapped_arrays
+ };
- // TODO: Remove this, it creates shit we dont need
- if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) {
- goto error_cleanup;
+ // TODO: Remove this, it creates shit we dont need
+ if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) {
+ goto error_cleanup;
+ }
}
/* Disable vsync */
set_vertical_sync_enabled(cap_nvfbc->params.egl, 0);
@@ -385,9 +402,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
return 0;
error_cleanup:
- gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc);
- gsr_capture_base_stop(&cap_nvfbc->base);
- gsr_cuda_unload(&cap_nvfbc->cuda);
+ gsr_capture_nvfbc_stop(cap_nvfbc);
return -1;
}
@@ -443,32 +458,44 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) {
(vec2i){0, 0}, (vec2i){frame->width, frame->height},
0.0f, false);
- cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window);
-
- // TODO: HDR is broken
- const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size
- for(int i = 0; i < 2; ++i) {
- CUDA_MEMCPY2D memcpy_struct;
- memcpy_struct.srcXInBytes = 0;
- memcpy_struct.srcY = 0;
- memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY;
-
- memcpy_struct.dstXInBytes = 0;
- memcpy_struct.dstY = 0;
- memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE;
-
- memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i];
- memcpy_struct.srcPitch = frame->width / div[i];
- memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i];
- memcpy_struct.dstPitch = frame->linesize[i];
- memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1);
- memcpy_struct.Height = frame->height / div[i];
- // TODO: Remove this copy if possible
- cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream);
- }
+ if(cap_nvfbc->params.use_software_video_encoder) {
+ // TODO: Hdr?
+ const unsigned int formats[2] = { GL_RED, GL_RG };
+ for(int i = 0; i < 2; ++i) {
+ cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, cap_nvfbc->base.target_textures[i]);
+ cap_nvfbc->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]);
+ }
+ cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, 0);
+
+ cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window);
+ } else {
+ cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window);
+
+ // TODO: HDR is broken
+ const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size
+ for(int i = 0; i < 2; ++i) {
+ CUDA_MEMCPY2D memcpy_struct;
+ memcpy_struct.srcXInBytes = 0;
+ memcpy_struct.srcY = 0;
+ memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY;
+
+ memcpy_struct.dstXInBytes = 0;
+ memcpy_struct.dstY = 0;
+ memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+
+ memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i];
+ memcpy_struct.srcPitch = frame->width / div[i];
+ memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i];
+ memcpy_struct.dstPitch = frame->linesize[i];
+ memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1);
+ memcpy_struct.Height = frame->height / div[i];
+ // TODO: Remove this copy if possible
+ cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream);
+ }
- // TODO: needed?
- cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream);
+ // TODO: needed?
+ cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream);
+ }
return 0;
}
@@ -476,16 +503,7 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) {
static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) {
(void)video_codec_context;
gsr_capture_nvfbc *cap_nvfbc = cap->priv;
- gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc);
- if(cap_nvfbc) {
- gsr_capture_base_stop(&cap_nvfbc->base);
- gsr_cuda_unload(&cap_nvfbc->cuda);
- dlclose(cap_nvfbc->library);
- free((void*)cap_nvfbc->params.display_to_capture);
- cap_nvfbc->params.display_to_capture = NULL;
- free(cap->priv);
- cap->priv = NULL;
- }
+ gsr_capture_nvfbc_stop(cap_nvfbc);
free(cap);
}