From 62d61fda12e3774fee6b671e44fb89bd2ef8da8c Mon Sep 17 00:00:00 2001 From: dec05eba Date: Thu, 4 Jul 2024 01:35:03 +0200 Subject: Add support for software video encoding with '-encoder cpu' option, currently h264 only --- src/capture/nvfbc.c | 132 +++++++++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 57 deletions(-) (limited to 'src/capture/nvfbc.c') diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index 9eabb18..6608aef 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -43,13 +43,6 @@ typedef struct { double nvfbc_dead_start; } gsr_capture_nvfbc; -#if defined(_WIN64) || defined(__LP64__) -typedef unsigned long long CUdeviceptr_v2; -#else -typedef unsigned int CUdeviceptr_v2; -#endif -typedef CUdeviceptr_v2 CUdeviceptr; - static int max_int(int a, int b) { return a > b ? a : b; } @@ -301,14 +294,30 @@ static int gsr_capture_nvfbc_setup_session(gsr_capture_nvfbc *cap_nvfbc) { return 0; } +static void gsr_capture_nvfbc_stop(gsr_capture_nvfbc *cap_nvfbc) { + gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc); + gsr_capture_base_stop(&cap_nvfbc->base); + gsr_cuda_unload(&cap_nvfbc->cuda); + if(cap_nvfbc->library) { + dlclose(cap_nvfbc->library); + cap_nvfbc->library = NULL; + } + if(cap_nvfbc->params.display_to_capture) { + free((void*)cap_nvfbc->params.display_to_capture); + cap_nvfbc->params.display_to_capture = NULL; + } +} + static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; cap_nvfbc->base.video_codec_context = video_codec_context; cap_nvfbc->base.egl = cap_nvfbc->params.egl; - if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock)) - return -1; + if(!cap_nvfbc->params.use_software_video_encoder) { + if(!gsr_cuda_load(&cap_nvfbc->cuda, cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.overclock)) + return -1; + } if(!gsr_capture_nvfbc_load_library(cap)) { gsr_cuda_unload(&cap_nvfbc->cuda); @@ -366,18 +375,26 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec frame->width = video_codec_context->width; frame->height = video_codec_context->height; - if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream)) - goto error_cleanup; + if(cap_nvfbc->params.use_software_video_encoder) { + if(!gsr_capture_base_setup_textures(&cap_nvfbc->base, frame, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr, true)) { + goto error_cleanup; + } + } else { + if(!cap_nvfbc->params.use_software_video_encoder) { + if(!cuda_create_codec_context(cap_nvfbc->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_nvfbc->cuda_stream)) + goto error_cleanup; + } - gsr_cuda_context cuda_context = { - .cuda = &cap_nvfbc->cuda, - .cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources, - .mapped_arrays = cap_nvfbc->mapped_arrays - }; + gsr_cuda_context cuda_context = { + .cuda = &cap_nvfbc->cuda, + .cuda_graphics_resources = cap_nvfbc->cuda_graphics_resources, + .mapped_arrays = cap_nvfbc->mapped_arrays + }; - // TODO: Remove this, it creates shit we dont need - if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) { - goto error_cleanup; + // TODO: Remove this, it creates shit we dont need + if(!gsr_capture_base_setup_cuda_textures(&cap_nvfbc->base, frame, &cuda_context, cap_nvfbc->params.color_range, GSR_SOURCE_COLOR_BGR, cap_nvfbc->params.hdr)) { + goto error_cleanup; + } } /* Disable vsync */ set_vertical_sync_enabled(cap_nvfbc->params.egl, 0); @@ -385,9 +402,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec return 0; error_cleanup: - gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc); - gsr_capture_base_stop(&cap_nvfbc->base); - gsr_cuda_unload(&cap_nvfbc->cuda); + gsr_capture_nvfbc_stop(cap_nvfbc); return -1; } @@ -443,32 +458,44 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { (vec2i){0, 0}, (vec2i){frame->width, frame->height}, 0.0f, false); - cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window); - - // TODO: HDR is broken - const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size - for(int i = 0; i < 2; ++i) { - CUDA_MEMCPY2D memcpy_struct; - memcpy_struct.srcXInBytes = 0; - memcpy_struct.srcY = 0; - memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; - - memcpy_struct.dstXInBytes = 0; - memcpy_struct.dstY = 0; - memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; - - memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i]; - memcpy_struct.srcPitch = frame->width / div[i]; - memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; - memcpy_struct.dstPitch = frame->linesize[i]; - memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1); - memcpy_struct.Height = frame->height / div[i]; - // TODO: Remove this copy if possible - cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream); - } + if(cap_nvfbc->params.use_software_video_encoder) { + // TODO: Hdr? + const unsigned int formats[2] = { GL_RED, GL_RG }; + for(int i = 0; i < 2; ++i) { + cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, cap_nvfbc->base.target_textures[i]); + cap_nvfbc->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]); + } + cap_nvfbc->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window); + } else { + cap_nvfbc->params.egl->glXSwapBuffers(cap_nvfbc->params.egl->x11.dpy, cap_nvfbc->params.egl->x11.window); + + // TODO: HDR is broken + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + for(int i = 0; i < 2; ++i) { + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; + + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + memcpy_struct.srcArray = cap_nvfbc->mapped_arrays[i]; + memcpy_struct.srcPitch = frame->width / div[i]; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; + memcpy_struct.dstPitch = frame->linesize[i]; + memcpy_struct.WidthInBytes = frame->width * (cap_nvfbc->params.hdr ? 2 : 1); + memcpy_struct.Height = frame->height / div[i]; + // TODO: Remove this copy if possible + cap_nvfbc->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_nvfbc->cuda_stream); + } - // TODO: needed? - cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream); + // TODO: needed? + cap_nvfbc->cuda.cuStreamSynchronize(cap_nvfbc->cuda_stream); + } return 0; } @@ -476,16 +503,7 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { (void)video_codec_context; gsr_capture_nvfbc *cap_nvfbc = cap->priv; - gsr_capture_nvfbc_destroy_session_and_handle(cap_nvfbc); - if(cap_nvfbc) { - gsr_capture_base_stop(&cap_nvfbc->base); - gsr_cuda_unload(&cap_nvfbc->cuda); - dlclose(cap_nvfbc->library); - free((void*)cap_nvfbc->params.display_to_capture); - cap_nvfbc->params.display_to_capture = NULL; - free(cap->priv); - cap->priv = NULL; - } + gsr_capture_nvfbc_stop(cap_nvfbc); free(cap); } -- cgit v1.2.3