diff options
Diffstat (limited to 'src/capture/xcomposite_cuda.c')
-rw-r--r-- | src/capture/xcomposite_cuda.c | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/src/capture/xcomposite_cuda.c b/src/capture/xcomposite_cuda.c new file mode 100644 index 0000000..6e13d2a --- /dev/null +++ b/src/capture/xcomposite_cuda.c @@ -0,0 +1,155 @@ +#include "../../include/capture/xcomposite_cuda.h" +#include "../../include/cuda.h" +#include <stdio.h> +#include <stdlib.h> +#include <libavutil/frame.h> +#include <libavcodec/avcodec.h> + +typedef struct { + gsr_capture_xcomposite xcomposite; + bool overclock; + + gsr_cuda cuda; + CUgraphicsResource cuda_graphics_resources[2]; + CUarray mapped_arrays[2]; + CUstream cuda_stream; +} gsr_capture_xcomposite_cuda; + +static void gsr_capture_xcomposite_cuda_stop(gsr_capture *cap, AVCodecContext *video_codec_context); + +static int gsr_capture_xcomposite_cuda_start(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame *frame) { + gsr_capture_xcomposite_cuda *cap_xcomp = cap->priv; + + const int res = gsr_capture_xcomposite_start(&cap_xcomp->xcomposite, video_codec_context, frame); + if(res != 0) { + gsr_capture_xcomposite_cuda_stop(cap, video_codec_context); + return res; + } + + if(!gsr_cuda_load(&cap_xcomp->cuda, cap_xcomp->xcomposite.params.egl->x11.dpy, cap_xcomp->overclock)) { + fprintf(stderr, "gsr error: gsr_capture_kms_cuda_start: failed to load cuda\n"); + gsr_capture_xcomposite_cuda_stop(cap, video_codec_context); + return -1; + } + + if(!cuda_create_codec_context(cap_xcomp->cuda.cu_ctx, video_codec_context, video_codec_context->width, video_codec_context->height, false, &cap_xcomp->cuda_stream)) { + gsr_capture_xcomposite_cuda_stop(cap, video_codec_context); + return -1; + } + + gsr_cuda_context cuda_context = { + .cuda = &cap_xcomp->cuda, + .cuda_graphics_resources = cap_xcomp->cuda_graphics_resources, + .mapped_arrays = cap_xcomp->mapped_arrays + }; + + if(!gsr_capture_base_setup_cuda_textures(&cap_xcomp->xcomposite.base, frame, &cuda_context, cap_xcomp->xcomposite.params.color_range, GSR_SOURCE_COLOR_RGB, false)) { + gsr_capture_xcomposite_cuda_stop(cap, video_codec_context); + return -1; + } + + return 0; +} + +static void gsr_capture_xcomposite_unload_cuda_graphics(gsr_capture_xcomposite_cuda *cap_xcomp) { + if(cap_xcomp->cuda.cu_ctx) { + for(int i = 0; i < 2; ++i) { + if(cap_xcomp->cuda_graphics_resources[i]) { + cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resources[i], 0); + cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resources[i]); + cap_xcomp->cuda_graphics_resources[i] = 0; + } + } + } +} + +static void gsr_capture_xcomposite_cuda_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { + (void)video_codec_context; + gsr_capture_xcomposite_cuda *cap_xcomp = cap->priv; + gsr_capture_xcomposite_stop(&cap_xcomp->xcomposite); + gsr_capture_xcomposite_unload_cuda_graphics(cap_xcomp); + gsr_cuda_unload(&cap_xcomp->cuda); +} + +static void gsr_capture_xcomposite_cuda_tick(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite_cuda *cap_xcomp = cap->priv; + gsr_capture_xcomposite_tick(&cap_xcomp->xcomposite, video_codec_context); +} + +static bool gsr_capture_xcomposite_cuda_should_stop(gsr_capture *cap, bool *err) { + gsr_capture_xcomposite_cuda *cap_xcomp = cap->priv; + return gsr_capture_xcomposite_should_stop(&cap_xcomp->xcomposite, err); +} + +static int gsr_capture_xcomposite_cuda_capture(gsr_capture *cap, AVFrame *frame) { + gsr_capture_xcomposite_cuda *cap_xcomp = cap->priv; + + gsr_capture_xcomposite_capture(&cap_xcomp->xcomposite, frame); + + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + for(int i = 0; i < 2; ++i) { + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; + + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + memcpy_struct.srcArray = cap_xcomp->mapped_arrays[i]; + memcpy_struct.srcPitch = frame->width / div[i]; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; + memcpy_struct.dstPitch = frame->linesize[i]; + memcpy_struct.WidthInBytes = frame->width; + memcpy_struct.Height = frame->height / div[i]; + // TODO: Remove this copy if possible + cap_xcomp->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, cap_xcomp->cuda_stream); + } + + // TODO: needed? + cap_xcomp->cuda.cuStreamSynchronize(cap_xcomp->cuda_stream); + + return 0; +} + +static void gsr_capture_xcomposite_cuda_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + if(cap->priv) { + gsr_capture_xcomposite_cuda_stop(cap, video_codec_context); + free(cap->priv); + cap->priv = NULL; + } + free(cap); +} + +gsr_capture* gsr_capture_xcomposite_cuda_create(const gsr_capture_xcomposite_cuda_params *params) { + if(!params) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_cuda_create params is NULL\n"); + return NULL; + } + + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); + if(!cap) + return NULL; + + gsr_capture_xcomposite_cuda *cap_xcomp = calloc(1, sizeof(gsr_capture_xcomposite_cuda)); + if(!cap_xcomp) { + free(cap); + return NULL; + } + + gsr_capture_xcomposite_init(&cap_xcomp->xcomposite, ¶ms->base); + cap_xcomp->overclock = params->overclock; + + *cap = (gsr_capture) { + .start = gsr_capture_xcomposite_cuda_start, + .tick = gsr_capture_xcomposite_cuda_tick, + .should_stop = gsr_capture_xcomposite_cuda_should_stop, + .capture = gsr_capture_xcomposite_cuda_capture, + .capture_end = NULL, + .destroy = gsr_capture_xcomposite_cuda_destroy, + .priv = cap_xcomp + }; + + return cap; +} |