From a7e0dbd83381377bd05a3fa988511d3713996370 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sun, 16 Oct 2022 02:08:40 +0200 Subject: Refactor xcomposite into abstract capture api Refactor c++ files into c files, more usable --- src/capture/capture.c | 42 +++- src/capture/nvfbc.c | 151 +++++++++++--- src/capture/xcomposite.c | 517 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 671 insertions(+), 39 deletions(-) create mode 100644 src/capture/xcomposite.c (limited to 'src/capture') diff --git a/src/capture/capture.c b/src/capture/capture.c index 9755d6a..699745a 100644 --- a/src/capture/capture.c +++ b/src/capture/capture.c @@ -1,17 +1,47 @@ #include "../../include/capture/capture.h" +#include -int gsr_capture_start(gsr_capture *cap) { - return cap->start(cap); +int gsr_capture_start(gsr_capture *cap, AVCodecContext *video_codec_context) { + if(cap->started) + return -1; + + int res = cap->start(cap, video_codec_context); + if(res == 0) + cap->started = true; + + return res; } -void gsr_capture_stop(gsr_capture *cap) { - cap->stop(cap); +void gsr_capture_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { + if(!cap->started) { + fprintf(stderr, "gsr error: gsp_capture_tick failed: the gsr capture has not been started\n"); + return; + } + + if(cap->tick) + cap->tick(cap, video_codec_context, frame); +} + +bool gsr_capture_should_stop(gsr_capture *cap, bool *err) { + if(!cap->started) { + fprintf(stderr, "gsr error: gsr_capture_should_stop failed: the gsr capture has not been started\n"); + return false; + } + + if(!cap->should_stop) + return false; + + return cap->should_stop(cap, err); } int gsr_capture_capture(gsr_capture *cap, AVFrame *frame) { + if(!cap->started) { + fprintf(stderr, "gsr error: gsr_capture_capture failed: the gsr capture has not been started\n"); + return -1; + } return cap->capture(cap, frame); } -void gsr_capture_destroy(gsr_capture *cap) { - cap->destroy(cap); +void gsr_capture_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + cap->destroy(cap, video_codec_context); } diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index a2648ef..a470879 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -1,10 +1,16 @@ #include "../../include/capture/nvfbc.h" #include "../../external/NvFBC.h" +#include "../../include/cuda.h" #include #include #include #include +#include +#include +#include #include +#include +#include typedef struct { gsr_capture_nvfbc_params params; @@ -14,6 +20,8 @@ typedef struct { PNVFBCCREATEINSTANCE nv_fbc_create_instance; NVFBC_API_FUNCTION_LIST nv_fbc_function_list; bool fbc_handle_created; + + gsr_cuda cuda; } gsr_capture_nvfbc; #if defined(_WIN64) || defined(__LP64__) @@ -28,13 +36,16 @@ static int max_int(int a, int b) { } /* Returns 0 on failure */ -static uint32_t get_output_id_from_display_name(NVFBC_RANDR_OUTPUT_INFO *outputs, uint32_t num_outputs, const char *display_name) { +static uint32_t get_output_id_from_display_name(NVFBC_RANDR_OUTPUT_INFO *outputs, uint32_t num_outputs, const char *display_name, uint32_t *width, uint32_t *height) { if(!outputs) return 0; for(uint32_t i = 0; i < num_outputs; ++i) { - if(strcmp(outputs[i].name, display_name) == 0) + if(strcmp(outputs[i].name, display_name) == 0) { + *width = outputs[i].trackedBox.w; + *height = outputs[i].trackedBox.h; return outputs[i].dwId; + } } return 0; @@ -95,16 +106,78 @@ static bool gsr_capture_nvfbc_load_library(gsr_capture *cap) { return true; } -static int gsr_capture_nvfbc_start(gsr_capture *cap) { +#if LIBAVUTIL_VERSION_MAJOR < 57 +static AVBufferRef* dummy_hw_frame_init(int size) { + return av_buffer_alloc(size); +} +#else +static AVBufferRef* dummy_hw_frame_init(size_t size) { + return av_buffer_alloc(size); +} +#endif + +static bool ffmpeg_create_cuda_contexts(gsr_capture_nvfbc *cap_nvfbc, AVCodecContext *video_codec_context) { + AVBufferRef *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if(!device_ctx) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to create hardware device context\n"); + return false; + } + + AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data; + AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; + cuda_device_context->cuda_ctx = cap_nvfbc->cuda.cu_ctx; + if(av_hwdevice_ctx_init(device_ctx) < 0) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to create hardware device context\n"); + av_buffer_unref(&device_ctx); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); + if(!frame_context) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to create hwframe context\n"); + av_buffer_unref(&device_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = AV_PIX_FMT_0RGB32; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ref = device_ctx; + hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + + hw_frame_context->pool = av_buffer_pool_init(1, dummy_hw_frame_init); + hw_frame_context->initial_pool_size = 1; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&device_ctx); + av_buffer_unref(&frame_context); + return false; + } + + video_codec_context->hw_device_ctx = device_ctx; + video_codec_context->hw_frames_ctx = frame_context; + return true; +} + +static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec_context) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; + if(!gsr_cuda_load(&cap_nvfbc->cuda)) + return -1; + + if(!gsr_capture_nvfbc_load_library(cap)) { + gsr_cuda_unload(&cap_nvfbc->cuda); + return -1; + } + const uint32_t x = max_int(cap_nvfbc->params.pos.x, 0); const uint32_t y = max_int(cap_nvfbc->params.pos.y, 0); const uint32_t width = max_int(cap_nvfbc->params.size.x, 0); const uint32_t height = max_int(cap_nvfbc->params.size.y, 0); - if(!cap_nvfbc->library || !cap_nvfbc->params.display_to_capture || cap_nvfbc->fbc_handle_created) - return -1; - const bool capture_region = (x > 0 || y > 0 || width > 0 || height > 0); NVFBCSTATUS status; @@ -127,7 +200,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { status = cap_nvfbc->nv_fbc_function_list.nvFBCCreateHandle(&cap_nvfbc->nv_fbc_handle, &create_params); if(status != NVFBC_SUCCESS) { fprintf(stderr, "gsr error: gsr_capture_nvfbc_start failed: %s\n", cap_nvfbc->nv_fbc_function_list.nvFBCGetLastErrorStr(cap_nvfbc->nv_fbc_handle)); - return -1; + goto error_cleanup; } } cap_nvfbc->fbc_handle_created = true; @@ -147,6 +220,8 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { goto error_cleanup; } + uint32_t tracking_width = XWidthOfScreen(DefaultScreenOfDisplay(cap_nvfbc->params.dpy)); + uint32_t tracking_height = XHeightOfScreen(DefaultScreenOfDisplay(cap_nvfbc->params.dpy)); tracking_type = strcmp(cap_nvfbc->params.display_to_capture, "screen") == 0 ? NVFBC_TRACKING_SCREEN : NVFBC_TRACKING_OUTPUT; if(tracking_type == NVFBC_TRACKING_OUTPUT) { if(!status_params.bXRandRAvailable) { @@ -159,7 +234,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { goto error_cleanup; } - output_id = get_output_id_from_display_name(status_params.outputs, status_params.dwOutputNum, cap_nvfbc->params.display_to_capture); + output_id = get_output_id_from_display_name(status_params.outputs, status_params.dwOutputNum, cap_nvfbc->params.display_to_capture, &tracking_width, &tracking_height); if(output_id == 0) { fprintf(stderr, "gsr error: gsr_capture_nvfbc_start failed: display '%s' not found\n", cap_nvfbc->params.display_to_capture); goto error_cleanup; @@ -198,6 +273,17 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { goto error_cleanup; } + if(capture_region) { + video_codec_context->width = width & ~1; + video_codec_context->height = height & ~1; + } else { + video_codec_context->width = tracking_width & ~1; + video_codec_context->height = tracking_height & ~1; + } + + if(!ffmpeg_create_cuda_contexts(cap_nvfbc, video_codec_context)) + goto error_cleanup; + return 0; error_cleanup: @@ -215,17 +301,16 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { cap_nvfbc->nv_fbc_function_list.nvFBCDestroyHandle(cap_nvfbc->nv_fbc_handle, &destroy_params); cap_nvfbc->fbc_handle_created = false; } - output_id = 0; + + av_buffer_unref(&video_codec_context->hw_device_ctx); + av_buffer_unref(&video_codec_context->hw_frames_ctx); + gsr_cuda_unload(&cap_nvfbc->cuda); return -1; } -static void gsr_capture_nvfbc_stop(gsr_capture *cap) { +static void gsr_capture_nvfbc_destroy_session(gsr_capture *cap) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; - /* Intentionally ignore failure on destroy */ - if(!cap_nvfbc->nv_fbc_handle) - return; - NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params; memset(&destroy_capture_params, 0, sizeof(destroy_capture_params)); destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER; @@ -241,8 +326,6 @@ static void gsr_capture_nvfbc_stop(gsr_capture *cap) { static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; - if(!cap_nvfbc->library || !cap_nvfbc->fbc_handle_created) - return -1; CUdeviceptr cu_device_ptr = 0; @@ -274,18 +357,19 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { return 0; } -static void gsr_capture_nvfbc_destroy(gsr_capture *cap) { - if(cap) { - gsr_capture_nvfbc *cap_nvfbc = cap->priv; - gsr_capture_nvfbc_stop(cap); - if(cap_nvfbc) { - dlclose(cap_nvfbc->library); - free((void*)cap_nvfbc->params.display_to_capture); - free(cap->priv); - cap->priv = NULL; - } - free(cap); +static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_nvfbc *cap_nvfbc = cap->priv; + gsr_capture_nvfbc_destroy_session(cap); + av_buffer_unref(&video_codec_context->hw_device_ctx); + av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(cap_nvfbc) { + gsr_cuda_unload(&cap_nvfbc->cuda); + dlclose(cap_nvfbc->library); + free((void*)cap_nvfbc->params.display_to_capture); + free(cap->priv); + cap->priv = NULL; } + free(cap); } gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params) { @@ -294,6 +378,11 @@ gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params) { return NULL; } + if(!params->display_to_capture) { + fprintf(stderr, "gsr error: gsr_capture_nvfbc_create params.display_to_capture is NULL\n"); + return NULL; + } + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); if(!cap) return NULL; @@ -317,16 +406,12 @@ gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params) { *cap = (gsr_capture) { .start = gsr_capture_nvfbc_start, - .stop = gsr_capture_nvfbc_stop, + .tick = NULL, + .should_stop = NULL, .capture = gsr_capture_nvfbc_capture, .destroy = gsr_capture_nvfbc_destroy, .priv = cap_nvfbc }; - if(!gsr_capture_nvfbc_load_library(cap)) { - gsr_capture_nvfbc_destroy(cap); - return NULL; - } - return cap; } diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c new file mode 100644 index 0000000..755ac92 --- /dev/null +++ b/src/capture/xcomposite.c @@ -0,0 +1,517 @@ +#include "../../include/capture/xcomposite.h" +#include "../../include/gl.h" +#include "../../include/cuda.h" +#include "../../include/window_texture.h" +#include "../../include/time.h" +#include +#include +#include +#include +#include + +/* TODO: Proper error checks and cleanups */ + +typedef struct { + gsr_capture_xcomposite_params params; + Display *dpy; + XEvent xev; + bool should_stop; + bool stop_is_error; + bool window_resized; + bool created_hw_frame; + double window_resize_timer; + + vec2i window_size; + vec2i window_pos; + + unsigned int target_texture_id; + vec2i texture_size; + Window composite_window; + WindowTexture window_texture; + + CUgraphicsResource cuda_graphics_resource; + CUarray mapped_array; + + gsr_gl gl; + gsr_cuda cuda; +} gsr_capture_xcomposite; + +static int max_int(int a, int b) { + return a > b ? a : b; +} + +static int min_int(int a, int b) { + return a < b ? a : b; +} + +static void gsr_capture_xcomposite_stop(gsr_capture *cap, AVCodecContext *video_codec_context); + +static Window get_compositor_window(Display *display) { + Window overlay_window = XCompositeGetOverlayWindow(display, DefaultRootWindow(display)); + XCompositeReleaseOverlayWindow(display, DefaultRootWindow(display)); + + Window root_window, parent_window; + Window *children = NULL; + unsigned int num_children = 0; + if(XQueryTree(display, overlay_window, &root_window, &parent_window, &children, &num_children) == 0) + return None; + + Window compositor_window = None; + if(num_children == 1) { + compositor_window = children[0]; + const int screen_width = XWidthOfScreen(DefaultScreenOfDisplay(display)); + const int screen_height = XHeightOfScreen(DefaultScreenOfDisplay(display)); + + XWindowAttributes attr; + if(!XGetWindowAttributes(display, compositor_window, &attr) || attr.width != screen_width || attr.height != screen_height) + compositor_window = None; + } + + if(children) + XFree(children); + + return compositor_window; +} + +/* TODO: check for glx swap control extension string (GLX_EXT_swap_control, etc) */ +static void set_vertical_sync_enabled(Display *display, Window window, gsr_gl *gl, bool enabled) { + int result = 0; + + if(gl->glXSwapIntervalEXT) { + gl->glXSwapIntervalEXT(display, window, enabled ? 1 : 0); + } else if(gl->glXSwapIntervalMESA) { + result = gl->glXSwapIntervalMESA(enabled ? 1 : 0); + } else if(gl->glXSwapIntervalSGI) { + result = gl->glXSwapIntervalSGI(enabled ? 1 : 0); + } else { + static int warned = 0; + if (!warned) { + warned = 1; + fprintf(stderr, "Warning: setting vertical sync not supported\n"); + } + } + + if(result != 0) + fprintf(stderr, "Warning: setting vertical sync failed\n"); +} + +static bool cuda_register_opengl_texture(gsr_capture_xcomposite *cap_xcomp) { + CUresult res; + CUcontext old_ctx; + res = cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + res = cap_xcomp->cuda.cuGraphicsGLRegisterImage( + &cap_xcomp->cuda_graphics_resource, cap_xcomp->target_texture_id, GL_TEXTURE_2D, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); + if (res != CUDA_SUCCESS) { + const char *err_str = "unknown"; + cap_xcomp->cuda.cuGetErrorString(res, &err_str); + fprintf(stderr, + "Error: cuGraphicsGLRegisterImage failed, error %s, texture " + "id: %u\n", + err_str, cap_xcomp->target_texture_id); + return false; + } + + /* Get texture */ + res = cap_xcomp->cuda.cuGraphicsResourceSetMapFlags(cap_xcomp->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); + res = cap_xcomp->cuda.cuGraphicsMapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + + /* Map texture to cuda array */ + res = cap_xcomp->cuda.cuGraphicsSubResourceGetMappedArray(&cap_xcomp->mapped_array, cap_xcomp->cuda_graphics_resource, 0, 0); + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return true; +} + +static bool cuda_create_codec_context(gsr_capture_xcomposite *cap_xcomp, AVCodecContext *video_codec_context) { + CUcontext old_ctx; + cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + AVBufferRef *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if(!device_ctx) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + return false; + } + + AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data; + AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; + cuda_device_context->cuda_ctx = cap_xcomp->cuda.cu_ctx; + if(av_hwdevice_ctx_init(device_ctx) < 0) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + av_buffer_unref(&device_ctx); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); + if(!frame_context) { + fprintf(stderr, "Error: Failed to create hwframe context\n"); + av_buffer_unref(&device_ctx); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = + (AVHWFramesContext *)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = AV_PIX_FMT_0RGB32; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ref = device_ctx; + hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "Error: Failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&device_ctx); + av_buffer_unref(&frame_context); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + video_codec_context->hw_device_ctx = device_ctx; + video_codec_context->hw_frames_ctx = frame_context; + return true; +} + +static unsigned int gl_create_texture(gsr_capture_xcomposite *cap_xcomp, int width, int height) { + // Generating this second texture is needed because + // cuGraphicsGLRegisterImage cant be used with the texture that is mapped + // directly to the pixmap. + // TODO: Investigate if it's somehow possible to use the pixmap texture + // directly, this should improve performance since only less image copy is + // then needed every frame. + // Ignoring failure for now.. TODO: Show proper error + unsigned int texture_id = 0; + cap_xcomp->gl.glGenTextures(1, &texture_id); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, texture_id); + cap_xcomp->gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + return texture_id; +} + +static int gsr_capture_xcomposite_start(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + XWindowAttributes attr; + if(!XGetWindowAttributes(cap_xcomp->dpy, cap_xcomp->params.window, &attr)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start failed: invalid window id: %lu\n", cap_xcomp->params.window); + return -1; + } + + cap_xcomp->window_size.x = max_int(attr.width, 0); + cap_xcomp->window_size.y = max_int(attr.height, 0); + Window c; + XTranslateCoordinates(cap_xcomp->dpy, cap_xcomp->params.window, DefaultRootWindow(cap_xcomp->dpy), 0, 0, &cap_xcomp->window_pos.x, &cap_xcomp->window_pos.y, &c); + + XSelectInput(cap_xcomp->dpy, cap_xcomp->params.window, StructureNotifyMask | ExposureMask); + + if(!gsr_gl_load(&cap_xcomp->gl, cap_xcomp->dpy)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to load opengl\n"); + return -1; + } + + set_vertical_sync_enabled(cap_xcomp->dpy, cap_xcomp->gl.window, &cap_xcomp->gl, false); + if(window_texture_init(&cap_xcomp->window_texture, cap_xcomp->dpy, cap_xcomp->params.window, &cap_xcomp->gl) != 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed get window texture for window %ld\n", cap_xcomp->params.window); + gsr_gl_unload(&cap_xcomp->gl); + return -1; + } + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + cap_xcomp->texture_size.x = 0; + cap_xcomp->texture_size.y = 0; + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &cap_xcomp->texture_size.x); + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + + cap_xcomp->texture_size.x = max_int(2, cap_xcomp->texture_size.x & ~1); + cap_xcomp->texture_size.y = max_int(2, cap_xcomp->texture_size.y & ~1); + + cap_xcomp->target_texture_id = gl_create_texture(cap_xcomp, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y); + if(cap_xcomp->target_texture_id == 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to create opengl texture\n"); + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + video_codec_context->width = cap_xcomp->texture_size.x; + video_codec_context->height = cap_xcomp->texture_size.y; + + if(!gsr_cuda_load(&cap_xcomp->cuda)) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + if(!cuda_create_codec_context(cap_xcomp, video_codec_context)) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + if(!cuda_register_opengl_texture(cap_xcomp)) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + return 0; +} + +static void gsr_capture_xcomposite_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + window_texture_deinit(&cap_xcomp->window_texture); + + if(cap_xcomp->target_texture_id) { + cap_xcomp->gl.glDeleteTextures(1, &cap_xcomp->target_texture_id); + cap_xcomp->target_texture_id = 0; + } + + if(cap_xcomp->composite_window) { + XCompositeUnredirectWindow(cap_xcomp->dpy, cap_xcomp->composite_window, CompositeRedirectAutomatic); + cap_xcomp->composite_window = None; + } + + av_buffer_unref(&video_codec_context->hw_device_ctx); + av_buffer_unref(&video_codec_context->hw_frames_ctx); + + cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resource); + gsr_cuda_unload(&cap_xcomp->cuda); + + gsr_gl_unload(&cap_xcomp->gl); + if(cap_xcomp->dpy) { + XCloseDisplay(cap_xcomp->dpy); + cap_xcomp->dpy = NULL; + } +} + +static void gsr_capture_xcomposite_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + cap_xcomp->gl.glClear(GL_COLOR_BUFFER_BIT); + + if(!cap_xcomp->created_hw_frame) { + CUcontext old_ctx; + cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: av_hwframe_get_buffer failed\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + + cap_xcomp->created_hw_frame = true; + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + } + + if(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, DestroyNotify, &cap_xcomp->xev)) { + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = false; + } + + if(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, Expose, &cap_xcomp->xev) && cap_xcomp->xev.xexpose.count == 0) { + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + cap_xcomp->window_resized = true; + } + + if(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, ConfigureNotify, &cap_xcomp->xev) && cap_xcomp->xev.xconfigure.window == cap_xcomp->params.window) { + while(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, ConfigureNotify, &cap_xcomp->xev)) {} + Window c; + XTranslateCoordinates(cap_xcomp->dpy, cap_xcomp->params.window, DefaultRootWindow(cap_xcomp->dpy), 0, 0, &cap_xcomp->xev.xconfigure.x, &cap_xcomp->xev.xconfigure.y, &c); + cap_xcomp->window_pos.x = cap_xcomp->xev.xconfigure.x; + cap_xcomp->window_pos.y = cap_xcomp->xev.xconfigure.y; + + /* Window resize */ + if(cap_xcomp->xev.xconfigure.width != cap_xcomp->window_size.x || cap_xcomp->xev.xconfigure.height != cap_xcomp->window_size.y) { + cap_xcomp->window_size.x = max_int(cap_xcomp->xev.xconfigure.width, 0); + cap_xcomp->window_size.y = max_int(cap_xcomp->xev.xconfigure.height, 0); + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + cap_xcomp->window_resized = true; + } + } + + const double window_resize_timeout = 1.0; // 1 second + if(cap_xcomp->window_resized && clock_get_monotonic_seconds() - cap_xcomp->window_resize_timer >= window_resize_timeout) { + cap_xcomp->window_resized = false; + fprintf(stderr, "Resize window!\n"); + if(window_texture_on_resize(&cap_xcomp->window_texture) != 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: window_texture_on_resize failed\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + return; + } + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + cap_xcomp->texture_size.x = 0; + cap_xcomp->texture_size.y = 0; + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &cap_xcomp->texture_size.x); + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + + cap_xcomp->texture_size.x = min_int(video_codec_context->width, max_int(2, cap_xcomp->texture_size.x & ~1)); + cap_xcomp->texture_size.y = min_int(video_codec_context->height, max_int(2, cap_xcomp->texture_size.y & ~1)); + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, cap_xcomp->target_texture_id); + cap_xcomp->gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + + CUcontext old_ctx; + CUresult res = cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resource); + res = cap_xcomp->cuda.cuGraphicsGLRegisterImage(&cap_xcomp->cuda_graphics_resource, cap_xcomp->target_texture_id, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); + if (res != CUDA_SUCCESS) { + const char *err_str = "unknown"; + cap_xcomp->cuda.cuGetErrorString(res, &err_str); + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: cuGraphicsGLRegisterImage failed, error %s, texture id: %u\n", err_str, cap_xcomp->target_texture_id); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + + res = cap_xcomp->cuda.cuGraphicsResourceSetMapFlags(cap_xcomp->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); + res = cap_xcomp->cuda.cuGraphicsMapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + res = cap_xcomp->cuda.cuGraphicsSubResourceGetMappedArray(&cap_xcomp->mapped_array, cap_xcomp->cuda_graphics_resource, 0, 0); + + av_frame_free(frame); + *frame = av_frame_alloc(); + if(!frame) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: failed to allocate frame\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + (*frame)->format = video_codec_context->pix_fmt; + (*frame)->width = video_codec_context->width; + (*frame)->height = video_codec_context->height; + + if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: av_hwframe_get_buffer failed\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + + // Make it completely black to clear unused parts + // TODO: cuMemsetD32? + res = cap_xcomp->cuda.cuMemsetD8_v2((CUdeviceptr)(*frame)->data[0], 0, (*frame)->width * (*frame)->height * 4); + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + } +} + +static bool gsr_capture_xcomposite_should_stop(gsr_capture *cap, bool *err) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + if(cap_xcomp->should_stop) { + if(err) + *err = cap_xcomp->stop_is_error; + return true; + } + + if(err) + *err = false; + return false; +} + +static int gsr_capture_xcomposite_capture(gsr_capture *cap, AVFrame *frame) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + // TODO: Use a framebuffer instead. glCopyImageSubData requires opengl 4.2 + vec2i source_pos = { 0, 0 }; + vec2i source_size = cap_xcomp->texture_size; + + // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2. + cap_xcomp->gl.glCopyImageSubData( + window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), GL_TEXTURE_2D, 0, source_pos.x, source_pos.y, 0, + cap_xcomp->target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, + source_size.x, source_size.y, 1); + unsigned int err = cap_xcomp->gl.glGetError(); + if(err != 0) { + static bool error_shown = false; + if(!error_shown) { + error_shown = true; + fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err); + } + } + cap_xcomp->gl.glXSwapBuffers(cap_xcomp->dpy, cap_xcomp->gl.window); + // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id + + frame->linesize[0] = frame->width * 4; + + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; + + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + memcpy_struct.srcArray = cap_xcomp->mapped_array; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0]; + memcpy_struct.dstPitch = frame->linesize[0]; + memcpy_struct.WidthInBytes = frame->width * 4; + memcpy_struct.Height = frame->height; + cap_xcomp->cuda.cuMemcpy2D_v2(&memcpy_struct); + + return 0; +} + +static void gsr_capture_xcomposite_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + if(cap->priv) { + free(cap->priv); + cap->priv = NULL; + } + free(cap); +} + +gsr_capture* gsr_capture_xcomposite_create(const gsr_capture_xcomposite_params *params) { + if(!params) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_create params is NULL\n"); + return NULL; + } + + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); + if(!cap) + return NULL; + + gsr_capture_xcomposite *cap_xcomp = calloc(1, sizeof(gsr_capture_xcomposite)); + if(!cap_xcomp) { + free(cap); + return NULL; + } + + Display *display = XOpenDisplay(NULL); + if(!display) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_create failed: XOpenDisplay failed\n"); + free(cap); + free(cap_xcomp); + return NULL; + } + + cap_xcomp->dpy = display; + cap_xcomp->params = *params; + + *cap = (gsr_capture) { + .start = gsr_capture_xcomposite_start, + .tick = gsr_capture_xcomposite_tick, + .should_stop = gsr_capture_xcomposite_should_stop, + .capture = gsr_capture_xcomposite_capture, + .destroy = gsr_capture_xcomposite_destroy, + .priv = cap_xcomp + }; + + return cap; +} -- cgit v1.2.3