From a7e0dbd83381377bd05a3fa988511d3713996370 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sun, 16 Oct 2022 02:08:40 +0200 Subject: Refactor xcomposite into abstract capture api Refactor c++ files into c files, more usable --- .clang-format | 2 - README.md | 2 +- TODO | 3 +- build.sh | 7 +- include/CudaLibrary.hpp | 143 ------ include/GlLibrary.hpp | 156 ------- include/LibraryLoader.hpp | 38 -- include/capture/capture.h | 19 +- include/capture/nvfbc.h | 5 +- include/capture/xcomposite.h | 16 + include/cuda.h | 101 ++++ include/gl.h | 102 +++++ include/library_loader.h | 41 ++ include/time.h | 6 + include/window_texture.h | 28 ++ src/capture/capture.c | 42 +- src/capture/nvfbc.c | 151 ++++-- src/capture/xcomposite.c | 517 +++++++++++++++++++++ src/cuda.c | 100 ++++ src/gl.c | 198 ++++++++ src/main.cpp | 1039 +++--------------------------------------- src/sound.cpp | 28 +- src/time.c | 10 + src/window_texture.c | 176 +++++++ 24 files changed, 1544 insertions(+), 1386 deletions(-) delete mode 100644 .clang-format delete mode 100644 include/CudaLibrary.hpp delete mode 100644 include/GlLibrary.hpp delete mode 100644 include/LibraryLoader.hpp create mode 100644 include/capture/xcomposite.h create mode 100644 include/cuda.h create mode 100644 include/gl.h create mode 100644 include/library_loader.h create mode 100644 include/time.h create mode 100644 include/window_texture.h create mode 100644 src/capture/xcomposite.c create mode 100644 src/cuda.c create mode 100644 src/gl.c create mode 100644 src/time.c create mode 100644 src/window_texture.c diff --git a/.clang-format b/.clang-format deleted file mode 100644 index 80d3293..0000000 --- a/.clang-format +++ /dev/null @@ -1,2 +0,0 @@ -BasedOnStyle: LLVM -IndentWidth: 4 diff --git a/README.md b/README.md index 344b435..db28602 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Send signal SIGUSR1 (`killall -SIGUSR1 gpu-screen-recorder`) to gpu-screen-recor You can find the default output audio device (headset, 
speakers (in other words, desktop audio)) with the command `pactl get-default-sink`. Add `monitor` to the end of that to use that as an audio input in gpu-screen-recorder.\ You can find the default input audio device (microphone) with the command `pactl get-default-source`. This input should not have `monitor` added to the end when used in gpu-screen-recorder.\ Example of recording both desktop audio and microphone: `gpu-screen-recorder -w $(xdotool selectwindow) -c mp4 -f 60 -a "$(pactl get-default-sink).monitor" -a "$(pactl get-default-source)" -o test_video.mp4`.\ -Note that if you use multiple audio inputs then they are each recorded into separate audio tracks in the video file. There is currently no option to merge audio tracks, but it's a planned feature. +Note that if you use multiple audio inputs then they are each recorded into separate audio tracks in the video file. There is currently no option to merge audio tracks, but it's a planned feature. For now I recommend using qpwgraph if you are using pipewire. Qpwgraph allows you to merge multiple audio inputs into one with a simple gui. If you use pulseaudio then you need to create a virtual sink, which is a bit more complex. There is also a gui for the gpu-screen-recorder called [gpu-screen-recorder-gtk](https://git.dec05eba.com/gpu-screen-recorder-gtk/). diff --git a/TODO b/TODO index ad1b158..e497e77 100644 --- a/TODO +++ b/TODO @@ -13,4 +13,5 @@ Allow recording a region by recording the compositor proxy window / nvfbc window Resizing the target window to be smaller than the initial size is buggy. The window texture ends up duplicated in the video. Handle frames (especially for applications with rounded client-side decorations, such as gnome applications. They are huge). Use nvenc directly, which allows removing the use of cuda. -Fallback to nvfbc and window tracking if window capture fails. 
+Handle xrandr monitor change in nvfbc. \ No newline at end of file diff --git a/build.sh b/build.sh index 5d42a3c..4d4b61b 100755 --- a/build.sh +++ b/build.sh @@ -5,7 +5,12 @@ includes="$(pkg-config --cflags $dependencies)" libs="$(pkg-config --libs $dependencies) -ldl -pthread -lm" gcc -c src/capture/capture.c -O2 -g0 -DNDEBUG $includes gcc -c src/capture/nvfbc.c -O2 -g0 -DNDEBUG $includes +gcc -c src/capture/xcomposite.c -O2 -g0 -DNDEBUG $includes +gcc -c src/gl.c -O2 -g0 -DNDEBUG $includes +gcc -c src/cuda.c -O2 -g0 -DNDEBUG $includes +gcc -c src/window_texture.c -O2 -g0 -DNDEBUG $includes +gcc -c src/time.c -O2 -g0 -DNDEBUG $includes g++ -c src/sound.cpp -O2 -g0 -DNDEBUG $includes g++ -c src/main.cpp -O2 -g0 -DNDEBUG $includes -g++ -o gpu-screen-recorder -O2 capture.o nvfbc.o sound.o main.o -s $libs +g++ -o gpu-screen-recorder -O2 capture.o nvfbc.o gl.o cuda.o window_texture.o time.o xcomposite.o sound.o main.o -s $libs echo "Successfully built gpu-screen-recorder" \ No newline at end of file diff --git a/include/CudaLibrary.hpp b/include/CudaLibrary.hpp deleted file mode 100644 index fe99975..0000000 --- a/include/CudaLibrary.hpp +++ /dev/null @@ -1,143 +0,0 @@ -#pragma once - -#include "LibraryLoader.hpp" - -#include -#include - -// To prevent hwcontext_cuda.h from including cuda.h -#define CUDA_VERSION 11070 - -#if defined(_WIN64) || defined(__LP64__) -typedef unsigned long long CUdeviceptr_v2; -#else -typedef unsigned int CUdeviceptr_v2; -#endif -typedef CUdeviceptr_v2 CUdeviceptr; - -typedef int CUresult; -typedef int CUdevice_v1; -typedef CUdevice_v1 CUdevice; -typedef struct CUctx_st *CUcontext; -typedef struct CUstream_st *CUstream; -typedef struct CUarray_st *CUarray; - -static const int CUDA_SUCCESS = 0; - -typedef enum CUgraphicsMapResourceFlags_enum { - CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, - CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, - CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 -} CUgraphicsMapResourceFlags; - -typedef enum 
CUgraphicsRegisterFlags_enum { - CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00, - CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01, - CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02, - CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04, - CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08 -} CUgraphicsRegisterFlags; - -typedef enum CUmemorytype_enum { - CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */ - CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */ - CU_MEMORYTYPE_ARRAY = 0x03, /**< Array memory */ - CU_MEMORYTYPE_UNIFIED = 0x04 /**< Unified device or host memory */ -} CUmemorytype; - -typedef struct CUDA_MEMCPY2D_st { - size_t srcXInBytes; /**< Source X in bytes */ - size_t srcY; /**< Source Y */ - - CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ - const void *srcHost; /**< Source host pointer */ - CUdeviceptr srcDevice; /**< Source device pointer */ - CUarray srcArray; /**< Source array reference */ - size_t srcPitch; /**< Source pitch (ignored when src is array) */ - - size_t dstXInBytes; /**< Destination X in bytes */ - size_t dstY; /**< Destination Y */ - - CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ - void *dstHost; /**< Destination host pointer */ - CUdeviceptr dstDevice; /**< Destination device pointer */ - CUarray dstArray; /**< Destination array reference */ - size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ - - size_t WidthInBytes; /**< Width of 2D memory copy in bytes */ - size_t Height; /**< Height of 2D memory copy */ -} CUDA_MEMCPY2D_v2; -typedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D; - -static const int CU_CTX_SCHED_AUTO = 0; - -typedef struct CUgraphicsResource_st *CUgraphicsResource; - -struct Cuda { - CUresult (*cuInit)(unsigned int Flags); - CUresult (*cuDeviceGetCount)(int *count); - CUresult (*cuDeviceGet)(CUdevice *device, int ordinal); - CUresult (*cuCtxCreate_v2)(CUcontext *pctx, unsigned int flags, CUdevice dev); - CUresult (*cuCtxPushCurrent_v2)(CUcontext ctx); - CUresult 
(*cuCtxPopCurrent_v2)(CUcontext *pctx); - CUresult (*cuGetErrorString)(CUresult error, const char **pStr); - CUresult (*cuMemsetD8_v2)(CUdeviceptr dstDevice, unsigned char uc, size_t N); - CUresult (*cuMemcpy2D_v2)(const CUDA_MEMCPY2D *pCopy); - - CUresult (*cuGraphicsGLRegisterImage)(CUgraphicsResource *pCudaResource, unsigned int image, unsigned int target, unsigned int Flags); - CUresult (*cuGraphicsResourceSetMapFlags)(CUgraphicsResource resource, unsigned int flags); - CUresult (*cuGraphicsMapResources)(unsigned int count, CUgraphicsResource *resources, CUstream hStream); - CUresult (*cuGraphicsUnregisterResource)(CUgraphicsResource resource); - CUresult (*cuGraphicsSubResourceGetMappedArray)(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); - - ~Cuda() { - if(library) - dlclose(library); - } - - bool load() { - if(library) - return true; - - dlerror(); // clear - void *lib = dlopen("libcuda.so.1", RTLD_LAZY); - if(!lib) { - lib = dlopen("libcuda.so", RTLD_LAZY); - if(!lib) { - fprintf(stderr, "Error: failed to load libcuda.so/libcuda.so.1, error: %s\n", dlerror()); - return false; - } - } - - dlsym_assign required_dlsym[] = { - { (void**)&cuInit, "cuInit" }, - { (void**)&cuDeviceGetCount, "cuDeviceGetCount" }, - { (void**)&cuDeviceGet, "cuDeviceGet" }, - { (void**)&cuCtxCreate_v2, "cuCtxCreate_v2" }, - { (void**)&cuCtxPushCurrent_v2, "cuCtxPushCurrent_v2" }, - { (void**)&cuCtxPopCurrent_v2, "cuCtxPopCurrent_v2" }, - { (void**)&cuGetErrorString, "cuGetErrorString" }, - { (void**)&cuMemsetD8_v2, "cuMemsetD8_v2" }, - { (void**)&cuMemcpy2D_v2, "cuMemcpy2D_v2" }, - - { (void**)&cuGraphicsGLRegisterImage, "cuGraphicsGLRegisterImage" }, - { (void**)&cuGraphicsResourceSetMapFlags, "cuGraphicsResourceSetMapFlags" }, - { (void**)&cuGraphicsMapResources, "cuGraphicsMapResources" }, - { (void**)&cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource" }, - { (void**)&cuGraphicsSubResourceGetMappedArray, 
"cuGraphicsSubResourceGetMappedArray" }, - - { NULL, NULL } - }; - - if(dlsym_load_list(lib, required_dlsym)) { - library = lib; - return true; - } else { - fprintf(stderr, "Error: missing required symbols in libcuda.so\n"); - dlclose(lib); - return false; - } - } -private: - void *library = nullptr; -}; diff --git a/include/GlLibrary.hpp b/include/GlLibrary.hpp deleted file mode 100644 index 1337ef3..0000000 --- a/include/GlLibrary.hpp +++ /dev/null @@ -1,156 +0,0 @@ -#pragma once - -#include "LibraryLoader.hpp" - -#include -#include -#include -#include - -typedef XID GLXPixmap; -typedef XID GLXDrawable; -typedef XID GLXWindow; - -typedef struct __GLXcontextRec *GLXContext; -typedef struct __GLXFBConfigRec *GLXFBConfig; - -#define GL_TEXTURE_2D 0x0DE1 -#define GL_RGB 0x1907 -#define GL_UNSIGNED_BYTE 0x1401 -#define GL_COLOR_BUFFER_BIT 0x00004000 -#define GL_TEXTURE_WRAP_S 0x2802 -#define GL_TEXTURE_WRAP_T 0x2803 -#define GL_TEXTURE_MAG_FILTER 0x2800 -#define GL_TEXTURE_MIN_FILTER 0x2801 -#define GL_TEXTURE_WIDTH 0x1000 -#define GL_TEXTURE_HEIGHT 0x1001 -#define GL_NEAREST 0x2600 - -#define GL_RENDERER 0x1F01 - -#define GLX_BUFFER_SIZE 2 -#define GLX_DOUBLEBUFFER 5 -#define GLX_RED_SIZE 8 -#define GLX_GREEN_SIZE 9 -#define GLX_BLUE_SIZE 10 -#define GLX_ALPHA_SIZE 11 -#define GLX_DEPTH_SIZE 12 - -#define GLX_RGBA_BIT 0x00000001 -#define GLX_RENDER_TYPE 0x8011 -#define GLX_FRONT_EXT 0x20DE -#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0 -#define GLX_DRAWABLE_TYPE 0x8010 -#define GLX_WINDOW_BIT 0x00000001 -#define GLX_PIXMAP_BIT 0x00000002 -#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3 -#define GLX_TEXTURE_2D_BIT_EXT 0x00000002 -#define GLX_TEXTURE_TARGET_EXT 0x20D6 -#define GLX_TEXTURE_2D_EXT 0x20DC -#define GLX_TEXTURE_FORMAT_EXT 0x20D5 -#define GLX_TEXTURE_FORMAT_RGB_EXT 0x20D9 -#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x00000002 -#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091 -#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092 -#define GLX_CONTEXT_FLAGS_ARB 0x2094 - 
-struct GlLibrary { - GLXPixmap (*glXCreatePixmap)(Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList); - void (*glXDestroyPixmap)(Display *dpy, GLXPixmap pixmap); - void (*glXBindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list); - void (*glXReleaseTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer); - GLXFBConfig* (*glXChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems); - XVisualInfo* (*glXGetVisualFromFBConfig)(Display *dpy, GLXFBConfig config); - GLXContext (*glXCreateContextAttribsARB)(Display *dpy, GLXFBConfig config, GLXContext share_context, Bool direct, const int *attrib_list); - Bool (*glXMakeContextCurrent)(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); - void (*glXDestroyContext)(Display *dpy, GLXContext ctx); - void (*glXSwapBuffers)(Display *dpy, GLXDrawable drawable); - - void (*glXSwapIntervalEXT)(Display *dpy, GLXDrawable drawable, int interval); - int (*glXSwapIntervalMESA)(unsigned int interval); - int (*glXSwapIntervalSGI)(int interval); - - void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data); - - unsigned int (*glGetError)(void); - const unsigned char* (*glGetString)(unsigned int name); - void (*glClear)(unsigned int mask); - void (*glGenTextures)(int n, unsigned int *textures); - void (*glDeleteTextures)(int n, const unsigned int *texture); - void (*glBindTexture)(unsigned int target, unsigned int texture); - void (*glTexParameteri)(unsigned int target, unsigned int pname, int param); - void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params); - void (*glTexImage2D)(unsigned int target, int level, int internalFormat, int width, int height, int border, unsigned int format, unsigned int type, const void *pixels); - void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int 
srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth); - - ~GlLibrary() { - unload(); - } - - bool load() { - if(library) - return true; - - dlerror(); // clear - void *lib = dlopen("libGL.so.1", RTLD_LAZY); - if(!lib) { - fprintf(stderr, "Error: failed to load libGL.so.1, error: %s\n", dlerror()); - return false; - } - - dlsym_assign optional_dlsym[] = { - { (void**)&glClearTexImage, "glClearTexImage" }, - { (void**)&glXSwapIntervalEXT, "glXSwapIntervalEXT" }, - { (void**)&glXSwapIntervalMESA, "glXSwapIntervalMESA" }, - { (void**)&glXSwapIntervalSGI, "glXSwapIntervalSGI" }, - - { NULL, NULL } - }; - - dlsym_load_list_optional(lib, optional_dlsym); - - dlsym_assign required_dlsym[] = { - { (void**)&glXCreatePixmap, "glXCreatePixmap" }, - { (void**)&glXDestroyPixmap, "glXDestroyPixmap" }, - { (void**)&glXBindTexImageEXT, "glXBindTexImageEXT" }, - { (void**)&glXReleaseTexImageEXT, "glXReleaseTexImageEXT" }, - { (void**)&glXChooseFBConfig, "glXChooseFBConfig" }, - { (void**)&glXGetVisualFromFBConfig, "glXGetVisualFromFBConfig" }, - { (void**)&glXCreateContextAttribsARB, "glXCreateContextAttribsARB" }, - { (void**)&glXMakeContextCurrent, "glXMakeContextCurrent" }, - { (void**)&glXDestroyContext, "glXDestroyContext" }, - { (void**)&glXSwapBuffers, "glXSwapBuffers" }, - - { (void**)&glGetError, "glGetError" }, - { (void**)&glGetString, "glGetString" }, - { (void**)&glClear, "glClear" }, - { (void**)&glGenTextures, "glGenTextures" }, - { (void**)&glDeleteTextures, "glDeleteTextures" }, - { (void**)&glBindTexture, "glBindTexture" }, - { (void**)&glTexParameteri, "glTexParameteri" }, - { (void**)&glGetTexLevelParameteriv, "glGetTexLevelParameteriv" }, - { (void**)&glTexImage2D, "glTexImage2D" }, - { (void**)&glCopyImageSubData, "glCopyImageSubData" }, - - { NULL, NULL } - }; - - if(dlsym_load_list(lib, required_dlsym)) { - library = lib; - return true; - } else { - fprintf(stderr, "Error: 
missing required symbols in libGL.so.1\n"); - dlclose(lib); - return false; - } - } - - void unload() { - if(library) { - dlclose(library); - library = nullptr; - } - } -private: - void *library = nullptr; -}; diff --git a/include/LibraryLoader.hpp b/include/LibraryLoader.hpp deleted file mode 100644 index 16dc580..0000000 --- a/include/LibraryLoader.hpp +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include -#include - -typedef struct { - void **func; - const char *name; -} dlsym_assign; - -static void* dlsym_print_fail(void *handle, const char *name, bool required) { - dlerror(); - void *sym = dlsym(handle, name); - char *err_str = dlerror(); - - if(!sym) - fprintf(stderr, "%s: dlsym(handle, \"%s\") failed, error: %s\n", required ? "error" : "warning", name, err_str ? err_str : "(null)"); - - return sym; -} - -/* |dlsyms| should be null terminated */ -static bool dlsym_load_list(void *handle, const dlsym_assign *dlsyms) { - bool success = true; - for(int i = 0; dlsyms[i].func; ++i) { - *dlsyms[i].func = dlsym_print_fail(handle, dlsyms[i].name, true); - if(!*dlsyms[i].func) - success = false; - } - return success; -} - -/* |dlsyms| should be null terminated */ -static void dlsym_load_list_optional(void *handle, const dlsym_assign *dlsyms) { - for(int i = 0; dlsyms[i].func; ++i) { - *dlsyms[i].func = dlsym_print_fail(handle, dlsyms[i].name, false); - } -} \ No newline at end of file diff --git a/include/capture/capture.h b/include/capture/capture.h index 9e23a23..edcc14d 100644 --- a/include/capture/capture.h +++ b/include/capture/capture.h @@ -3,23 +3,28 @@ #include +typedef struct AVCodecContext AVCodecContext; typedef struct AVFrame AVFrame; typedef struct gsr_capture gsr_capture; struct gsr_capture { - int (*start)(gsr_capture *cap); - void (*stop)(gsr_capture *cap); + /* These methods should not be called manually. 
Call gsr_capture_* instead */ + int (*start)(gsr_capture *cap, AVCodecContext *video_codec_context); + void (*tick)(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame); /* can be NULL */ + bool (*should_stop)(gsr_capture *cap, bool *err); /* can be NULL */ int (*capture)(gsr_capture *cap, AVFrame *frame); - void (*destroy)(gsr_capture *cap); + void (*destroy)(gsr_capture *cap, AVCodecContext *video_codec_context); - void *priv; + void *priv; /* can be NULL */ + bool started; }; -int gsr_capture_start(gsr_capture *cap); -void gsr_capture_stop(gsr_capture *cap); +int gsr_capture_start(gsr_capture *cap, AVCodecContext *video_codec_context); +void gsr_capture_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame); +bool gsr_capture_should_stop(gsr_capture *cap, bool *err); int gsr_capture_capture(gsr_capture *cap, AVFrame *frame); /* Calls |gsr_capture_stop| as well */ -void gsr_capture_destroy(gsr_capture *cap); +void gsr_capture_destroy(gsr_capture *cap, AVCodecContext *video_codec_context); #endif /* GSR_CAPTURE_CAPTURE_H */ diff --git a/include/capture/nvfbc.h b/include/capture/nvfbc.h index b749b87..06587d8 100644 --- a/include/capture/nvfbc.h +++ b/include/capture/nvfbc.h @@ -4,8 +4,11 @@ #include "capture.h" #include "../vec2.h" +typedef struct _XDisplay Display; + typedef struct { - const char *display_to_capture; /* if this is "screen", then the entire x11 screen is captured (all displays) */ + Display *dpy; + const char *display_to_capture; /* if this is "screen", then the entire x11 screen is captured (all displays). 
A copy is made of this */ int fps; vec2i pos; vec2i size; diff --git a/include/capture/xcomposite.h b/include/capture/xcomposite.h new file mode 100644 index 0000000..edd335c --- /dev/null +++ b/include/capture/xcomposite.h @@ -0,0 +1,16 @@ +#ifndef GSR_CAPTURE_XCOMPOSITE_H +#define GSR_CAPTURE_XCOMPOSITE_H + +#include "capture.h" +#include "../vec2.h" +#include + +typedef struct _XDisplay Display; + +typedef struct { + Window window; +} gsr_capture_xcomposite_params; + +gsr_capture* gsr_capture_xcomposite_create(const gsr_capture_xcomposite_params *params); + +#endif /* GSR_CAPTURE_XCOMPOSITE_H */ diff --git a/include/cuda.h b/include/cuda.h new file mode 100644 index 0000000..cefdcad --- /dev/null +++ b/include/cuda.h @@ -0,0 +1,101 @@ +#ifndef GSR_CUDA_H +#define GSR_CUDA_H + +#include +#include + +// To prevent hwcontext_cuda.h from including cuda.h +#define CUDA_VERSION 11070 + +#if defined(_WIN64) || defined(__LP64__) +typedef unsigned long long CUdeviceptr_v2; +#else +typedef unsigned int CUdeviceptr_v2; +#endif +typedef CUdeviceptr_v2 CUdeviceptr; + +typedef int CUresult; +typedef int CUdevice_v1; +typedef CUdevice_v1 CUdevice; +typedef struct CUctx_st *CUcontext; +typedef struct CUstream_st *CUstream; +typedef struct CUarray_st *CUarray; + +#define CUDA_SUCCESS 0 + +typedef enum CUgraphicsMapResourceFlags_enum { + CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, + CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, + CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 +} CUgraphicsMapResourceFlags; + +typedef enum CUgraphicsRegisterFlags_enum { + CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01, + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02, + CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04, + CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08 +} CUgraphicsRegisterFlags; + +typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */ + CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */ + 
CU_MEMORYTYPE_ARRAY = 0x03, /**< Array memory */ + CU_MEMORYTYPE_UNIFIED = 0x04 /**< Unified device or host memory */ +} CUmemorytype; + +typedef struct CUDA_MEMCPY2D_st { + size_t srcXInBytes; /**< Source X in bytes */ + size_t srcY; /**< Source Y */ + + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ + size_t srcPitch; /**< Source pitch (ignored when src is array) */ + + size_t dstXInBytes; /**< Destination X in bytes */ + size_t dstY; /**< Destination Y */ + + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ + + size_t WidthInBytes; /**< Width of 2D memory copy in bytes */ + size_t Height; /**< Height of 2D memory copy */ +} CUDA_MEMCPY2D_v2; +typedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D; + +#define CU_CTX_SCHED_AUTO 0 + +typedef struct CUgraphicsResource_st *CUgraphicsResource; + +typedef struct { + void *library; + CUcontext cu_ctx; + + CUresult (*cuInit)(unsigned int Flags); + CUresult (*cuDeviceGetCount)(int *count); + CUresult (*cuDeviceGet)(CUdevice *device, int ordinal); + CUresult (*cuCtxCreate_v2)(CUcontext *pctx, unsigned int flags, CUdevice dev); + CUresult (*cuCtxDestroy_v2)(CUcontext ctx); + CUresult (*cuCtxPushCurrent_v2)(CUcontext ctx); + CUresult (*cuCtxPopCurrent_v2)(CUcontext *pctx); + CUresult (*cuGetErrorString)(CUresult error, const char **pStr); + CUresult (*cuMemsetD8_v2)(CUdeviceptr dstDevice, unsigned char uc, size_t N); + CUresult (*cuMemcpy2D_v2)(const CUDA_MEMCPY2D *pCopy); + + CUresult (*cuGraphicsGLRegisterImage)(CUgraphicsResource *pCudaResource, unsigned int image, unsigned int target, 
unsigned int Flags); + CUresult (*cuGraphicsResourceSetMapFlags)(CUgraphicsResource resource, unsigned int flags); + CUresult (*cuGraphicsMapResources)(unsigned int count, CUgraphicsResource *resources, CUstream hStream); + CUresult (*cuGraphicsUnmapResources)(unsigned int count, CUgraphicsResource *resources, CUstream hStream); + CUresult (*cuGraphicsUnregisterResource)(CUgraphicsResource resource); + CUresult (*cuGraphicsSubResourceGetMappedArray)(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); +} gsr_cuda; + +bool gsr_cuda_load(gsr_cuda *self); +void gsr_cuda_unload(gsr_cuda *self); + +#endif /* GSR_CUDA_H */ diff --git a/include/gl.h b/include/gl.h new file mode 100644 index 0000000..e27da6a --- /dev/null +++ b/include/gl.h @@ -0,0 +1,102 @@ +#ifndef GSR_GL_H +#define GSR_GL_H + +/* OpenGL library with a hidden window context (to allow using the opengl functions) */ + +#include +#include +#include + +typedef XID GLXPixmap; +typedef XID GLXDrawable; +typedef XID GLXWindow; + +typedef struct __GLXcontextRec *GLXContext; +typedef struct __GLXFBConfigRec *GLXFBConfig; + +#define GL_TEXTURE_2D 0x0DE1 +#define GL_RGB 0x1907 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_NEAREST 0x2600 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_LINEAR 0x2601 + +#define GL_RENDERER 0x1F01 + +#define GLX_BUFFER_SIZE 2 +#define GLX_DOUBLEBUFFER 5 +#define GLX_RED_SIZE 8 +#define GLX_GREEN_SIZE 9 +#define GLX_BLUE_SIZE 10 +#define GLX_ALPHA_SIZE 11 +#define GLX_DEPTH_SIZE 12 + +#define GLX_RGBA_BIT 0x00000001 +#define GLX_RENDER_TYPE 0x8011 +#define GLX_FRONT_EXT 0x20DE +#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0 +#define GLX_DRAWABLE_TYPE 0x8010 +#define GLX_WINDOW_BIT 0x00000001 +#define 
GLX_PIXMAP_BIT 0x00000002 +#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3 +#define GLX_TEXTURE_2D_BIT_EXT 0x00000002 +#define GLX_TEXTURE_TARGET_EXT 0x20D6 +#define GLX_TEXTURE_2D_EXT 0x20DC +#define GLX_TEXTURE_FORMAT_EXT 0x20D5 +#define GLX_TEXTURE_FORMAT_RGB_EXT 0x20D9 +#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x00000002 +#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091 +#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092 +#define GLX_CONTEXT_FLAGS_ARB 0x2094 + +typedef struct { + void *library; + Display *dpy; + GLXFBConfig *fbconfigs; + XVisualInfo *visual_info; + GLXFBConfig fbconfig; + Colormap colormap; + GLXContext gl_context; + Window window; + + GLXPixmap (*glXCreatePixmap)(Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList); + void (*glXDestroyPixmap)(Display *dpy, GLXPixmap pixmap); + void (*glXBindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list); + void (*glXReleaseTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer); + GLXFBConfig* (*glXChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems); + XVisualInfo* (*glXGetVisualFromFBConfig)(Display *dpy, GLXFBConfig config); + GLXContext (*glXCreateContextAttribsARB)(Display *dpy, GLXFBConfig config, GLXContext share_context, Bool direct, const int *attrib_list); + Bool (*glXMakeContextCurrent)(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); + void (*glXDestroyContext)(Display *dpy, GLXContext ctx); + void (*glXSwapBuffers)(Display *dpy, GLXDrawable drawable); + + void (*glXSwapIntervalEXT)(Display *dpy, GLXDrawable drawable, int interval); + int (*glXSwapIntervalMESA)(unsigned int interval); + int (*glXSwapIntervalSGI)(int interval); + + void (*glClearTexImage)(unsigned int texture, unsigned int level, unsigned int format, unsigned int type, const void *data); + + unsigned int (*glGetError)(void); + const unsigned char* (*glGetString)(unsigned int name); + void (*glClear)(unsigned int mask); + void 
(*glGenTextures)(int n, unsigned int *textures); + void (*glDeleteTextures)(int n, const unsigned int *texture); + void (*glBindTexture)(unsigned int target, unsigned int texture); + void (*glTexParameteri)(unsigned int target, unsigned int pname, int param); + void (*glGetTexLevelParameteriv)(unsigned int target, int level, unsigned int pname, int *params); + void (*glTexImage2D)(unsigned int target, int level, int internalFormat, int width, int height, int border, unsigned int format, unsigned int type, const void *pixels); + void (*glCopyImageSubData)(unsigned int srcName, unsigned int srcTarget, int srcLevel, int srcX, int srcY, int srcZ, unsigned int dstName, unsigned int dstTarget, int dstLevel, int dstX, int dstY, int dstZ, int srcWidth, int srcHeight, int srcDepth); +} gsr_gl; + +bool gsr_gl_load(gsr_gl *self, Display *dpy); +bool gsr_gl_make_context_current(gsr_gl *self); +void gsr_gl_unload(gsr_gl *self); + +#endif /* GSR_GL_H */ diff --git a/include/library_loader.h b/include/library_loader.h new file mode 100644 index 0000000..d359c5b --- /dev/null +++ b/include/library_loader.h @@ -0,0 +1,41 @@ +#ifndef GSR_LIBRARY_LOADER_H +#define GSR_LIBRARY_LOADER_H + +#include +#include + +typedef struct { + void **func; + const char *name; +} dlsym_assign; + +static void* dlsym_print_fail(void *handle, const char *name, bool required) { + dlerror(); + void *sym = dlsym(handle, name); + char *err_str = dlerror(); + + if(!sym) + fprintf(stderr, "%s: dlsym(handle, \"%s\") failed, error: %s\n", required ? "error" : "warning", name, err_str ? 
err_str : "(null)"); + + return sym; +} + +/* |dlsyms| should be null terminated */ +static bool dlsym_load_list(void *handle, const dlsym_assign *dlsyms) { + bool success = true; + for(int i = 0; dlsyms[i].func; ++i) { + *dlsyms[i].func = dlsym_print_fail(handle, dlsyms[i].name, true); + if(!*dlsyms[i].func) + success = false; + } + return success; +} + +/* |dlsyms| should be null terminated */ +static void dlsym_load_list_optional(void *handle, const dlsym_assign *dlsyms) { + for(int i = 0; dlsyms[i].func; ++i) { + *dlsyms[i].func = dlsym_print_fail(handle, dlsyms[i].name, false); + } +} + +#endif /* GSR_LIBRARY_LOADER_H */ \ No newline at end of file diff --git a/include/time.h b/include/time.h new file mode 100644 index 0000000..150c655 --- /dev/null +++ b/include/time.h @@ -0,0 +1,6 @@ +#ifndef GSR_TIME_H +#define GSR_TIME_H + +double clock_get_monotonic_seconds(); + +#endif /* GSR_TIME_H */ diff --git a/include/window_texture.h b/include/window_texture.h new file mode 100644 index 0000000..db64191 --- /dev/null +++ b/include/window_texture.h @@ -0,0 +1,28 @@ +#ifndef WINDOW_TEXTURE_H +#define WINDOW_TEXTURE_H + +#include "gl.h" + +typedef struct { + Display *display; + Window window; + Pixmap pixmap; + GLXPixmap glx_pixmap; + unsigned int texture_id; + int redirected; + gsr_gl *gl; +} WindowTexture; + +/* Returns 0 on success */ +int window_texture_init(WindowTexture *window_texture, Display *display, Window window, gsr_gl *gl); +void window_texture_deinit(WindowTexture *self); + +/* + This should ONLY be called when the target window is resized. + Returns 0 on success. 
+*/ +int window_texture_on_resize(WindowTexture *self); + +unsigned int window_texture_get_opengl_texture_id(WindowTexture *self); + +#endif /* WINDOW_TEXTURE_H */ diff --git a/src/capture/capture.c b/src/capture/capture.c index 9755d6a..699745a 100644 --- a/src/capture/capture.c +++ b/src/capture/capture.c @@ -1,17 +1,47 @@ #include "../../include/capture/capture.h" +#include -int gsr_capture_start(gsr_capture *cap) { - return cap->start(cap); +int gsr_capture_start(gsr_capture *cap, AVCodecContext *video_codec_context) { + if(cap->started) + return -1; + + int res = cap->start(cap, video_codec_context); + if(res == 0) + cap->started = true; + + return res; } -void gsr_capture_stop(gsr_capture *cap) { - cap->stop(cap); +void gsr_capture_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { + if(!cap->started) { + fprintf(stderr, "gsr error: gsp_capture_tick failed: the gsr capture has not been started\n"); + return; + } + + if(cap->tick) + cap->tick(cap, video_codec_context, frame); +} + +bool gsr_capture_should_stop(gsr_capture *cap, bool *err) { + if(!cap->started) { + fprintf(stderr, "gsr error: gsr_capture_should_stop failed: the gsr capture has not been started\n"); + return false; + } + + if(!cap->should_stop) + return false; + + return cap->should_stop(cap, err); } int gsr_capture_capture(gsr_capture *cap, AVFrame *frame) { + if(!cap->started) { + fprintf(stderr, "gsr error: gsr_capture_capture failed: the gsr capture has not been started\n"); + return -1; + } return cap->capture(cap, frame); } -void gsr_capture_destroy(gsr_capture *cap) { - cap->destroy(cap); +void gsr_capture_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + cap->destroy(cap, video_codec_context); } diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c index a2648ef..a470879 100644 --- a/src/capture/nvfbc.c +++ b/src/capture/nvfbc.c @@ -1,10 +1,16 @@ #include "../../include/capture/nvfbc.h" #include "../../external/NvFBC.h" +#include 
"../../include/cuda.h" #include #include #include #include +#include +#include +#include #include +#include +#include typedef struct { gsr_capture_nvfbc_params params; @@ -14,6 +20,8 @@ typedef struct { PNVFBCCREATEINSTANCE nv_fbc_create_instance; NVFBC_API_FUNCTION_LIST nv_fbc_function_list; bool fbc_handle_created; + + gsr_cuda cuda; } gsr_capture_nvfbc; #if defined(_WIN64) || defined(__LP64__) @@ -28,13 +36,16 @@ static int max_int(int a, int b) { } /* Returns 0 on failure */ -static uint32_t get_output_id_from_display_name(NVFBC_RANDR_OUTPUT_INFO *outputs, uint32_t num_outputs, const char *display_name) { +static uint32_t get_output_id_from_display_name(NVFBC_RANDR_OUTPUT_INFO *outputs, uint32_t num_outputs, const char *display_name, uint32_t *width, uint32_t *height) { if(!outputs) return 0; for(uint32_t i = 0; i < num_outputs; ++i) { - if(strcmp(outputs[i].name, display_name) == 0) + if(strcmp(outputs[i].name, display_name) == 0) { + *width = outputs[i].trackedBox.w; + *height = outputs[i].trackedBox.h; return outputs[i].dwId; + } } return 0; @@ -95,16 +106,78 @@ static bool gsr_capture_nvfbc_load_library(gsr_capture *cap) { return true; } -static int gsr_capture_nvfbc_start(gsr_capture *cap) { +#if LIBAVUTIL_VERSION_MAJOR < 57 +static AVBufferRef* dummy_hw_frame_init(int size) { + return av_buffer_alloc(size); +} +#else +static AVBufferRef* dummy_hw_frame_init(size_t size) { + return av_buffer_alloc(size); +} +#endif + +static bool ffmpeg_create_cuda_contexts(gsr_capture_nvfbc *cap_nvfbc, AVCodecContext *video_codec_context) { + AVBufferRef *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if(!device_ctx) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to create hardware device context\n"); + return false; + } + + AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data; + AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; + cuda_device_context->cuda_ctx = 
cap_nvfbc->cuda.cu_ctx; + if(av_hwdevice_ctx_init(device_ctx) < 0) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to create hardware device context\n"); + av_buffer_unref(&device_ctx); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); + if(!frame_context) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to create hwframe context\n"); + av_buffer_unref(&device_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = AV_PIX_FMT_0RGB32; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ref = device_ctx; + hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + + hw_frame_context->pool = av_buffer_pool_init(1, dummy_hw_frame_init); + hw_frame_context->initial_pool_size = 1; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "gsr error: cuda_create_codec_context failed: failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&device_ctx); + av_buffer_unref(&frame_context); + return false; + } + + video_codec_context->hw_device_ctx = device_ctx; + video_codec_context->hw_frames_ctx = frame_context; + return true; +} + +static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec_context) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; + if(!gsr_cuda_load(&cap_nvfbc->cuda)) + return -1; + + if(!gsr_capture_nvfbc_load_library(cap)) { + gsr_cuda_unload(&cap_nvfbc->cuda); + return -1; + } + const uint32_t x = max_int(cap_nvfbc->params.pos.x, 0); const uint32_t y = max_int(cap_nvfbc->params.pos.y, 0); const uint32_t width = max_int(cap_nvfbc->params.size.x, 0); const uint32_t height = max_int(cap_nvfbc->params.size.y, 0); - if(!cap_nvfbc->library || 
!cap_nvfbc->params.display_to_capture || cap_nvfbc->fbc_handle_created) - return -1; - const bool capture_region = (x > 0 || y > 0 || width > 0 || height > 0); NVFBCSTATUS status; @@ -127,7 +200,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { status = cap_nvfbc->nv_fbc_function_list.nvFBCCreateHandle(&cap_nvfbc->nv_fbc_handle, &create_params); if(status != NVFBC_SUCCESS) { fprintf(stderr, "gsr error: gsr_capture_nvfbc_start failed: %s\n", cap_nvfbc->nv_fbc_function_list.nvFBCGetLastErrorStr(cap_nvfbc->nv_fbc_handle)); - return -1; + goto error_cleanup; } } cap_nvfbc->fbc_handle_created = true; @@ -147,6 +220,8 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { goto error_cleanup; } + uint32_t tracking_width = XWidthOfScreen(DefaultScreenOfDisplay(cap_nvfbc->params.dpy)); + uint32_t tracking_height = XHeightOfScreen(DefaultScreenOfDisplay(cap_nvfbc->params.dpy)); tracking_type = strcmp(cap_nvfbc->params.display_to_capture, "screen") == 0 ? NVFBC_TRACKING_SCREEN : NVFBC_TRACKING_OUTPUT; if(tracking_type == NVFBC_TRACKING_OUTPUT) { if(!status_params.bXRandRAvailable) { @@ -159,7 +234,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { goto error_cleanup; } - output_id = get_output_id_from_display_name(status_params.outputs, status_params.dwOutputNum, cap_nvfbc->params.display_to_capture); + output_id = get_output_id_from_display_name(status_params.outputs, status_params.dwOutputNum, cap_nvfbc->params.display_to_capture, &tracking_width, &tracking_height); if(output_id == 0) { fprintf(stderr, "gsr error: gsr_capture_nvfbc_start failed: display '%s' not found\n", cap_nvfbc->params.display_to_capture); goto error_cleanup; @@ -198,6 +273,17 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { goto error_cleanup; } + if(capture_region) { + video_codec_context->width = width & ~1; + video_codec_context->height = height & ~1; + } else { + video_codec_context->width = tracking_width & ~1; + video_codec_context->height = tracking_height & ~1; + 
} + + if(!ffmpeg_create_cuda_contexts(cap_nvfbc, video_codec_context)) + goto error_cleanup; + return 0; error_cleanup: @@ -215,17 +301,16 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap) { cap_nvfbc->nv_fbc_function_list.nvFBCDestroyHandle(cap_nvfbc->nv_fbc_handle, &destroy_params); cap_nvfbc->fbc_handle_created = false; } - output_id = 0; + + av_buffer_unref(&video_codec_context->hw_device_ctx); + av_buffer_unref(&video_codec_context->hw_frames_ctx); + gsr_cuda_unload(&cap_nvfbc->cuda); return -1; } -static void gsr_capture_nvfbc_stop(gsr_capture *cap) { +static void gsr_capture_nvfbc_destroy_session(gsr_capture *cap) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; - /* Intentionally ignore failure on destroy */ - if(!cap_nvfbc->nv_fbc_handle) - return; - NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params; memset(&destroy_capture_params, 0, sizeof(destroy_capture_params)); destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER; @@ -241,8 +326,6 @@ static void gsr_capture_nvfbc_stop(gsr_capture *cap) { static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { gsr_capture_nvfbc *cap_nvfbc = cap->priv; - if(!cap_nvfbc->library || !cap_nvfbc->fbc_handle_created) - return -1; CUdeviceptr cu_device_ptr = 0; @@ -274,18 +357,19 @@ static int gsr_capture_nvfbc_capture(gsr_capture *cap, AVFrame *frame) { return 0; } -static void gsr_capture_nvfbc_destroy(gsr_capture *cap) { - if(cap) { - gsr_capture_nvfbc *cap_nvfbc = cap->priv; - gsr_capture_nvfbc_stop(cap); - if(cap_nvfbc) { - dlclose(cap_nvfbc->library); - free((void*)cap_nvfbc->params.display_to_capture); - free(cap->priv); - cap->priv = NULL; - } - free(cap); +static void gsr_capture_nvfbc_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_nvfbc *cap_nvfbc = cap->priv; + gsr_capture_nvfbc_destroy_session(cap); + av_buffer_unref(&video_codec_context->hw_device_ctx); + av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(cap_nvfbc) { + 
gsr_cuda_unload(&cap_nvfbc->cuda); + dlclose(cap_nvfbc->library); + free((void*)cap_nvfbc->params.display_to_capture); + free(cap->priv); + cap->priv = NULL; } + free(cap); } gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params) { @@ -294,6 +378,11 @@ gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params) { return NULL; } + if(!params->display_to_capture) { + fprintf(stderr, "gsr error: gsr_capture_nvfbc_create params.display_to_capture is NULL\n"); + return NULL; + } + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); if(!cap) return NULL; @@ -317,16 +406,12 @@ gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params) { *cap = (gsr_capture) { .start = gsr_capture_nvfbc_start, - .stop = gsr_capture_nvfbc_stop, + .tick = NULL, + .should_stop = NULL, .capture = gsr_capture_nvfbc_capture, .destroy = gsr_capture_nvfbc_destroy, .priv = cap_nvfbc }; - if(!gsr_capture_nvfbc_load_library(cap)) { - gsr_capture_nvfbc_destroy(cap); - return NULL; - } - return cap; } diff --git a/src/capture/xcomposite.c b/src/capture/xcomposite.c new file mode 100644 index 0000000..755ac92 --- /dev/null +++ b/src/capture/xcomposite.c @@ -0,0 +1,517 @@ +#include "../../include/capture/xcomposite.h" +#include "../../include/gl.h" +#include "../../include/cuda.h" +#include "../../include/window_texture.h" +#include "../../include/time.h" +#include +#include +#include +#include +#include + +/* TODO: Proper error checks and cleanups */ + +typedef struct { + gsr_capture_xcomposite_params params; + Display *dpy; + XEvent xev; + bool should_stop; + bool stop_is_error; + bool window_resized; + bool created_hw_frame; + double window_resize_timer; + + vec2i window_size; + vec2i window_pos; + + unsigned int target_texture_id; + vec2i texture_size; + Window composite_window; + WindowTexture window_texture; + + CUgraphicsResource cuda_graphics_resource; + CUarray mapped_array; + + gsr_gl gl; + gsr_cuda cuda; +} gsr_capture_xcomposite; + 
+static int max_int(int a, int b) { + return a > b ? a : b; +} + +static int min_int(int a, int b) { + return a < b ? a : b; +} + +static void gsr_capture_xcomposite_stop(gsr_capture *cap, AVCodecContext *video_codec_context); + +static Window get_compositor_window(Display *display) { + Window overlay_window = XCompositeGetOverlayWindow(display, DefaultRootWindow(display)); + XCompositeReleaseOverlayWindow(display, DefaultRootWindow(display)); + + Window root_window, parent_window; + Window *children = NULL; + unsigned int num_children = 0; + if(XQueryTree(display, overlay_window, &root_window, &parent_window, &children, &num_children) == 0) + return None; + + Window compositor_window = None; + if(num_children == 1) { + compositor_window = children[0]; + const int screen_width = XWidthOfScreen(DefaultScreenOfDisplay(display)); + const int screen_height = XHeightOfScreen(DefaultScreenOfDisplay(display)); + + XWindowAttributes attr; + if(!XGetWindowAttributes(display, compositor_window, &attr) || attr.width != screen_width || attr.height != screen_height) + compositor_window = None; + } + + if(children) + XFree(children); + + return compositor_window; +} + +/* TODO: check for glx swap control extension string (GLX_EXT_swap_control, etc) */ +static void set_vertical_sync_enabled(Display *display, Window window, gsr_gl *gl, bool enabled) { + int result = 0; + + if(gl->glXSwapIntervalEXT) { + gl->glXSwapIntervalEXT(display, window, enabled ? 1 : 0); + } else if(gl->glXSwapIntervalMESA) { + result = gl->glXSwapIntervalMESA(enabled ? 1 : 0); + } else if(gl->glXSwapIntervalSGI) { + result = gl->glXSwapIntervalSGI(enabled ? 
1 : 0); + } else { + static int warned = 0; + if (!warned) { + warned = 1; + fprintf(stderr, "Warning: setting vertical sync not supported\n"); + } + } + + if(result != 0) + fprintf(stderr, "Warning: setting vertical sync failed\n"); +} + +static bool cuda_register_opengl_texture(gsr_capture_xcomposite *cap_xcomp) { + CUresult res; + CUcontext old_ctx; + res = cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + res = cap_xcomp->cuda.cuGraphicsGLRegisterImage( + &cap_xcomp->cuda_graphics_resource, cap_xcomp->target_texture_id, GL_TEXTURE_2D, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); + if (res != CUDA_SUCCESS) { + const char *err_str = "unknown"; + cap_xcomp->cuda.cuGetErrorString(res, &err_str); + fprintf(stderr, + "Error: cuGraphicsGLRegisterImage failed, error %s, texture " + "id: %u\n", + err_str, cap_xcomp->target_texture_id); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); /* balance the push above on the error path */ + return false; + } + + /* Get texture */ + res = cap_xcomp->cuda.cuGraphicsResourceSetMapFlags(cap_xcomp->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); + res = cap_xcomp->cuda.cuGraphicsMapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + + /* Map texture to cuda array */ + res = cap_xcomp->cuda.cuGraphicsSubResourceGetMappedArray(&cap_xcomp->mapped_array, cap_xcomp->cuda_graphics_resource, 0, 0); + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return true; +} + +static bool cuda_create_codec_context(gsr_capture_xcomposite *cap_xcomp, AVCodecContext *video_codec_context) { + CUcontext old_ctx; + cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + AVBufferRef *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if(!device_ctx) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); /* every exit must undo the push above */ + return false; + } + + AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data; + AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; + cuda_device_context->cuda_ctx = cap_xcomp->cuda.cu_ctx; + 
if(av_hwdevice_ctx_init(device_ctx) < 0) { + fprintf(stderr, "Error: Failed to create hardware device context\n"); + av_buffer_unref(&device_ctx); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); + if(!frame_context) { + fprintf(stderr, "Error: Failed to create hwframe context\n"); + av_buffer_unref(&device_ctx); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = + (AVHWFramesContext *)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = AV_PIX_FMT_0RGB32; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ref = device_ctx; + hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "Error: Failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&device_ctx); + av_buffer_unref(&frame_context); + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return false; + } + + video_codec_context->hw_device_ctx = device_ctx; + video_codec_context->hw_frames_ctx = frame_context; + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); /* success path also undoes the push, like the error paths */ + return true; +} + +static unsigned int gl_create_texture(gsr_capture_xcomposite *cap_xcomp, int width, int height) { + // Generating this second texture is needed because + // cuGraphicsGLRegisterImage cant be used with the texture that is mapped + // directly to the pixmap. + // TODO: Investigate if it's somehow possible to use the pixmap texture + // directly, this should improve performance since only less image copy is + // then needed every frame. + // Ignoring failure for now.. 
TODO: Show proper error + unsigned int texture_id = 0; + cap_xcomp->gl.glGenTextures(1, &texture_id); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, texture_id); + cap_xcomp->gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + cap_xcomp->gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + return texture_id; +} + +static int gsr_capture_xcomposite_start(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + XWindowAttributes attr; + if(!XGetWindowAttributes(cap_xcomp->dpy, cap_xcomp->params.window, &attr)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start failed: invalid window id: %lu\n", cap_xcomp->params.window); + return -1; + } + + cap_xcomp->window_size.x = max_int(attr.width, 0); + cap_xcomp->window_size.y = max_int(attr.height, 0); + Window c; + XTranslateCoordinates(cap_xcomp->dpy, cap_xcomp->params.window, DefaultRootWindow(cap_xcomp->dpy), 0, 0, &cap_xcomp->window_pos.x, &cap_xcomp->window_pos.y, &c); + + XSelectInput(cap_xcomp->dpy, cap_xcomp->params.window, StructureNotifyMask | ExposureMask); + + if(!gsr_gl_load(&cap_xcomp->gl, cap_xcomp->dpy)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to load opengl\n"); + return -1; + } + + set_vertical_sync_enabled(cap_xcomp->dpy, cap_xcomp->gl.window, &cap_xcomp->gl, false); + if(window_texture_init(&cap_xcomp->window_texture, cap_xcomp->dpy, cap_xcomp->params.window, &cap_xcomp->gl) != 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed get window texture for window %ld\n", cap_xcomp->params.window); + gsr_gl_unload(&cap_xcomp->gl); + return -1; 
+ } + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + cap_xcomp->texture_size.x = 0; + cap_xcomp->texture_size.y = 0; + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &cap_xcomp->texture_size.x); + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + + cap_xcomp->texture_size.x = max_int(2, cap_xcomp->texture_size.x & ~1); + cap_xcomp->texture_size.y = max_int(2, cap_xcomp->texture_size.y & ~1); + + cap_xcomp->target_texture_id = gl_create_texture(cap_xcomp, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y); + if(cap_xcomp->target_texture_id == 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to create opengl texture\n"); + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + video_codec_context->width = cap_xcomp->texture_size.x; + video_codec_context->height = cap_xcomp->texture_size.y; + + if(!gsr_cuda_load(&cap_xcomp->cuda)) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + if(!cuda_create_codec_context(cap_xcomp, video_codec_context)) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + if(!cuda_register_opengl_texture(cap_xcomp)) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + return -1; + } + + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + return 0; +} + +static void gsr_capture_xcomposite_stop(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + window_texture_deinit(&cap_xcomp->window_texture); + + if(cap_xcomp->target_texture_id) { + cap_xcomp->gl.glDeleteTextures(1, &cap_xcomp->target_texture_id); + cap_xcomp->target_texture_id = 0; + } + + if(cap_xcomp->composite_window) { + XCompositeUnredirectWindow(cap_xcomp->dpy, cap_xcomp->composite_window, 
CompositeRedirectAutomatic); + cap_xcomp->composite_window = None; + } + + av_buffer_unref(&video_codec_context->hw_device_ctx); + av_buffer_unref(&video_codec_context->hw_frames_ctx); + + /* stop() is also reached from failed start() paths before gsr_cuda_load has filled in the cuda function pointers, and again from destroy() after gsr_cuda_unload cleared them. Only touch the resource if one is recorded, and clear it so a repeated stop() skips this. */ + if(cap_xcomp->cuda_graphics_resource) { + cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resource); + cap_xcomp->cuda_graphics_resource = 0; + } + gsr_cuda_unload(&cap_xcomp->cuda); + + gsr_gl_unload(&cap_xcomp->gl); + if(cap_xcomp->dpy) { + XCloseDisplay(cap_xcomp->dpy); + cap_xcomp->dpy = NULL; + } +} + +static void gsr_capture_xcomposite_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + cap_xcomp->gl.glClear(GL_COLOR_BUFFER_BIT); + + if(!cap_xcomp->created_hw_frame) { + CUcontext old_ctx; + cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: av_hwframe_get_buffer failed\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + + cap_xcomp->created_hw_frame = true; + cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + } + + if(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, DestroyNotify, &cap_xcomp->xev)) { + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = false; + } + + if(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, Expose, &cap_xcomp->xev) && cap_xcomp->xev.xexpose.count == 0) { + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + cap_xcomp->window_resized = true; + } + + if(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, ConfigureNotify, &cap_xcomp->xev) && cap_xcomp->xev.xconfigure.window == cap_xcomp->params.window) { + while(XCheckTypedWindowEvent(cap_xcomp->dpy, cap_xcomp->params.window, ConfigureNotify, &cap_xcomp->xev)) {} + Window c; + 
XTranslateCoordinates(cap_xcomp->dpy, cap_xcomp->params.window, DefaultRootWindow(cap_xcomp->dpy), 0, 0, &cap_xcomp->xev.xconfigure.x, &cap_xcomp->xev.xconfigure.y, &c); + cap_xcomp->window_pos.x = cap_xcomp->xev.xconfigure.x; + cap_xcomp->window_pos.y = cap_xcomp->xev.xconfigure.y; + + /* Window resize */ + if(cap_xcomp->xev.xconfigure.width != cap_xcomp->window_size.x || cap_xcomp->xev.xconfigure.height != cap_xcomp->window_size.y) { + cap_xcomp->window_size.x = max_int(cap_xcomp->xev.xconfigure.width, 0); + cap_xcomp->window_size.y = max_int(cap_xcomp->xev.xconfigure.height, 0); + cap_xcomp->window_resize_timer = clock_get_monotonic_seconds(); + cap_xcomp->window_resized = true; + } + } + + const double window_resize_timeout = 1.0; // 1 second + if(cap_xcomp->window_resized && clock_get_monotonic_seconds() - cap_xcomp->window_resize_timer >= window_resize_timeout) { + cap_xcomp->window_resized = false; + fprintf(stderr, "Resize window!\n"); + if(window_texture_on_resize(&cap_xcomp->window_texture) != 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: window_texture_on_resize failed\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + return; + } + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + cap_xcomp->texture_size.x = 0; + cap_xcomp->texture_size.y = 0; + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &cap_xcomp->texture_size.x); + cap_xcomp->gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + + cap_xcomp->texture_size.x = min_int(video_codec_context->width, max_int(2, cap_xcomp->texture_size.x & ~1)); + cap_xcomp->texture_size.y = min_int(video_codec_context->height, max_int(2, cap_xcomp->texture_size.y & ~1)); + + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, cap_xcomp->target_texture_id); + cap_xcomp->gl.glTexImage2D(GL_TEXTURE_2D, 0, 
GL_RGB, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); + cap_xcomp->gl.glBindTexture(GL_TEXTURE_2D, 0); + + CUcontext old_ctx; + CUresult res = cap_xcomp->cuda.cuCtxPushCurrent_v2(cap_xcomp->cuda.cu_ctx); + + cap_xcomp->cuda.cuGraphicsUnmapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + cap_xcomp->cuda.cuGraphicsUnregisterResource(cap_xcomp->cuda_graphics_resource); + res = cap_xcomp->cuda.cuGraphicsGLRegisterImage(&cap_xcomp->cuda_graphics_resource, cap_xcomp->target_texture_id, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); + if (res != CUDA_SUCCESS) { + const char *err_str = "unknown"; + cap_xcomp->cuda.cuGetErrorString(res, &err_str); + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: cuGraphicsGLRegisterImage failed, error %s, texture id: %u\n", err_str, cap_xcomp->target_texture_id); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + + res = cap_xcomp->cuda.cuGraphicsResourceSetMapFlags(cap_xcomp->cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); + res = cap_xcomp->cuda.cuGraphicsMapResources(1, &cap_xcomp->cuda_graphics_resource, 0); + res = cap_xcomp->cuda.cuGraphicsSubResourceGetMappedArray(&cap_xcomp->mapped_array, cap_xcomp->cuda_graphics_resource, 0, 0); + + av_frame_free(frame); + *frame = av_frame_alloc(); + if(!*frame) { /* check the new allocation; the out-parameter itself was already dereferenced above */ + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: failed to allocate frame\n"); + cap_xcomp->should_stop = true; + cap_xcomp->stop_is_error = true; + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + (*frame)->format = video_codec_context->pix_fmt; + (*frame)->width = video_codec_context->width; + (*frame)->height = video_codec_context->height; + + if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_tick: av_hwframe_get_buffer failed\n"); + cap_xcomp->should_stop 
= true; + cap_xcomp->stop_is_error = true; + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + return; + } + + // Make it completely black to clear unused parts + // TODO: cuMemsetD32? + res = cap_xcomp->cuda.cuMemsetD8_v2((CUdeviceptr)(*frame)->data[0], 0, (*frame)->width * (*frame)->height * 4); + res = cap_xcomp->cuda.cuCtxPopCurrent_v2(&old_ctx); + } +} + +static bool gsr_capture_xcomposite_should_stop(gsr_capture *cap, bool *err) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + if(cap_xcomp->should_stop) { + if(err) + *err = cap_xcomp->stop_is_error; + return true; + } + + if(err) + *err = false; + return false; +} + +static int gsr_capture_xcomposite_capture(gsr_capture *cap, AVFrame *frame) { + gsr_capture_xcomposite *cap_xcomp = cap->priv; + + // TODO: Use a framebuffer instead. glCopyImageSubData requires opengl 4.2 + vec2i source_pos = { 0, 0 }; + vec2i source_size = cap_xcomp->texture_size; + + // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2. 
+ cap_xcomp->gl.glCopyImageSubData( + window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), GL_TEXTURE_2D, 0, source_pos.x, source_pos.y, 0, + cap_xcomp->target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, + source_size.x, source_size.y, 1); + unsigned int err = cap_xcomp->gl.glGetError(); + if(err != 0) { + static bool error_shown = false; + if(!error_shown) { + error_shown = true; + fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err); + } + } + cap_xcomp->gl.glXSwapBuffers(cap_xcomp->dpy, cap_xcomp->gl.window); + // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id + + frame->linesize[0] = frame->width * 4; + + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; + + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + memcpy_struct.srcArray = cap_xcomp->mapped_array; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0]; + memcpy_struct.dstPitch = frame->linesize[0]; + memcpy_struct.WidthInBytes = frame->width * 4; + memcpy_struct.Height = frame->height; + cap_xcomp->cuda.cuMemcpy2D_v2(&memcpy_struct); + + return 0; +} + +static void gsr_capture_xcomposite_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { + gsr_capture_xcomposite_stop(cap, video_codec_context); + if(cap->priv) { + free(cap->priv); + cap->priv = NULL; + } + free(cap); +} + +gsr_capture* gsr_capture_xcomposite_create(const gsr_capture_xcomposite_params *params) { + if(!params) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_create params is NULL\n"); + return NULL; + } + + gsr_capture *cap = calloc(1, sizeof(gsr_capture)); + if(!cap) + return NULL; + + gsr_capture_xcomposite *cap_xcomp = calloc(1, sizeof(gsr_capture_xcomposite)); + if(!cap_xcomp) { + free(cap); + return NULL; + } + + Display *display = XOpenDisplay(NULL); + 
if(!display) {
+        fprintf(stderr, "gsr error: gsr_capture_xcomposite_create failed: XOpenDisplay failed\n");
+        free(cap);
+        free(cap_xcomp);
+        return NULL;
+    }
+
+    cap_xcomp->dpy = display;
+    cap_xcomp->params = *params;
+
+    *cap = (gsr_capture) {
+        .start = gsr_capture_xcomposite_start,
+        .tick = gsr_capture_xcomposite_tick,
+        .should_stop = gsr_capture_xcomposite_should_stop,
+        .capture = gsr_capture_xcomposite_capture,
+        .destroy = gsr_capture_xcomposite_destroy,
+        .priv = cap_xcomp
+    };
+
+    return cap;
+}
diff --git a/src/cuda.c b/src/cuda.c
new file mode 100644
index 0000000..0c42d74
--- /dev/null
+++ b/src/cuda.c
@@ -0,0 +1,114 @@
+#include "../include/cuda.h"
+#include "../include/library_loader.h"
+#include <string.h>
+
+/*
+ * Loads the CUDA driver library, resolves the driver API functions listed in
+ * required_dlsym, initializes CUDA and creates a context on device 0.
+ *
+ * |self| is zeroed first. On success |self->library| and |self->cu_ctx| are set
+ * and true is returned. On failure an error is printed to stderr, the library
+ * is closed again if it was opened, |self| is left zeroed and false is returned.
+ */
+bool gsr_cuda_load(gsr_cuda *self) {
+    memset(self, 0, sizeof(gsr_cuda));
+
+    dlerror(); /* clear */
+    void *lib = dlopen("libcuda.so.1", RTLD_LAZY);
+    if(!lib) {
+        /* Fall back to the unversioned library name */
+        lib = dlopen("libcuda.so", RTLD_LAZY);
+        if(!lib) {
+            fprintf(stderr, "gsr error: gsr_cuda_load failed: failed to load libcuda.so/libcuda.so.1, error: %s\n", dlerror());
+            return false;
+        }
+    }
+
+    dlsym_assign required_dlsym[] = {
+        { (void**)&self->cuInit, "cuInit" },
+        { (void**)&self->cuDeviceGetCount, "cuDeviceGetCount" },
+        { (void**)&self->cuDeviceGet, "cuDeviceGet" },
+        { (void**)&self->cuCtxCreate_v2, "cuCtxCreate_v2" },
+        { (void**)&self->cuCtxDestroy_v2, "cuCtxDestroy_v2" },
+        { (void**)&self->cuCtxPushCurrent_v2, "cuCtxPushCurrent_v2" },
+        { (void**)&self->cuCtxPopCurrent_v2, "cuCtxPopCurrent_v2" },
+        { (void**)&self->cuGetErrorString, "cuGetErrorString" },
+        { (void**)&self->cuMemsetD8_v2, "cuMemsetD8_v2" },
+        { (void**)&self->cuMemcpy2D_v2, "cuMemcpy2D_v2" },
+
+        { (void**)&self->cuGraphicsGLRegisterImage, "cuGraphicsGLRegisterImage" },
+        { (void**)&self->cuGraphicsResourceSetMapFlags, "cuGraphicsResourceSetMapFlags" },
+        { (void**)&self->cuGraphicsMapResources, "cuGraphicsMapResources" },
+        { (void**)&self->cuGraphicsUnmapResources, "cuGraphicsUnmapResources" },
+        { (void**)&self->cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource" },
+        { (void**)&self->cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray" },
+
+        { NULL, NULL }
+    };
+
+    if(!dlsym_load_list(lib, required_dlsym)) {
+        fprintf(stderr, "gsr error: gsr_cuda_load failed: missing required symbols in libcuda.so/libcuda.so.1\n");
+        dlclose(lib);
+        memset(self, 0, sizeof(gsr_cuda));
+        return false;
+    }
+
+    CUresult res;
+
+    res = self->cuInit(0);
+    if(res != CUDA_SUCCESS) {
+        const char *err_str = "unknown";
+        self->cuGetErrorString(res, &err_str);
+        fprintf(stderr, "gsr error: gsr_cuda_load failed: cuInit failed, error: %s (result: %d)\n", err_str, res);
+        goto fail;
+    }
+
+    /* The result of cuDeviceGetCount is not checked; nGpu starts at 0 so a count that was never written is reported as "no devices" */
+    int nGpu = 0;
+    self->cuDeviceGetCount(&nGpu);
+    if(nGpu <= 0) {
+        fprintf(stderr, "gsr error: gsr_cuda_load failed: no cuda supported devices found\n");
+        goto fail;
+    }
+
+    CUdevice cu_dev;
+    res = self->cuDeviceGet(&cu_dev, 0);
+    if(res != CUDA_SUCCESS) {
+        const char *err_str = "unknown";
+        self->cuGetErrorString(res, &err_str);
+        fprintf(stderr, "gsr error: gsr_cuda_load failed: unable to get CUDA device, error: %s (result: %d)\n", err_str, res);
+        goto fail;
+    }
+
+    res = self->cuCtxCreate_v2(&self->cu_ctx, CU_CTX_SCHED_AUTO, cu_dev);
+    if(res != CUDA_SUCCESS) {
+        const char *err_str = "unknown";
+        self->cuGetErrorString(res, &err_str);
+        fprintf(stderr, "gsr error: gsr_cuda_load failed: unable to create CUDA context, error: %s (result: %d)\n", err_str, res);
+        goto fail;
+    }
+
+    self->library = lib;
+    return true;
+
+    fail:
+    dlclose(lib);
+    memset(self, 0, sizeof(gsr_cuda));
+    return false;
+}
+
+/*
+ * Destroys the CUDA context (if one exists), closes the library and zeroes |self|.
+ * Does nothing if |self->library| is not set.
+ */
+void gsr_cuda_unload(gsr_cuda *self) {
+    if(self->library) {
+        if(self->cu_ctx) {
+            self->cuCtxDestroy_v2(self->cu_ctx);
+            self->cu_ctx = 0;
+        }
+
+        dlclose(self->library);
+        memset(self, 0, sizeof(gsr_cuda));
+    }
+}
diff --git a/src/gl.c b/src/gl.c
new file mode 100644
index 0000000..2f471aa
--- /dev/null
+++ b/src/gl.c
@@ -0,0 +1,229 @@
+#include "../include/gl.h"
+#include "../include/library_loader.h"
+#include <string.h>
+
+/*
+ * Creates a 1x1 X11 window with an OpenGL 4.2 context and makes that context
+ * current on the calling thread.
+ *
+ * On success the created resources are stored in |self| and true is returned.
+ * On failure an error is printed to stderr, everything created so far is
+ * released again and false is returned.
+ */
+static bool gsr_gl_create_window(gsr_gl *self) {
+    const int attr[] = {
+        GLX_RENDER_TYPE, GLX_RGBA_BIT,
+        GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT,
+        GLX_DOUBLEBUFFER, True,
+        GLX_RED_SIZE, 8,
+        GLX_GREEN_SIZE, 8,
+        GLX_BLUE_SIZE, 8,
+        GLX_ALPHA_SIZE, 8,
+        GLX_DEPTH_SIZE, 0,
+        None
+    };
+
+    GLXFBConfig *fbconfigs = NULL;
+    XVisualInfo *visual_info = NULL;
+    GLXFBConfig fbconfig = NULL;
+    Colormap colormap = None;
+    GLXContext gl_context = NULL;
+    Window window = None;
+
+    int numfbconfigs = 0;
+    fbconfigs = self->glXChooseFBConfig(self->dpy, DefaultScreen(self->dpy), attr, &numfbconfigs);
+    /* Use the first framebuffer config that has an associated X visual */
+    for(int i = 0; i < numfbconfigs; i++) {
+        visual_info = self->glXGetVisualFromFBConfig(self->dpy, fbconfigs[i]);
+        if(!visual_info)
+            continue;
+
+        fbconfig = fbconfigs[i];
+        break;
+    }
+
+    if(!visual_info) {
+        fprintf(stderr, "gsr error: gsr_gl_create_window failed: no appropriate visual found\n");
+        /* glXChooseFBConfig returns NULL when nothing matches and XFree must not be given NULL */
+        if(fbconfigs)
+            XFree(fbconfigs);
+        return false;
+    }
+
+    /* TODO: Core profile? GLX_CONTEXT_CORE_PROFILE_BIT_ARB. */
+    /* TODO: Remove need for 4.2 when copy texture function has been removed. */
+    int context_attribs[] = {
+        GLX_CONTEXT_MAJOR_VERSION_ARB, 4,
+        GLX_CONTEXT_MINOR_VERSION_ARB, 2,
+        GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB,
+        None
+    };
+
+    gl_context = self->glXCreateContextAttribsARB(self->dpy, fbconfig, NULL, True, context_attribs);
+    if(!gl_context) {
+        fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to create gl context\n");
+        goto fail;
+    }
+
+    colormap = XCreateColormap(self->dpy, DefaultRootWindow(self->dpy), visual_info->visual, AllocNone);
+    if(!colormap) {
+        fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to create x11 colormap\n");
+        goto fail;
+    }
+
+    XSetWindowAttributes window_attr;
+    window_attr.colormap = colormap;
+
+    // TODO: Is there a way to remove the need to create a window?
+    window = XCreateWindow(self->dpy, DefaultRootWindow(self->dpy), 0, 0, 1, 1, 0, visual_info->depth, InputOutput, visual_info->visual, CWColormap, &window_attr);
+
+    if(!window) {
+        fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to create gl window\n");
+        goto fail;
+    }
+
+    if(!self->glXMakeContextCurrent(self->dpy, window, window, gl_context)) {
+        fprintf(stderr, "gsr error: gsr_gl_create_window failed: failed to make gl context current\n");
+        goto fail;
+    }
+
+    self->fbconfigs = fbconfigs;
+    self->visual_info = visual_info;
+    self->colormap = colormap;
+    self->gl_context = gl_context;
+    self->window = window;
+    return true;
+
+    fail:
+    if(window)
+        XDestroyWindow(self->dpy, window);
+    if(colormap)
+        XFreeColormap(self->dpy, colormap);
+    if(gl_context)
+        self->glXDestroyContext(self->dpy, gl_context);
+    if(visual_info)
+        XFree(visual_info);
+    XFree(fbconfigs);
+    return false;
+}
+
+/*
+ * Loads libGL.so.1, resolves the GLX and OpenGL functions listed below and
+ * creates a window with a current OpenGL context on |dpy|.
+ *
+ * Failing to resolve an entry in optional_dlsym is not treated as an error
+ * here, while a missing entry in required_dlsym makes the load fail. On
+ * failure an error is printed to stderr and false is returned; if the library
+ * had been opened it is closed again and |self| is zeroed.
+ */
+bool gsr_gl_load(gsr_gl *self, Display *dpy) {
+    memset(self, 0, sizeof(gsr_gl));
+    self->dpy = dpy;
+
+    dlerror(); /* clear */
+    void *lib = dlopen("libGL.so.1", RTLD_LAZY);
+    if(!lib) {
+        fprintf(stderr, "gsr error: gsr_gl_load: failed to load libGL.so.1, error: %s\n", dlerror());
+        return false;
+    }
+
+    dlsym_assign optional_dlsym[] = {
+        { (void**)&self->glClearTexImage, "glClearTexImage" },
+        { (void**)&self->glXSwapIntervalEXT, "glXSwapIntervalEXT" },
+        { (void**)&self->glXSwapIntervalMESA, "glXSwapIntervalMESA" },
+        { (void**)&self->glXSwapIntervalSGI, "glXSwapIntervalSGI" },
+
+        { NULL, NULL }
+    };
+
+    dlsym_load_list_optional(lib, optional_dlsym);
+
+    dlsym_assign required_dlsym[] = {
+        { (void**)&self->glXCreatePixmap, "glXCreatePixmap" },
+        { (void**)&self->glXDestroyPixmap, "glXDestroyPixmap" },
+        { (void**)&self->glXBindTexImageEXT, "glXBindTexImageEXT" },
+        { (void**)&self->glXReleaseTexImageEXT, "glXReleaseTexImageEXT" },
+        { (void**)&self->glXChooseFBConfig, "glXChooseFBConfig" },
+        { (void**)&self->glXGetVisualFromFBConfig, "glXGetVisualFromFBConfig" },
+        { (void**)&self->glXCreateContextAttribsARB, "glXCreateContextAttribsARB" },
+        { (void**)&self->glXMakeContextCurrent, "glXMakeContextCurrent" },
+        { (void**)&self->glXDestroyContext, "glXDestroyContext" },
+        { (void**)&self->glXSwapBuffers, "glXSwapBuffers" },
+
+        { (void**)&self->glGetError, "glGetError" },
+        { (void**)&self->glGetString, "glGetString" },
+        { (void**)&self->glClear, "glClear" },
+        { (void**)&self->glGenTextures, "glGenTextures" },
+        { (void**)&self->glDeleteTextures, "glDeleteTextures" },
+        { (void**)&self->glBindTexture, "glBindTexture" },
+        { (void**)&self->glTexParameteri, "glTexParameteri" },
+        { (void**)&self->glGetTexLevelParameteriv, "glGetTexLevelParameteriv" },
+        { (void**)&self->glTexImage2D, "glTexImage2D" },
+        { (void**)&self->glCopyImageSubData, "glCopyImageSubData" },
+
+        { NULL, NULL }
+    };
+
+    if(!dlsym_load_list(lib, required_dlsym)) {
+        fprintf(stderr, "gsr error: gsr_gl_load failed: missing required symbols in libGL.so.1\n");
+        dlclose(lib);
+        memset(self, 0, sizeof(gsr_gl));
+        return false;
+    }
+
+    if(!gsr_gl_create_window(self)) {
+        dlclose(lib);
+        memset(self, 0, sizeof(gsr_gl));
+        return false;
+    }
+
+    self->library = lib;
+    return true;
+}
+
+/* Makes |self->gl_context| current, with |self->window| as both the draw and read drawable. Returns true on success */
+bool gsr_gl_make_context_current(gsr_gl *self) {
+    return self->glXMakeContextCurrent(self->dpy, self->window, self->window, self->gl_context);
+}
+
+/*
+ * Releases everything created by gsr_gl_load: the window, colormap, OpenGL
+ * context, visual info, framebuffer config list and finally the library.
+ * |self| is zeroed when a library was loaded.
+ */
+void gsr_gl_unload(gsr_gl *self) {
+    if(self->gl_context) {
+        /* A context that is still current is only destroyed once it stops being current, so release it first */
+        self->glXMakeContextCurrent(self->dpy, None, None, NULL);
+    }
+
+    if(self->window) {
+        XDestroyWindow(self->dpy, self->window);
+        self->window = None;
+    }
+
+    if(self->colormap) {
+        XFreeColormap(self->dpy, self->colormap);
+        self->colormap = None;
+    }
+
+    if(self->gl_context) {
+        self->glXDestroyContext(self->dpy, self->gl_context);
+        self->gl_context = NULL;
+    }
+
+    if(self->visual_info) {
+        XFree(self->visual_info);
+        self->visual_info = NULL;
+    }
+
+    if(self->fbconfigs) {
+        XFree(self->fbconfigs);
+        self->fbconfigs = NULL;
+    }
+
+    if(self->library) {
+        dlclose(self->library);
+        memset(self, 0, sizeof(gsr_gl));
+    }
+}
diff --git a/src/main.cpp b/src/main.cpp
index 8e80797..a227f12 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,22 +1,8 @@ -/* - Copyright (C) 2020 dec05eba - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - extern "C" { #include "../include/capture/nvfbc.h" +#include "../include/capture/xcomposite.h" +#include "../include/gl.h" +#include "../include/time.h" } #include @@ -35,29 +21,19 @@ extern "C" { #include #include "../include/sound.hpp" -#include "../include/CudaLibrary.hpp" -#include "../include/GlLibrary.hpp" -#include #include -//#include extern "C" { #include #include #include -#include -#include #include #include #include #include } -extern "C" { -#include -} - #include #include @@ -67,9 +43,6 @@ static const int VIDEO_STREAM_INDEX = 0; static thread_local char av_error_buffer[AV_ERROR_MAX_STRING_SIZE]; -static Cuda cuda; -static GlLibrary gl; - static const XRRModeInfo* get_mode_info(const XRRScreenResources *sr, RRMode id) { for(int i = 0; i < sr->nmode; ++i) { if(sr->modes[i].id == id) @@ -145,30 +118,6 @@ static char* av_error_to_string(int err) { return av_error_buffer; } -struct ScopedGLXFBConfig { - ~ScopedGLXFBConfig() { - if (configs) - XFree(configs); - } - - GLXFBConfig *configs = nullptr; -}; - -struct WindowPixmap { - Pixmap pixmap = None; - GLXPixmap glx_pixmap = None; - unsigned int texture_id = 0; - unsigned int target_texture_id = 0; - - int texture_width = 0; - int texture_height = 0; - - int texture_real_width = 0; - int 
texture_real_height = 0; - - Window composite_window = None; -}; - enum class VideoQuality { MEDIUM, HIGH, @@ -181,46 +130,7 @@ enum class VideoCodec { H265 }; -static double clock_get_monotonic_seconds() { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 0; - clock_gettime(CLOCK_MONOTONIC, &ts); - return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001; -} - -static bool x11_supports_composite_named_window_pixmap(Display *dpy) { - int extension_major; - int extension_minor; - if (!XCompositeQueryExtension(dpy, &extension_major, &extension_minor)) - return false; - - int major_version; - int minor_version; - return XCompositeQueryVersion(dpy, &major_version, &minor_version) && - (major_version > 0 || minor_version >= 2); -} - static int x11_error_handler(Display *dpy, XErrorEvent *ev) { -#if 0 - char type_str[128]; - XGetErrorText(dpy, ev->type, type_str, sizeof(type_str)); - - char major_opcode_str[128]; - XGetErrorText(dpy, ev->type, major_opcode_str, sizeof(major_opcode_str)); - - char minor_opcode_str[128]; - XGetErrorText(dpy, ev->type, minor_opcode_str, sizeof(minor_opcode_str)); - - fprintf(stderr, - "X Error of failed request: %s\n" - "Major opcode of failed request: %d (%s)\n" - "Minor opcode of failed request: %d (%s)\n" - "Serial number of failed request: %d\n", - type_str, - ev->request_code, major_opcode_str, - ev->minor_code, minor_opcode_str); -#endif return 0; } @@ -228,350 +138,6 @@ static int x11_io_error_handler(Display *dpy) { return 0; } -static Window get_compositor_window(Display *display) { - Window overlay_window = XCompositeGetOverlayWindow(display, DefaultRootWindow(display)); - XCompositeReleaseOverlayWindow(display, DefaultRootWindow(display)); - - /* - Atom xdnd_proxy = XInternAtom(display, "XdndProxy", False); - if(!xdnd_proxy) - return None; - - Atom type = None; - int format = 0; - unsigned long nitems = 0, after = 0; - unsigned char *data = nullptr; - if(XGetWindowProperty(display, overlay_window, xdnd_proxy, 0, 1, False, 
XA_WINDOW, &type, &format, &nitems, &after, &data) != Success) - return None; - - fprintf(stderr, "type: %ld, format: %d, num items: %lu\n", type, format, nitems); - if(type == XA_WINDOW && format == 32 && nitems == 1) - fprintf(stderr, "Proxy window: %ld\n", *(Window*)data); - - if(data) - XFree(data); - */ - - Window root_window, parent_window; - Window *children = nullptr; - unsigned int num_children = 0; - if(XQueryTree(display, overlay_window, &root_window, &parent_window, &children, &num_children) == 0) - return None; - - Window compositor_window = None; - if(num_children == 1) { - compositor_window = children[0]; - const int screen_width = XWidthOfScreen(DefaultScreenOfDisplay(display)); - const int screen_height = XHeightOfScreen(DefaultScreenOfDisplay(display)); - - XWindowAttributes attr; - if(!XGetWindowAttributes(display, compositor_window, &attr) || attr.width != screen_width || attr.height != screen_height) - compositor_window = None; - } - - if(children) - XFree(children); - - return compositor_window; -} - -static void cleanup_window_pixmap(Display *dpy, WindowPixmap &pixmap) { - if (pixmap.target_texture_id) { - gl.glDeleteTextures(1, &pixmap.target_texture_id); - pixmap.target_texture_id = 0; - } - - if (pixmap.texture_id) { - gl.glDeleteTextures(1, &pixmap.texture_id); - pixmap.texture_id = 0; - pixmap.texture_width = 0; - pixmap.texture_height = 0; - pixmap.texture_real_width = 0; - pixmap.texture_real_height = 0; - } - - if (pixmap.glx_pixmap) { - gl.glXDestroyPixmap(dpy, pixmap.glx_pixmap); - gl.glXReleaseTexImageEXT(dpy, pixmap.glx_pixmap, GLX_FRONT_EXT); - pixmap.glx_pixmap = None; - } - - if (pixmap.pixmap) { - XFreePixmap(dpy, pixmap.pixmap); - pixmap.pixmap = None; - } - - if(pixmap.composite_window) { - XCompositeUnredirectWindow(dpy, pixmap.composite_window, CompositeRedirectAutomatic); - pixmap.composite_window = None; - } -} - -static bool recreate_window_pixmap(Display *dpy, Window window_id, - WindowPixmap &pixmap, bool 
fallback_composite_window = true) { - cleanup_window_pixmap(dpy, pixmap); - - XWindowAttributes attr; - if (!XGetWindowAttributes(dpy, window_id, &attr)) { - fprintf(stderr, "Failed to get window attributes\n"); - return false; - } - - const int pixmap_config[] = { - GLX_BIND_TO_TEXTURE_RGB_EXT, True, - GLX_DRAWABLE_TYPE, GLX_PIXMAP_BIT | GLX_WINDOW_BIT, - GLX_BIND_TO_TEXTURE_TARGETS_EXT, GLX_TEXTURE_2D_BIT_EXT, - GLX_BUFFER_SIZE, 24, - GLX_RED_SIZE, 8, - GLX_GREEN_SIZE, 8, - GLX_BLUE_SIZE, 8, - GLX_ALPHA_SIZE, 0, - // GLX_Y_INVERTED_EXT, (int)GLX_DONT_CARE, - None}; - - const int pixmap_attribs[] = {GLX_TEXTURE_TARGET_EXT, - GLX_TEXTURE_2D_EXT, - GLX_TEXTURE_FORMAT_EXT, - GLX_TEXTURE_FORMAT_RGB_EXT, - None}; - - int c; - GLXFBConfig *configs = gl.glXChooseFBConfig(dpy, 0, pixmap_config, &c); - if (!configs) { - fprintf(stderr, "Failed too choose fb config\n"); - return false; - } - ScopedGLXFBConfig scoped_configs; - scoped_configs.configs = configs; - - bool found = false; - GLXFBConfig config; - for (int i = 0; i < c; i++) { - config = configs[i]; - XVisualInfo *visual = gl.glXGetVisualFromFBConfig(dpy, config); - if (!visual) - continue; - - if (attr.depth != visual->depth) { - XFree(visual); - continue; - } - XFree(visual); - found = true; - break; - } - - if(!found) { - fprintf(stderr, "No matching fb config found\n"); - return false; - } - - Pixmap new_window_pixmap = XCompositeNameWindowPixmap(dpy, window_id); - if (!new_window_pixmap) { - fprintf(stderr, "Failed to get pixmap for window %ld\n", window_id); - return false; - } - - GLXPixmap glx_pixmap = gl.glXCreatePixmap(dpy, config, new_window_pixmap, pixmap_attribs); - if (!glx_pixmap) { - fprintf(stderr, "Failed to create glx pixmap\n"); - XFreePixmap(dpy, new_window_pixmap); - return false; - } - - pixmap.pixmap = new_window_pixmap; - pixmap.glx_pixmap = glx_pixmap; - - //glEnable(GL_TEXTURE_2D); - gl.glGenTextures(1, &pixmap.texture_id); - gl.glBindTexture(GL_TEXTURE_2D, pixmap.texture_id); - - // 
glEnable(GL_BLEND); - // glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - - gl.glXBindTexImageEXT(dpy, pixmap.glx_pixmap, GLX_FRONT_EXT, NULL); - gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, - GL_NEAREST); // GL_LINEAR ); - gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, - GL_NEAREST); // GL_LINEAR);//GL_LINEAR_MIPMAP_LINEAR ); - //glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); - - gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, - &pixmap.texture_width); - gl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, - &pixmap.texture_height); - - pixmap.texture_real_width = pixmap.texture_width; - pixmap.texture_real_height = pixmap.texture_height; - - if(pixmap.texture_width == 0 || pixmap.texture_height == 0) { - gl.glBindTexture(GL_TEXTURE_2D, 0); - pixmap.texture_width = attr.width; - pixmap.texture_height = attr.height; - - pixmap.texture_real_width = pixmap.texture_width; - pixmap.texture_real_height = pixmap.texture_height; - - if(fallback_composite_window) { - Window compositor_window = get_compositor_window(dpy); - if(!compositor_window) { - fprintf(stderr, "Warning: failed to get texture size. You are probably running an unsupported compositor and recording the selected window doesn't work at the moment. This could also happen if you are trying to record a window with client-side decorations. A black window will be displayed instead. A workaround is to record the whole monitor (which uses NvFBC).\n"); - return false; - } - - fprintf(stderr, "Warning: failed to get texture size. You are probably trying to record a window with client-side decorations (using GNOME?). 
Trying to fallback to recording the compositor proxy window\n"); - XCompositeRedirectWindow(dpy, compositor_window, CompositeRedirectAutomatic); - - // TODO: Target texture should be the same size as the target window, not the size of the composite window - if(recreate_window_pixmap(dpy, compositor_window, pixmap, false)) { - pixmap.composite_window = compositor_window; - pixmap.texture_width = attr.width; - pixmap.texture_height = attr.height; - return true; - } - - pixmap.texture_width = attr.width; - pixmap.texture_height = attr.height; - - return false; - } else { - fprintf(stderr, "Warning: failed to get texture size. You are probably running an unsupported compositor and recording the selected window doesn't work at the moment. This could also happen if you are trying to record a window with client-side decorations. A black window will be displayed instead. A workaround is to record the whole monitor (which uses NvFBC).\n"); - } - } - - fprintf(stderr, "texture width: %d, height: %d\n", pixmap.texture_width, - pixmap.texture_height); - - // Generating this second texture is needed because - // cuGraphicsGLRegisterImage cant be used with the texture that is mapped - // directly to the pixmap. - // TODO: Investigate if it's somehow possible to use the pixmap texture - // directly, this should improve performance since only less image copy is - // then needed every frame. 
- gl.glGenTextures(1, &pixmap.target_texture_id); - gl.glBindTexture(GL_TEXTURE_2D, pixmap.target_texture_id); - gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, pixmap.texture_width, - pixmap.texture_height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); - unsigned int err2 = gl.glGetError(); - //fprintf(stderr, "error: %d\n", err2); - // glXBindTexImageEXT(dpy, pixmap.glx_pixmap, GLX_FRONT_EXT, NULL); - // glGenerateTextureMipmapEXT(glxpixmap, GL_TEXTURE_2D); - - // glGenerateMipmap(GL_TEXTURE_2D); - - // gl.glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE ); - // gl.glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE ); - - gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, - GL_NEAREST); // GL_LINEAR ); - gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, - GL_NEAREST); // GL_LINEAR);//GL_LINEAR_MIPMAP_LINEAR ); - //glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); - - gl.glBindTexture(GL_TEXTURE_2D, 0); - - return pixmap.texture_id != 0 && pixmap.target_texture_id != 0; -} - -static Window create_opengl_window(Display *display) { - const int attr[] = { - GLX_RENDER_TYPE, GLX_RGBA_BIT, - GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT, - GLX_DOUBLEBUFFER, True, - GLX_RED_SIZE, 8, - GLX_GREEN_SIZE, 8, - GLX_BLUE_SIZE, 8, - GLX_ALPHA_SIZE, 8, - GLX_DEPTH_SIZE, 0, - None - }; - - XVisualInfo *visual_info = NULL; - GLXFBConfig fbconfig = NULL; - - int numfbconfigs = 0; - GLXFBConfig *fbconfigs = gl.glXChooseFBConfig(display, DefaultScreen(display), attr, &numfbconfigs); - for(int i = 0; i < numfbconfigs; i++) { - visual_info = gl.glXGetVisualFromFBConfig(display, fbconfigs[i]); - if(!visual_info) - continue; - - fbconfig = fbconfigs[i]; - break; - } - - if(!visual_info) { - fprintf(stderr, "mgl error: no appropriate visual found\n"); - return -1; - } - - // TODO: Core profile? GLX_CONTEXT_CORE_PROFILE_BIT_ARB. 
- // TODO: Remove need for 4.2 when copy texture function has been removed - int context_attribs[] = { - GLX_CONTEXT_MAJOR_VERSION_ARB, 4, - GLX_CONTEXT_MINOR_VERSION_ARB, 2, - GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, - None - }; - - GLXContext gl_context = gl.glXCreateContextAttribsARB(display, fbconfig, nullptr, True, context_attribs); - if(!gl_context) { - fprintf(stderr, "Error: failed to create gl context\n"); - return None; - } - - Colormap colormap = XCreateColormap(display, DefaultRootWindow(display), visual_info->visual, AllocNone); - if(!colormap) { - fprintf(stderr, "Error: failed to create x11 colormap\n"); - gl.glXDestroyContext(display, gl_context); - } - - XSetWindowAttributes window_attr; - window_attr.colormap = colormap; - - // TODO: Is there a way to remove the need to create a window? - Window window = XCreateWindow(display, DefaultRootWindow(display), 0, 0, 1, 1, 0, visual_info->depth, InputOutput, visual_info->visual, CWColormap, &window_attr); - - if(!window) { - fprintf(stderr, "Error: failed to create gl window\n"); - goto fail; - } - - if(!gl.glXMakeContextCurrent(display, window, window, gl_context)) { - fprintf(stderr, "Error: failed to make gl context current\n"); - goto fail; - } - - return window; - - fail: - XFreeColormap(display, colormap); - gl.glXDestroyContext(display, gl_context); - return None; -} - -/* TODO: check for glx swap control extension string (GLX_EXT_swap_control, etc) */ -static void set_vertical_sync_enabled(Display *display, Window window, bool enabled) { - int result = 0; - - if(gl.glXSwapIntervalEXT) { - gl.glXSwapIntervalEXT(display, window, enabled ? 1 : 0); - } else if(gl.glXSwapIntervalMESA) { - result = gl.glXSwapIntervalMESA(enabled ? 1 : 0); - } else if(gl.glXSwapIntervalSGI) { - result = gl.glXSwapIntervalSGI(enabled ? 
1 : 0); - } else { - static int warned = 0; - if (!warned) { - warned = 1; - fprintf(stderr, "Warning: setting vertical sync not supported\n"); - } - } - - if(result != 0) - fprintf(stderr, "Warning: setting vertical sync failed\n"); -} - // |stream| is only required for non-replay mode static void receive_frames(AVCodecContext *av_codec_context, int stream_index, AVStream *stream, AVFrame *frame, AVFormatContext *av_format_context, @@ -641,11 +207,6 @@ static AVCodecContext* create_audio_codec_context(AVFormatContext *av_format_con AVCodecContext *codec_context = avcodec_alloc_context3(codec); assert(codec->type == AVMEDIA_TYPE_AUDIO); - /* - codec_context->sample_fmt = (*codec)->sample_fmts - ? (*codec)->sample_fmts[0] - : AV_SAMPLE_FMT_FLTP; - */ codec_context->codec_id = AV_CODEC_ID_AAC; codec_context->sample_fmt = AV_SAMPLE_FMT_FLTP; //codec_context->bit_rate = 64000; @@ -685,7 +246,6 @@ static const AVCodec* find_h265_encoder() { static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_context, VideoQuality video_quality, - int record_width, int record_height, int fps, const AVCodec *codec, bool is_livestream) { AVCodecContext *codec_context = avcodec_alloc_context3(codec); @@ -694,8 +254,6 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con assert(codec->type == AVMEDIA_TYPE_VIDEO); codec_context->codec_id = codec->id; - codec_context->width = record_width & ~1; - codec_context->height = record_height & ~1; // Timebase: This is the fundamental unit of time (in seconds) in terms // of which frame timestamps are represented. 
For fixed-fps content, // timebase should be 1/framerate and timestamp increments should be @@ -747,6 +305,9 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con if (codec_context->codec_id == AV_CODEC_ID_MPEG1VIDEO) codec_context->mb_decision = 2; + if(codec_context->codec_id == AV_CODEC_ID_H264) + codec_context->profile = FF_PROFILE_H264_HIGH; + // stream->time_base = codec_context->time_base; // codec_context->ticks_per_frame = 30; //av_opt_set(codec_context->priv_data, "tune", "hq", 0); @@ -808,64 +369,7 @@ static AVFrame* open_audio(AVCodecContext *audio_codec_context) { return frame; } -#if LIBAVUTIL_VERSION_MAJOR < 57 -static AVBufferRef* dummy_hw_frame_init(int size) { - return av_buffer_alloc(size); -} -#else -static AVBufferRef* dummy_hw_frame_init(size_t size) { - return av_buffer_alloc(size); -} -#endif - -static void open_video(AVCodecContext *codec_context, - WindowPixmap &window_pixmap, AVBufferRef **device_ctx, - CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context, bool use_nvfbc, VideoQuality video_quality, bool is_livestream, bool very_old_gpu) { - int ret; - - *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); - if(!*device_ctx) { - fprintf(stderr, "Error: Failed to create hardware device context\n"); - exit(1); - } - - AVHWDeviceContext *hw_device_context = (AVHWDeviceContext *)(*device_ctx)->data; - AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext *)hw_device_context->hwctx; - cuda_device_context->cuda_ctx = cuda_context; - if(av_hwdevice_ctx_init(*device_ctx) < 0) { - fprintf(stderr, "Error: Failed to create hardware device context\n"); - exit(1); - } - - AVBufferRef *frame_context = av_hwframe_ctx_alloc(*device_ctx); - if (!frame_context) { - fprintf(stderr, "Error: Failed to create hwframe context\n"); - exit(1); - } - - AVHWFramesContext *hw_frame_context = - (AVHWFramesContext *)frame_context->data; - hw_frame_context->width = codec_context->width; - 
hw_frame_context->height = codec_context->height; - hw_frame_context->sw_format = AV_PIX_FMT_0RGB32; - hw_frame_context->format = codec_context->pix_fmt; - hw_frame_context->device_ref = *device_ctx; - hw_frame_context->device_ctx = (AVHWDeviceContext *)(*device_ctx)->data; - - if(use_nvfbc) { - hw_frame_context->pool = av_buffer_pool_init(1, dummy_hw_frame_init); - hw_frame_context->initial_pool_size = 1; - } - - if (av_hwframe_ctx_init(frame_context) < 0) { - fprintf(stderr, "Error: Failed to initialize hardware frame context " - "(note: ffmpeg version needs to be > 4.0\n"); - exit(1); - } - - codec_context->hw_device_ctx = *device_ctx; - codec_context->hw_frames_ctx = frame_context; - +static void open_video(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu) { bool supports_p4 = false; bool supports_p7 = false; @@ -912,9 +416,8 @@ static void open_video(AVCodecContext *codec_context, } } - if(!supports_p4 && !supports_p7) { + if(!supports_p4 && !supports_p7) fprintf(stderr, "Info: your ffmpeg version is outdated. 
It's recommended that you use the flatpak version of gpu-screen-recorder version instead, which you can find at https://flathub.org/apps/details/com.dec05eba.gpu_screen_recorder\n"); - } //if(is_livestream) { // av_dict_set_int(&options, "zerolatency", 1, 0); @@ -934,38 +437,14 @@ static void open_video(AVCodecContext *codec_context, av_dict_set(&options, "tune", "hq", 0); av_dict_set(&options, "rc", "constqp", 0); - ret = avcodec_open2(codec_context, codec_context->codec, &options); + if(codec_context->codec_id == AV_CODEC_ID_H264) + av_dict_set(&options, "profile", "high", 0); + + int ret = avcodec_open2(codec_context, codec_context->codec, &options); if (ret < 0) { - fprintf(stderr, "Error: Could not open video codec: %s\n", - "blabla"); // av_err2str(ret)); + fprintf(stderr, "Error: Could not open video codec: %s\n", av_error_to_string(ret)); exit(1); } - - if(window_pixmap.target_texture_id != 0) { - CUresult res; - CUcontext old_ctx; - res = cuda.cuCtxPopCurrent_v2(&old_ctx); - res = cuda.cuCtxPushCurrent_v2(cuda_context); - res = cuda.cuGraphicsGLRegisterImage( - cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D, - CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); - // cuda.cuGraphicsUnregisterResource(*cuda_graphics_resource); - if (res != CUDA_SUCCESS) { - const char *err_str; - cuda.cuGetErrorString(res, &err_str); - fprintf(stderr, - "Error: cuda.cuGraphicsGLRegisterImage failed, error %s, texture " - "id: %u\n", - err_str, window_pixmap.target_texture_id); - exit(1); - } - res = cuda.cuCtxPopCurrent_v2(&old_ctx); - } -} - -static void close_video(AVStream *video_stream, AVFrame *frame) { - // avcodec_close(video_stream->codec); - // av_frame_free(&frame); } static void usage() { @@ -973,7 +452,6 @@ static void usage() { fprintf(stderr, "OPTIONS:\n"); fprintf(stderr, " -w Window to record or a display, \"screen\" or \"screen-direct\". 
The display is the display name in xrandr and if \"screen\" or \"screen-direct\" is selected then all displays are recorded and they are recorded in h265 (aka hevc)." "\"screen-direct\" skips one texture copy for fullscreen applications so it may lead to better performance and it works with VRR monitors when recording fullscreen application but may break some applications, such as mpv in fullscreen mode. Recording a display requires a gpu with NvFBC support.\n"); - fprintf(stderr, " -s The size (area) to record at in the format WxH, for example 1920x1080. Usually you want to set this to the size of the window. Optional, by default the size of the window (which is passed to -w). This option is only supported when recording a window, not a screen/monitor.\n"); fprintf(stderr, " -c Container format for output file, for example mp4, or flv.\n"); fprintf(stderr, " -f Framerate to record at.\n"); fprintf(stderr, " -a Audio device to record from (pulse audio device). Can be specified multiple times. Each time this is specified a new audio track is added for the specified audio device. A name can be given to the audio input device by prefixing the audio input with /, for example \"dummy/alsa_output.pci-0000_00_1b.0.analog-stereo.monitor\". 
Optional, no audio track is added by default.\n"); @@ -989,16 +467,8 @@ static void usage() { exit(1); } -static sig_atomic_t started = 0; static sig_atomic_t running = 1; static sig_atomic_t save_replay = 0; -static const char *pid_file = "/tmp/gpu-screen-recorder"; - -static void term_handler(int) { - if(started) - unlink(pid_file); - exit(0); -} static void int_handler(int) { running = 0; @@ -1213,7 +683,6 @@ static bool is_livestream_path(const char *str) { } int main(int argc, char **argv) { - signal(SIGTERM, term_handler); signal(SIGINT, int_handler); signal(SIGUSR1, save_replay_handler); @@ -1222,7 +691,7 @@ int main(int argc, char **argv) { //{ "-s", Arg { nullptr, true } }, { "-c", Arg { {}, false, false } }, { "-f", Arg { {}, false, false } }, - { "-s", Arg { {}, true, false } }, + //{ "-s", Arg { {}, true, false } }, { "-a", Arg { {}, true, true } }, { "-q", Arg { {}, true, false } }, { "-o", Arg { {}, true, false } }, @@ -1297,22 +766,6 @@ int main(int argc, char **argv) { } } - uint32_t region_x = 0; - uint32_t region_y = 0; - uint32_t region_width = 0; - uint32_t region_height = 0; - - /* - TODO: Fix this. 
Doesn't work for some reason - const char *screen_region = args["-s"].value(); - if(screen_region) { - if(sscanf(screen_region, "%ux%u+%u+%u", ®ion_x, ®ion_y, ®ion_width, ®ion_height) != 4) { - fprintf(stderr, "Invalid value for -s '%s', expected a value in format WxH+X+Y\n", screen_region); - return 1; - } - } - */ - const char *container_format = args["-c"].value(); int fps = atoi(args["-f"].value()); if(fps == 0) { @@ -1351,46 +804,6 @@ int main(int argc, char **argv) { replay_buffer_size_secs += 5; // Add a few seconds to account of lost packets because of non-keyframe packets skipped } - if(!cuda.load()) { - fprintf(stderr, "Error: failed to load cuda\n"); - return 2; - } - - CUresult res; - - res = cuda.cuInit(0); - if(res != CUDA_SUCCESS) { - const char *err_str; - cuda.cuGetErrorString(res, &err_str); - fprintf(stderr, "Error: cuInit failed, error %s (result: %d)\n", err_str, res); - return 1; - } - - int nGpu = 0; - cuda.cuDeviceGetCount(&nGpu); - if (nGpu <= 0) { - fprintf(stderr, "Error: no cuda supported devices found\n"); - return 1; - } - - CUdevice cu_dev; - res = cuda.cuDeviceGet(&cu_dev, 0); - if(res != CUDA_SUCCESS) { - const char *err_str; - cuda.cuGetErrorString(res, &err_str); - fprintf(stderr, "Error: unable to get CUDA device, error: %s (result: %d)\n", err_str, res); - return 1; - } - - CUcontext cu_ctx; - res = cuda.cuCtxCreate_v2(&cu_ctx, CU_CTX_SCHED_AUTO, cu_dev); - if(res != CUDA_SUCCESS) { - const char *err_str; - cuda.cuGetErrorString(res, &err_str); - fprintf(stderr, "Error: unable to create CUDA context, error: %s (result: %d)\n", err_str, res); - return 1; - } - Display *dpy = XOpenDisplay(nullptr); if (!dpy) { fprintf(stderr, "Error: Failed to open display\n"); @@ -1400,23 +813,29 @@ int main(int argc, char **argv) { XSetErrorHandler(x11_error_handler); XSetIOErrorHandler(x11_io_error_handler); - const char *record_area = args["-s"].value(); + gsr_gl gl; + if(!gsr_gl_load(&gl, dpy)) { + fprintf(stderr, "Error: failed to load 
opengl\n"); + return 1; + } - uint32_t window_width = 0; - uint32_t window_height = 0; - int window_x = 0; - int window_y = 0; + bool very_old_gpu = false; + const unsigned char *gl_renderer = gl.glGetString(GL_RENDERER); + if(gl_renderer) { + int gpu_num = 1000; + sscanf((const char*)gl_renderer, "%*s %*s %*s %d", &gpu_num); + if(gpu_num < 900) { + fprintf(stderr, "Info: your gpu appears to be very old (older than maxwell architecture). Switching to lower preset\n"); + very_old_gpu = true; + } + } - gsr_capture *capture = nullptr; + gsr_gl_unload(&gl); const char *window_str = args["-w"].value(); - Window src_window_id = None; - if(contains_non_hex_number(window_str)) { - if(record_area) { - fprintf(stderr, "Option -s is not supported when recording a monitor/screen\n"); - usage(); - } + gsr_capture *capture = nullptr; + if(contains_non_hex_number(window_str)) { const char *capture_target = window_str; bool direct_capture = strcmp(window_str, "screen-direct") == 0; if(direct_capture) { @@ -1427,52 +846,39 @@ int main(int argc, char **argv) { } gsr_capture_nvfbc_params nvfbc_params; + nvfbc_params.dpy = dpy; nvfbc_params.display_to_capture = capture_target; nvfbc_params.fps = fps; - nvfbc_params.pos = { (int)region_x, (int)region_y }; - nvfbc_params.size = { (int)region_width, (int)region_height }; + nvfbc_params.pos = { 0, 0 }; + nvfbc_params.size = { 0, 0 }; nvfbc_params.direct_capture = direct_capture; capture = gsr_capture_nvfbc_create(&nvfbc_params); if(!capture) return 1; - // TODO: Set window_width and window_height to the nvfbc blalba - if(strcmp(capture_target, "screen") == 0) { - window_width = WidthOfScreen(DefaultScreenOfDisplay(dpy)); - window_height = HeightOfScreen(DefaultScreenOfDisplay(dpy)); - } else { + if(strcmp(capture_target, "screen") != 0) { gsr_monitor gmon; if(!get_monitor_by_name(dpy, capture_target, &gmon)) { fprintf(stderr, "gsr error: display \"%s\" not found, expected one of:\n", capture_target); - fprintf(stderr, " \"screen\" 
(%dx%d+%d+%d)\n", WidthOfScreen(DefaultScreenOfDisplay(dpy)), HeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); - fprintf(stderr, " \"screen-direct\" (%dx%d+%d+%d)\n", WidthOfScreen(DefaultScreenOfDisplay(dpy)), HeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); + fprintf(stderr, " \"screen\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); + fprintf(stderr, " \"screen-direct\" (%dx%d+%d+%d)\n", XWidthOfScreen(DefaultScreenOfDisplay(dpy)), XHeightOfScreen(DefaultScreenOfDisplay(dpy)), 0, 0); for_each_active_monitor_output(dpy, monitor_output_callback_print, NULL); return 1; } - - window_width = gmon.size.x; - window_height = gmon.size.y; } - - // TODO: Move down - if(gsr_capture_start(capture) != 0) - return 1; } else { errno = 0; - src_window_id = strtol(window_str, nullptr, 0); + Window src_window_id = strtol(window_str, nullptr, 0); if(src_window_id == None || errno == EINVAL) { fprintf(stderr, "Invalid window number %s\n", window_str); usage(); } - } - int record_width = window_width; - int record_height = window_height; - if(record_area) { - if(sscanf(record_area, "%dx%d", &record_width, &record_height) != 2) { - fprintf(stderr, "Invalid value for -s '%s', expected a value in format WxH\n", record_area); + gsr_capture_xcomposite_params xcomposite_params; + xcomposite_params.window = src_window_id; + capture = gsr_capture_xcomposite_create(&xcomposite_params); + if(!capture) return 1; - } } const char *filename = args["-o"].value(); @@ -1495,77 +901,6 @@ int main(int argc, char **argv) { const double target_fps = 1.0 / (double)fps; - WindowPixmap window_pixmap; - Window window = None; - if(src_window_id) { - bool has_name_pixmap = x11_supports_composite_named_window_pixmap(dpy); - if (!has_name_pixmap) { - fprintf(stderr, "Error: XCompositeNameWindowPixmap is not supported by " - "your X11 server\n"); - return 1; - } - - XWindowAttributes attr; - if (!XGetWindowAttributes(dpy, src_window_id, 
&attr)) { - fprintf(stderr, "Error: Invalid window id: %lu\n", src_window_id); - return 1; - } - - window_width = std::max(0, attr.width); - window_height = std::max(0, attr.height); - window_x = attr.x; - window_y = attr.y; - Window c; - XTranslateCoordinates(dpy, src_window_id, DefaultRootWindow(dpy), 0, 0, &window_x, &window_y, &c); - - XCompositeRedirectWindow(dpy, src_window_id, CompositeRedirectAutomatic); - - if(!gl.load()) { - fprintf(stderr, "Error: Failed to load opengl\n"); - return 1; - } - - window = create_opengl_window(dpy); - if(!window) - return 1; - - set_vertical_sync_enabled(dpy, window, false); - recreate_window_pixmap(dpy, src_window_id, window_pixmap); - - if(!record_area) { - record_width = window_pixmap.texture_width; - record_height = window_pixmap.texture_height; - fprintf(stderr, "Record size: %dx%d\n", record_width, record_height); - } - } else { - window_pixmap.texture_id = 0; - window_pixmap.target_texture_id = 0; - window_pixmap.texture_width = window_width; - window_pixmap.texture_height = window_height; - } - - bool very_old_gpu = false; - bool gl_loaded = window; - if(!gl_loaded) { - if(!gl.load()) { - fprintf(stderr, "Error: Failed to load opengl\n"); - return 1; - } - } - - const unsigned char *gl_renderer = gl.glGetString(GL_RENDERER); - if(gl_renderer) { - int gpu_num = 1000; - sscanf((const char*)gl_renderer, "%*s %*s %*s %d", &gpu_num); - if(gpu_num < 900) { - fprintf(stderr, "Info: your gpu appears to be very old (older than maxwell architecture). 
Switching to lower preset\n"); - very_old_gpu = true; - } - } - - if(!gl_loaded) - gl.unload(); - if(strcmp(codec_to_use, "auto") == 0) { const AVCodec *h265_codec = find_h265_encoder(); @@ -1585,6 +920,12 @@ int main(int argc, char **argv) { } } + //bool use_hevc = strcmp(window_str, "screen") == 0 || strcmp(window_str, "screen-direct") == 0; + if(video_codec != VideoCodec::H264 && strcmp(container_format, "flv") == 0) { + video_codec = VideoCodec::H264; + fprintf(stderr, "Warning: h265 is not compatible with flv, falling back to h264 instead.\n"); + } + const AVCodec *video_codec_f = nullptr; switch(video_codec) { case VideoCodec::H264: @@ -1623,22 +964,19 @@ int main(int argc, char **argv) { requested_audio_inputs.push_back({ "", "gsr-silent" }); } - //bool use_hevc = strcmp(window_str, "screen") == 0 || strcmp(window_str, "screen-direct") == 0; - if(video_codec != VideoCodec::H264 && strcmp(container_format, "flv") == 0) { - video_codec = VideoCodec::H264; - fprintf(stderr, "Warning: h265 is not compatible with flv, falling back to h264 instead.\n"); - } - AVStream *video_stream = nullptr; std::vector audio_tracks; - AVCodecContext *video_codec_context = create_video_codec_context(av_format_context, quality, record_width, record_height, fps, video_codec_f, is_livestream); + AVCodecContext *video_codec_context = create_video_codec_context(av_format_context, quality, fps, video_codec_f, is_livestream); if(replay_buffer_size_secs == -1) video_stream = create_stream(av_format_context, video_codec_context); - AVBufferRef *device_ctx; - CUgraphicsResource cuda_graphics_resource; - open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx, !src_window_id, quality, is_livestream, very_old_gpu); + if(gsr_capture_start(capture, video_codec_context) != 0) { + fprintf(stderr, "gsr error: gsr_capture_start failed\n"); + return 1; + } + + open_video(video_codec_context, quality, very_old_gpu); if(video_stream) 
avcodec_parameters_from_context(video_stream->codecpar, video_codec_context); @@ -1668,9 +1006,6 @@ int main(int argc, char **argv) { } } - //video_stream->duration = AV_TIME_BASE * 15; - //audio_stream->duration = AV_TIME_BASE * 15; - //av_format_context->duration = AV_TIME_BASE * 15; if(replay_buffer_size_secs == -1) { int ret = avformat_write_header(av_format_context, nullptr); if (ret < 0) { @@ -1679,51 +1014,11 @@ int main(int argc, char **argv) { } } - // av_frame_free(&rgb_frame); - // avcodec_close(av_codec_context); - - if(src_window_id) - XSelectInput(dpy, src_window_id, StructureNotifyMask | ExposureMask); - - /* - int damage_event; - int damage_error; - if (!XDamageQueryExtension(dpy, &damage_event, &damage_error)) { - fprintf(stderr, "Error: XDamage is not supported by your X11 server\n"); - return 1; - } - - Damage damage = XDamageCreate(dpy, src_window_id, XDamageReportNonEmpty); - XDamageSubtract(dpy, damage,None,None); - */ - const double start_time_pts = clock_get_monotonic_seconds(); - CUcontext old_ctx; - CUarray mapped_array; - if(src_window_id) { - res = cuda.cuCtxPopCurrent_v2(&old_ctx); - res = cuda.cuCtxPushCurrent_v2(cu_ctx); - - // Get texture - res = cuda.cuGraphicsResourceSetMapFlags( - cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); - res = cuda.cuGraphicsMapResources(1, &cuda_graphics_resource, 0); - - // Map texture to cuda array - res = cuda.cuGraphicsSubResourceGetMappedArray(&mapped_array, - cuda_graphics_resource, 0, 0); - } - - // Release texture - // res = cuGraphicsUnmapResources(1, &cuda_graphics_resource, 0); - double start_time = clock_get_monotonic_seconds(); double frame_timer_start = start_time; - double window_resize_timer = start_time; - bool window_resized = false; int fps_counter = 0; - int current_fps = 30; AVFrame *frame = av_frame_alloc(); if (!frame) { @@ -1734,27 +1029,13 @@ int main(int argc, char **argv) { frame->width = video_codec_context->width; frame->height = 
video_codec_context->height; - if(src_window_id) { - if (av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0) < 0) { - fprintf(stderr, "Error: av_hwframe_get_buffer failed\n"); - exit(1); - } - } else { + if(video_codec_context->hw_frames_ctx) { + // TODO: Unref at the end? frame->hw_frames_ctx = av_buffer_ref(video_codec_context->hw_frames_ctx); frame->buf[0] = av_buffer_pool_get(((AVHWFramesContext*)video_codec_context->hw_frames_ctx->data)->pool); frame->extended_data = frame->data; } - if(window_pixmap.texture_width < record_width) - frame->width = window_pixmap.texture_width & ~1; - else - frame->width = record_width & ~1; - - if(window_pixmap.texture_height < record_height) - frame->height = window_pixmap.texture_height & ~1; - else - frame->height = record_height & ~1; - std::mutex write_output_mutex; const double record_start_time = clock_get_monotonic_seconds(); @@ -1882,108 +1163,20 @@ int main(int argc, char **argv) { }, av_format_context, &write_output_mutex); } - started = 1; - // Set update_fps to 24 to test if duplicate/delayed frames cause video/audio desync or too fast/slow video. 
const double update_fps = fps + 190; int64_t video_pts_counter = 0; + bool should_stop_error = false; - bool redraw = true; - XEvent e; while (running) { double frame_start = clock_get_monotonic_seconds(); - if(window) - gl.glClear(GL_COLOR_BUFFER_BIT); - - redraw = true; - - if(src_window_id) { - if (XCheckTypedWindowEvent(dpy, src_window_id, DestroyNotify, &e)) { - running = 0; - } - - if (XCheckTypedWindowEvent(dpy, src_window_id, Expose, &e) && e.xexpose.count == 0) { - window_resize_timer = clock_get_monotonic_seconds(); - window_resized = true; - } - - if (XCheckTypedWindowEvent(dpy, src_window_id, ConfigureNotify, &e) && e.xconfigure.window == src_window_id) { - while(XCheckTypedWindowEvent(dpy, src_window_id, ConfigureNotify, &e)) {} - window_x = e.xconfigure.x; - window_y = e.xconfigure.y; - Window c; - XTranslateCoordinates(dpy, src_window_id, DefaultRootWindow(dpy), 0, 0, &window_x, &window_y, &c); - // Window resize - if(e.xconfigure.width != (int)window_width || e.xconfigure.height != (int)window_height) { - window_width = std::max(0, e.xconfigure.width); - window_height = std::max(0, e.xconfigure.height); - window_resize_timer = clock_get_monotonic_seconds(); - window_resized = true; - } - } - - const double window_resize_timeout = 1.0; // 1 second - if(window_resized && clock_get_monotonic_seconds() - window_resize_timer >= window_resize_timeout) { - window_resized = false; - fprintf(stderr, "Resize window!\n"); - recreate_window_pixmap(dpy, src_window_id, window_pixmap); - // Resolution must be a multiple of two - //video_stream->codec->width = window_pixmap.texture_width & ~1; - //video_stream->codec->height = window_pixmap.texture_height & ~1; - - cuda.cuGraphicsUnregisterResource(cuda_graphics_resource); - res = cuda.cuGraphicsGLRegisterImage( - &cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D, - CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY); - if (res != CUDA_SUCCESS) { - const char *err_str; - cuda.cuGetErrorString(res, 
&err_str); - fprintf(stderr, - "Error: cuda.cuGraphicsGLRegisterImage failed, error %s, texture " - "id: %u\n", - err_str, window_pixmap.target_texture_id); - running = false; - break; - } - - res = cuda.cuGraphicsResourceSetMapFlags( - cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); - res = cuda.cuGraphicsMapResources(1, &cuda_graphics_resource, 0); - res = cuda.cuGraphicsSubResourceGetMappedArray(&mapped_array, cuda_graphics_resource, 0, 0); - - av_frame_free(&frame); - frame = av_frame_alloc(); - if (!frame) { - fprintf(stderr, "Error: Failed to allocate frame\n"); - running = false; - break; - } - frame->format = video_codec_context->pix_fmt; - frame->width = video_codec_context->width; - frame->height = video_codec_context->height; - - if (av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0) < 0) { - fprintf(stderr, "Error: av_hwframe_get_buffer failed\n"); - running = false; - break; - } - - if(window_pixmap.texture_width < record_width) - frame->width = window_pixmap.texture_width & ~1; - else - frame->width = record_width & ~1; - if(window_pixmap.texture_height < record_height) - frame->height = window_pixmap.texture_height & ~1; - else - frame->height = record_height & ~1; - - // Make the new completely black to clear unused parts - // TODO: cuMemsetD32? 
- cuda.cuMemsetD8_v2((CUdeviceptr)frame->data[0], 0, record_width * record_height * 4); - } + gsr_capture_tick(capture, video_codec_context, &frame); + should_stop_error = false; + if(gsr_capture_should_stop(capture, &should_stop_error)) { + running = 0; + break; } - ++fps_counter; double time_now = clock_get_monotonic_seconds(); @@ -1992,110 +1185,13 @@ int main(int argc, char **argv) { if (elapsed >= 1.0) { fprintf(stderr, "update fps: %d\n", fps_counter); start_time = time_now; - current_fps = fps_counter; fps_counter = 0; } double frame_time_overflow = frame_timer_elapsed - target_fps; if (frame_time_overflow >= 0.0) { frame_timer_start = time_now - frame_time_overflow; - - bool frame_captured = true; - if(redraw) { - redraw = false; - if(src_window_id) { - // TODO: Use a framebuffer instead. glCopyImageSubData requires - // opengl 4.2 - int source_x = 0; - int source_y = 0; - - int source_width = window_pixmap.texture_width; - int source_height = window_pixmap.texture_height; - - bool clamped = false; - - if(window_pixmap.composite_window) { - source_x = window_x; - source_y = window_y; - - int underflow_x = 0; - int underflow_y = 0; - - if(source_x < 0) { - underflow_x = -source_x; - source_x = 0; - source_width += source_x; - } - - if(source_y < 0) { - underflow_y = -source_y; - source_y = 0; - source_height += source_y; - } - - const int clamped_source_width = std::max(0, window_pixmap.texture_real_width - source_x - underflow_x); - const int clamped_source_height = std::max(0, window_pixmap.texture_real_height - source_y - underflow_y); - - if(clamped_source_width < source_width) { - source_width = clamped_source_width; - clamped = true; - } - - if(clamped_source_height < source_height) { - source_height = clamped_source_height; - clamped = true; - } - } - - if(clamped) { - // Requires opengl 4.4... 
TODO: Replace with earlier opengl if opengl < 4.2 - if(gl.glClearTexImage) - gl.glClearTexImage(window_pixmap.target_texture_id, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr); - } - - // Requires opengl 4.2... TODO: Replace with earlier opengl if opengl < 4.2 - gl.glCopyImageSubData( - window_pixmap.texture_id, GL_TEXTURE_2D, 0, source_x, source_y, 0, - window_pixmap.target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, - source_width, source_height, 1); - unsigned int err = gl.glGetError(); - if(err != 0) { - static bool error_shown = false; - if(!error_shown) { - error_shown = true; - fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err); - } - } - gl.glXSwapBuffers(dpy, window); - // int err = gl.glGetError(); - // fprintf(stderr, "error: %d\n", err); - - // TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id - - frame->linesize[0] = frame->width * 4; - - CUDA_MEMCPY2D memcpy_struct; - memcpy_struct.srcXInBytes = 0; - memcpy_struct.srcY = 0; - memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY; - - memcpy_struct.dstXInBytes = 0; - memcpy_struct.dstY = 0; - memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE; - - memcpy_struct.srcArray = mapped_array; - memcpy_struct.dstDevice = (CUdeviceptr)frame->data[0]; - memcpy_struct.dstPitch = frame->linesize[0]; - memcpy_struct.WidthInBytes = frame->width * 4; - memcpy_struct.Height = frame->height; - cuda.cuMemcpy2D_v2(&memcpy_struct); - - frame_captured = true; - } else { - gsr_capture_capture(capture, frame); - } - // res = cuda.cuCtxPopCurrent_v2(&old_ctx); - } + gsr_capture_capture(capture, frame); const double this_video_frame_time = clock_get_monotonic_seconds(); const int64_t expected_frames = std::round((this_video_frame_time - start_time_pts) / target_fps); @@ -2152,12 +1248,11 @@ int main(int argc, char **argv) { if(replay_buffer_size_secs == -1 && !(output_format->flags & AVFMT_NOFILE)) 
avio_close(av_format_context->pb); - if(capture) - gsr_capture_destroy(capture); + gsr_capture_destroy(capture, video_codec_context); if(dpy) XCloseDisplay(dpy); - unlink(pid_file); free(empty_audio); + return should_stop_error ? 3 : 0; } diff --git a/src/sound.cpp b/src/sound.cpp index 794d3ea..9083f1e 100644 --- a/src/sound.cpp +++ b/src/sound.cpp @@ -1,21 +1,7 @@ -/* - Copyright (C) 2020 dec05eba - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/
-
 #include "../include/sound.hpp"
+extern "C" {
+#include "../include/time.h"
+}
 
 #include
 #include
@@ -43,14 +29,6 @@
         } \
     } while(false);
 
-static double clock_get_monotonic_seconds() {
-    struct timespec ts;
-    ts.tv_sec = 0;
-    ts.tv_nsec = 0;
-    clock_gettime(CLOCK_MONOTONIC, &ts);
-    return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001;
-}
-
 struct pa_handle {
     pa_context *context;
     pa_stream *stream;
diff --git a/src/time.c b/src/time.c
new file mode 100644
index 0000000..b2dcdea
--- /dev/null
+++ b/src/time.c
@@ -0,0 +1,12 @@
+#include "../include/time.h"
+#include <time.h>
+
+/* Returns the current CLOCK_MONOTONIC time in seconds */
+double clock_get_monotonic_seconds(void) {
+    /* Zeroed up front so the result is 0.0 if clock_gettime fails (its return value is not checked) */
+    struct timespec ts;
+    ts.tv_sec = 0;
+    ts.tv_nsec = 0;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001;
+}
diff --git a/src/window_texture.c b/src/window_texture.c
new file mode 100644
index 0000000..1c152ae
--- /dev/null
+++ b/src/window_texture.c
@@ -0,0 +1,194 @@
+#include "../include/window_texture.h"
+#include <X11/extensions/Xcomposite.h>
+#include <stdio.h>
+
+/* Returns non-zero if the X server reports the Composite extension with version 0.2 or newer */
+static int x11_supports_composite_named_window_pixmap(Display *display) {
+    int extension_major;
+    int extension_minor;
+    if(!XCompositeQueryExtension(display, &extension_major, &extension_minor))
+        return 0;
+
+    int major_version;
+    int minor_version;
+    return XCompositeQueryVersion(display, &major_version, &minor_version) && (major_version > 0 || minor_version >= 2);
+}
+
+/*
+    Stores |display|, |window| and |gl| in |window_texture|, redirects the window with XComposite
+    and creates the first texture binding through window_texture_on_resize.
+    Returns 0 on success, 1 if the composite extension is too old or missing,
+    otherwise the non-zero result of window_texture_on_resize.
+*/
+int window_texture_init(WindowTexture *window_texture, Display *display, Window window, gsr_gl *gl) {
+    window_texture->display = display;
+    window_texture->window = window;
+    window_texture->pixmap = None;
+    window_texture->glx_pixmap = None;
+    window_texture->texture_id = 0;
+    window_texture->redirected = 0;
+    window_texture->gl = gl;
+
+    if(!x11_supports_composite_named_window_pixmap(display))
+        return 1;
+
+    XCompositeRedirectWindow(display, window, CompositeRedirectAutomatic);
+    window_texture->redirected = 1;
+    return window_texture_on_resize(window_texture);
+}
+
+/* Releases the glx pixmap and the x11 pixmap. The opengl texture is only deleted if |delete_texture| is non-zero */
+static void window_texture_cleanup(WindowTexture *self, int delete_texture) {
+    if(delete_texture && self->texture_id) {
+        self->gl->glDeleteTextures(1, &self->texture_id);
+        self->texture_id = 0;
+    }
+
+    if(self->glx_pixmap) {
+        /* The tex image has to be released while the glx pixmap is still valid, so release before destroy */
+        self->gl->glXReleaseTexImageEXT(self->display, self->glx_pixmap, GLX_FRONT_EXT);
+        self->gl->glXDestroyPixmap(self->display, self->glx_pixmap);
+        self->glx_pixmap = None;
+    }
+
+    if(self->pixmap) {
+        XFreePixmap(self->display, self->pixmap);
+        self->pixmap = None;
+    }
+}
+
+/* Unredirects the window if window_texture_init redirected it and frees the pixmaps and the texture */
+void window_texture_deinit(WindowTexture *self) {
+    if(self->redirected) {
+        XCompositeUnredirectWindow(self->display, self->window, CompositeRedirectAutomatic);
+        self->redirected = 0;
+    }
+    window_texture_cleanup(self, 1);
+}
+
+/*
+    Drops the current pixmaps and binds a newly named window pixmap to the texture.
+    The existing texture id is reused if there is one, otherwise a new texture is generated.
+    Returns 0 on success and non-zero on failure, in which case no pixmap is left stored in |self|.
+*/
+int window_texture_on_resize(WindowTexture *self) {
+    window_texture_cleanup(self, 0);
+
+    int result = 0;
+    GLXFBConfig *configs = NULL;
+    Pixmap pixmap = None;
+    GLXPixmap glx_pixmap = None;
+    unsigned int texture_id = 0;
+    int glx_pixmap_bound = 0;
+
+    const int pixmap_config[] = {
+        GLX_BIND_TO_TEXTURE_RGB_EXT, True,
+        GLX_DRAWABLE_TYPE, GLX_PIXMAP_BIT | GLX_WINDOW_BIT,
+        GLX_BIND_TO_TEXTURE_TARGETS_EXT, GLX_TEXTURE_2D_BIT_EXT,
+        /*GLX_BIND_TO_MIPMAP_TEXTURE_EXT, True,*/
+        GLX_BUFFER_SIZE, 24,
+        GLX_RED_SIZE, 8,
+        GLX_GREEN_SIZE, 8,
+        GLX_BLUE_SIZE, 8,
+        GLX_ALPHA_SIZE, 0,
+        None
+    };
+
+    const int pixmap_attribs[] = {
+        GLX_TEXTURE_TARGET_EXT, GLX_TEXTURE_2D_EXT,
+        GLX_TEXTURE_FORMAT_EXT, GLX_TEXTURE_FORMAT_RGB_EXT,
+        /*GLX_MIPMAP_TEXTURE_EXT, True,*/
+        None
+    };
+
+    XWindowAttributes attr;
+    if (!XGetWindowAttributes(self->display, self->window, &attr)) {
+        fprintf(stderr, "Failed to get window attributes\n");
+        return 1;
+    }
+
+    GLXFBConfig config;
+    int c;
+    configs = self->gl->glXChooseFBConfig(self->display, 0, pixmap_config, &c);
+    if(!configs) {
+        fprintf(stderr, "Failed to choose fb config\n");
+        return 1;
+    }
+
+    /* Use the first fb config whose visual has the same depth as the window */
+    int found = 0;
+    for (int i = 0; i < c; i++) {
+        config = configs[i];
+        XVisualInfo *visual = self->gl->glXGetVisualFromFBConfig(self->display, config);
+        if (!visual)
+            continue;
+
+        if (attr.depth != visual->depth) {
+            XFree(visual);
+            continue;
+        }
+        XFree(visual);
+        found = 1;
+        break;
+    }
+
+    if(!found) {
+        fprintf(stderr, "No matching fb config found\n");
+        result = 1;
+        goto cleanup;
+    }
+
+    pixmap = XCompositeNameWindowPixmap(self->display, self->window);
+    if(!pixmap) {
+        result = 2;
+        goto cleanup;
+    }
+
+    glx_pixmap = self->gl->glXCreatePixmap(self->display, config, pixmap, pixmap_attribs);
+    if(!glx_pixmap) {
+        result = 3;
+        goto cleanup;
+    }
+
+    if(self->texture_id == 0) {
+        self->gl->glGenTextures(1, &texture_id);
+        if(texture_id == 0) {
+            result = 4;
+            goto cleanup;
+        }
+        self->gl->glBindTexture(GL_TEXTURE_2D, texture_id);
+    } else {
+        self->gl->glBindTexture(GL_TEXTURE_2D, self->texture_id);
+    }
+
+    self->gl->glXBindTexImageEXT(self->display, glx_pixmap, GLX_FRONT_EXT, NULL);
+    glx_pixmap_bound = 1;
+
+    self->gl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    self->gl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    self->gl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    self->gl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
+    self->gl->glBindTexture(GL_TEXTURE_2D, 0);
+
+    XFree(configs);
+    self->pixmap = pixmap;
+    self->glx_pixmap = glx_pixmap;
+    if(texture_id != 0)
+        self->texture_id = texture_id;
+    return 0;
+
+    cleanup:
+    /* Same ordering rule as in window_texture_cleanup: release the tex image before destroying the glx pixmap */
+    if(texture_id != 0) self->gl->glDeleteTextures(1, &texture_id);
+    if(glx_pixmap_bound) self->gl->glXReleaseTexImageEXT(self->display, glx_pixmap, GLX_FRONT_EXT);
+    if(glx_pixmap) self->gl->glXDestroyPixmap(self->display, glx_pixmap);
+    if(pixmap) XFreePixmap(self->display, pixmap);
+    if(configs) XFree(configs);
+    return result;
+}
+
+/* Returns the opengl texture id, or 0 if no texture has been created */
+unsigned int window_texture_get_opengl_texture_id(WindowTexture *self) {
+    return self->texture_id;
+}
-- 
cgit v1.2.3