From 8cbdb596ebf79587a432ed40583630b6cd39ed88 Mon Sep 17 00:00:00 2001 From: Steam Deck User Date: Thu, 16 Mar 2023 13:36:19 +0100 Subject: vaapi wip --- src/capture/xcomposite_drm.c | 708 ++++++++++++++++++++++++------------------- src/cuda.c | 8 +- src/egl.c | 59 ++-- src/main.cpp | 208 ++++++++----- src/vaapi.c | 41 +++ src/window_texture.c | 8 +- 6 files changed, 602 insertions(+), 430 deletions(-) create mode 100644 src/vaapi.c (limited to 'src') diff --git a/src/capture/xcomposite_drm.c b/src/capture/xcomposite_drm.c index 6e3f7bb..9fb323d 100644 --- a/src/capture/xcomposite_drm.c +++ b/src/capture/xcomposite_drm.c @@ -1,5 +1,6 @@ #include "../../include/capture/xcomposite_drm.h" #include "../../include/egl.h" +#include "../../include/vaapi.h" #include "../../include/window_texture.h" #include "../../include/time.h" #include @@ -7,7 +8,7 @@ #include #include #include -#include +#include #include #include //#include @@ -28,6 +29,7 @@ typedef struct { WindowTexture window_texture; gsr_egl egl; + gsr_vaapi vaapi; int fourcc; int num_planes; @@ -36,12 +38,16 @@ typedef struct { int32_t stride; int32_t offset; - unsigned int target_texture_id; + unsigned int target_textures[2]; - unsigned int FramebufferName; - unsigned int quad_VertexArrayID; - unsigned int quad_vertexbuffer; + unsigned int FramebufferNameY; + unsigned int FramebufferNameUV; // TODO: Remove unsigned int quadVAO; + + unsigned int shader_y; + unsigned int shader_uv; + + VADisplay va_dpy; } gsr_capture_xcomposite_drm; static int max_int(int a, int b) { @@ -71,11 +77,16 @@ static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCo (AVHWFramesContext *)frame_context->data; hw_frame_context->width = video_codec_context->width; hw_frame_context->height = video_codec_context->height; - hw_frame_context->sw_format = AV_PIX_FMT_YUV420P;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_YUV420P;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_NV12; + hw_frame_context->sw_format = AV_PIX_FMT_NV12;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_YUV420P;//AV_PIX_FMT_0RGB32;//AV_PIX_FMT_NV12; hw_frame_context->format = video_codec_context->pix_fmt; hw_frame_context->device_ref = device_ctx; hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; + hw_frame_context->initial_pool_size = 1; + + AVVAAPIDeviceContext *vactx =((AVHWDeviceContext*)device_ctx->data)->hwctx; + cap_xcomp->va_dpy = vactx->display; + if (av_hwframe_ctx_init(frame_context) < 0) { fprintf(stderr, "Error: Failed to initialize hardware frame context " "(note: ffmpeg version needs to be > 4.0)\n"); @@ -89,45 +100,6 @@ static bool drm_create_codec_context(gsr_capture_xcomposite_drm *cap_xcomp, AVCo return true; } -#define EGL_SURFACE_TYPE 0x3033 -#define EGL_WINDOW_BIT 0x0004 -#define EGL_PIXMAP_BIT 0x0002 -#define EGL_BIND_TO_TEXTURE_RGB 0x3039 -#define EGL_TRUE 1 -#define EGL_RED_SIZE 0x3024 -#define EGL_GREEN_SIZE 0x3023 -#define EGL_BLUE_SIZE 0x3022 -#define EGL_ALPHA_SIZE 0x3021 -#define EGL_TEXTURE_FORMAT 0x3080 -#define EGL_TEXTURE_RGB 0x305D -#define EGL_TEXTURE_TARGET 0x3081 -#define EGL_TEXTURE_2D 0x305F -#define EGL_GL_TEXTURE_2D 0x30B1 - -#define GL_RGBA 0x1908 - -static unsigned int gl_create_texture(gsr_capture_xcomposite_drm *cap_xcomp, int width, int height) { - // Generating this second texture is needed because - // cuGraphicsGLRegisterImage cant be used with the texture that is mapped - // directly to the pixmap. 
- // TODO: Investigate if it's somehow possible to use the pixmap texture - // directly, this should improve performance since only less image copy is - // then needed every frame. - // Ignoring failure for now.. TODO: Show proper error - unsigned int texture_id = 0; - cap_xcomp->egl.glGenTextures(1, &texture_id); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, texture_id); - cap_xcomp->egl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); - return texture_id; -} - #define GL_COMPILE_STATUS 0x8B81 #define GL_INFO_LOG_LENGTH 0x8B84 @@ -249,10 +221,7 @@ unsigned int esLoadProgram ( gsr_capture_xcomposite_drm *cap_xcomp, const char * return programObject; } -static unsigned int shader_program = 0; -static unsigned int texID = 0; - -static void LoadShaders(gsr_capture_xcomposite_drm *cap_xcomp) { +static unsigned int LoadShadersY(gsr_capture_xcomposite_drm *cap_xcomp) { char vShaderStr[] = "#version 300 es \n" "in vec2 pos; \n" @@ -318,17 +287,168 @@ static void LoadShaders(gsr_capture_xcomposite_drm *cap_xcomp) { " FragColor = vec4(rgb, 1.0); \n" "} \n"; #elif 1 + char fShaderStr[] = + "#version 300 es \n" + "precision mediump float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + //"uniform sampler2D tex2; \n" + "out vec4 FragColor; \n" + //"out vec4 FragColor2; \n" + "mat4 RGBtoYUV() {\n" + " return mat4(\n" + " vec4(0.257, 0.439, -0.148, 0.0),\n" + " vec4(0.504, -0.368, -0.291, 0.0),\n" + " vec4(0.098, -0.071, 0.439, 0.0),\n" + " vec4(0.0625, 0.500, 0.500, 1.0)\n" + " );\n" + "}\n" + "void main() \n" + "{ \n" + //" vec3 yuv = rgb2yuv(texture(tex1, texcoords_out).rgb); \n" + //" FragColor.x = yuv.x; \n" + //" FragColor2.xy = yuv.xy; \n" + //" vec3 rgb = texture(tex1, texcoords_out).rgb;\n" + "FragColor.x = (RGBtoYUV() * vec4(texture(tex1, texcoords_out).rgb, 1.0)).x;\n" + //"FragColor2.xy = (RGBtoYUV() * vec4(texture(tex1, texcoords_out*2.0).rgb, 1.0)).zy;\n" + "} \n"; +#else char fShaderStr[] = "#version 300 es \n" "precision mediump float; \n" "in vec2 texcoords_out; \n" "uniform sampler2D tex; \n" "out vec4 FragColor; \n" + + "vec3 rgb2yuv(vec3 rgb){\n" + " float y = 0.299*rgb.r + 0.587*rgb.g + 0.114*rgb.b;\n" + " return vec3(y, 0.493*(rgb.b-y), 0.877*(rgb.r-y));\n" + "}\n" + + "vec3 yuv2rgb(vec3 yuv){\n" + " float y = yuv.x;\n" + " float u = yuv.y;\n" + " float v = yuv.z;\n" + " \n" + " return vec3(\n" + " y + 1.0/0.877*v,\n" + " y - 0.39393*u - 0.58081*v,\n" + " y + 1.0/0.493*u\n" + " );\n" + "}\n" + "void main() \n" "{ \n" - " vec3 rgb = texture(tex, texcoords_out).rgb; \n" + " float s = 0.5;\n" + " vec3 lum = texture(tex, texcoords_out).rgb;\n" + " vec3 chr = texture(tex, floor(texcoords_out*s-.5)/s).rgb;\n" + " vec3 rgb = vec3(rgb2yuv(lum).x, rgb2yuv(chr).yz);\n" " FragColor = vec4(rgb, 1.0); \n" "} \n"; +#endif + + unsigned int shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr); + if (shader_program == 0) { + fprintf(stderr, "failed to create shader!\n"); + return 0; + } + + cap_xcomp->egl.glBindAttribLocation(shader_program, 0, "pos"); + cap_xcomp->egl.glBindAttribLocation(shader_program, 1, "texcoords"); + return 
shader_program; +} + +static unsigned int LoadShadersUV(gsr_capture_xcomposite_drm *cap_xcomp) { + char vShaderStr[] = + "#version 300 es \n" + "in vec2 pos; \n" + "in vec2 texcoords; \n" + "out vec2 texcoords_out; \n" + "void main() \n" + "{ \n" + " texcoords_out = texcoords; \n" + " gl_Position = vec4(pos.x, pos.y, 0.0, 1.0); \n" + "} \n"; + +#if 0 + char fShaderStr[] = + "#version 300 es \n" + "precision mediump float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex; \n" + "out vec4 FragColor; \n" + + + "float imageWidth = 1920.0;\n" + "float imageHeight = 1080.0;\n" + + "float getYPixel(vec2 position) {\n" + " position.y = (position.y * 2.0 / 3.0) + (1.0 / 3.0);\n" + " return texture2D(tex, position).x;\n" + "}\n" +"\n" + "vec2 mapCommon(vec2 position, float planarOffset) {\n" + " planarOffset += (imageWidth * floor(position.y / 2.0)) / 2.0 +\n" + " floor((imageWidth - 1.0 - position.x) / 2.0);\n" + " float x = floor(imageWidth - 1.0 - floor(mod(planarOffset, imageWidth)));\n" + " float y = floor(floor(planarOffset / imageWidth));\n" + " return vec2((x + 0.5) / imageWidth, (y + 0.5) / (1.5 * imageHeight));\n" + "}\n" +"\n" + "vec2 mapU(vec2 position) {\n" + " float planarOffset = (imageWidth * imageHeight) / 4.0;\n" + " return mapCommon(position, planarOffset);\n" + "}\n" +"\n" + "vec2 mapV(vec2 position) {\n" + " return mapCommon(position, 0.0);\n" + "}\n" + + "void main() \n" + "{ \n" + + "vec2 pixelPosition = vec2(floor(imageWidth * texcoords_out.x),\n" + " floor(imageHeight * texcoords_out.y));\n" + "pixelPosition -= vec2(0.5, 0.5);\n" +"\n" + "float yChannel = getYPixel(texcoords_out);\n" + "float uChannel = texture2D(tex, mapU(pixelPosition)).x;\n" + "float vChannel = texture2D(tex, mapV(pixelPosition)).x;\n" + "vec4 channels = vec4(yChannel, uChannel, vChannel, 1.0);\n" + "mat4 conversion = mat4(1.0, 0.0, 1.402, -0.701,\n" + " 1.0, -0.344, -0.714, 0.529,\n" + " 1.0, 1.772, 0.0, -0.886,\n" + " 0, 0, 0, 0);\n" + "vec3 rgb = (channels * conversion).xyz;\n" + + " FragColor = vec4(rgb, 1.0); \n" + "} \n"; +#elif 1 + char fShaderStr[] = + "#version 300 es \n" + "precision mediump float; \n" + "in vec2 texcoords_out; \n" + "uniform sampler2D tex1; \n" + //"uniform sampler2D tex2; \n" + "out vec4 FragColor; \n" + //"out vec4 FragColor2; \n" + "mat4 RGBtoYUV() {\n" + " return mat4(\n" + " vec4(0.257, 0.439, -0.148, 0.0),\n" + " vec4(0.504, -0.368, -0.291, 0.0),\n" + " vec4(0.098, -0.071, 0.439, 0.0),\n" + " vec4(0.0625, 0.500, 0.500, 1.0)\n" + " );\n" + "}\n" + "void main() \n" + "{ \n" + //" vec3 yuv = rgb2yuv(texture(tex1, texcoords_out).rgb); \n" + //" FragColor.x = yuv.x; \n" + //" FragColor2.xy = yuv.xy; \n" + //" vec3 rgb = texture(tex1, texcoords_out).rgb;\n" + //"FragColor.x = (RGBtoYUV() * vec4(texture(tex1, texcoords_out).rgb, 1.0)).x;\n" + "FragColor.xy = (RGBtoYUV() * vec4(texture(tex1, texcoords_out*2.0).rgb, 1.0)).zy;\n" + "} \n"; #else char fShaderStr[] = "#version 300 es \n" @@ -364,15 +484,15 @@ static void LoadShaders(gsr_capture_xcomposite_drm *cap_xcomp) { "} \n"; #endif - shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr); + unsigned int shader_program = esLoadProgram(cap_xcomp, vShaderStr, fShaderStr); if (shader_program == 0) { fprintf(stderr, "failed to create shader!\n"); - return; + return 0; } cap_xcomp->egl.glBindAttribLocation(shader_program, 0, "pos"); cap_xcomp->egl.glBindAttribLocation(shader_program, 1, "texcoords"); - return; + return shader_program; } #define GL_FLOAT 0x1406 @@ -381,12 +501,20 @@ static void 
LoadShaders(gsr_capture_xcomposite_drm *cap_xcomp) { #define GL_TRIANGLES 0x0004 #define DRM_FORMAT_MOD_INVALID 72057594037927935 +#define EGL_TRUE 1 +#define EGL_IMAGE_PRESERVED_KHR 0x30D2 +#define EGL_NATIVE_PIXMAP_KHR 0x30B0 + +static uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + return (d << 24) | (c << 16) | (b << 8) | a; +} + static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *video_codec_context) { gsr_capture_xcomposite_drm *cap_xcomp = cap->priv; XWindowAttributes attr; if(!XGetWindowAttributes(cap_xcomp->dpy, cap_xcomp->params.window, &attr)) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_start failed: invalid window id: %lu\n", cap_xcomp->params.window); + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start failed: invalid window id: %lu\n", cap_xcomp->params.window); return -1; } @@ -399,18 +527,24 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi XSelectInput(cap_xcomp->dpy, cap_xcomp->params.window, StructureNotifyMask | ExposureMask); if(!gsr_egl_load(&cap_xcomp->egl, cap_xcomp->dpy)) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: failed to load opengl\n"); + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed to load opengl\n"); return -1; } if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: could not find eglExportDMABUFImageQueryMESA\n"); + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: could not find eglExportDMABUFImageQueryMESA\n"); gsr_egl_unload(&cap_xcomp->egl); return -1; } if(!cap_xcomp->egl.eglExportDMABUFImageMESA) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_start: could not find eglExportDMABUFImageMESA\n"); + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: could not find eglExportDMABUFImageMESA\n"); + gsr_egl_unload(&cap_xcomp->egl); + return -1; + } + + if(!gsr_vaapi_load(&cap_xcomp->vaapi)) { + fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed to load vaapi\n"); gsr_egl_unload(&cap_xcomp->egl); return -1; } @@ -462,17 +596,6 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &cap_xcomp->texture_size.y); cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); - #if 1 - cap_xcomp->target_texture_id = gl_create_texture(cap_xcomp, cap_xcomp->texture_size.x, cap_xcomp->texture_size.y); - if(cap_xcomp->target_texture_id == 0) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_start: failed to create opengl texture\n"); - return -1; - } - #else - // TODO: - cap_xcomp->target_texture_id = window_texture_get_opengl_texture_id(&cap_xcomp->window_texture); - #endif - cap_xcomp->texture_size.x = max_int(2, cap_xcomp->texture_size.x & ~1); cap_xcomp->texture_size.y = max_int(2, cap_xcomp->texture_size.y & ~1); @@ -480,13 +603,18 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi video_codec_context->height = cap_xcomp->texture_size.y; { - EGLImage img = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_context, EGL_GL_TEXTURE_2D, (EGLClientBuffer)(uint64_t)cap_xcomp->target_texture_id, NULL); + const intptr_t pixmap_attrs[] = { + EGL_IMAGE_PRESERVED_KHR, EGL_TRUE, + EGL_NONE, + }; + + EGLImage img = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_context, EGL_GL_TEXTURE_2D, 
(EGLClientBuffer)(uint64_t)window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), pixmap_attrs); if(!img) { fprintf(stderr, "eglCreateImage failed\n"); return -1; } - if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA(cap_xcomp->egl.egl_display, img, &cap_xcomp->fourcc, &cap_xcomp->num_planes, &cap_xcomp->modifiers) || cap_xcomp->modifiers == DRM_FORMAT_MOD_INVALID) { + if(!cap_xcomp->egl.eglExportDMABUFImageQueryMESA(cap_xcomp->egl.egl_display, img, &cap_xcomp->fourcc, &cap_xcomp->num_planes, &cap_xcomp->modifiers)) { fprintf(stderr, "eglExportDMABUFImageQueryMESA failed\n"); return -1; } @@ -502,102 +630,21 @@ static int gsr_capture_xcomposite_drm_start(gsr_capture *cap, AVCodecContext *vi return -1; } - fprintf(stderr, "texture: %u, dmabuf: %d, stride: %d, offset: %d\n", cap_xcomp->target_texture_id, cap_xcomp->dmabuf_fd, cap_xcomp->stride, cap_xcomp->offset); + fprintf(stderr, "texture: %u, dmabuf: %d, stride: %d, offset: %d\n", window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), cap_xcomp->dmabuf_fd, cap_xcomp->stride, cap_xcomp->offset); fprintf(stderr, "fourcc: %d, num planes: %d, modifiers: %zu\n", cap_xcomp->fourcc, cap_xcomp->num_planes, cap_xcomp->modifiers); } - cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->FramebufferName); - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->FramebufferName); - - cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_texture_id, 0); - - // Set the list of draw buffers. - unsigned int DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; - cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers - - if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - fprintf(stderr, "Failed to setup framebuffer\n"); - return -1; - } - - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0); - - //cap_xcomp->egl.glGenVertexArrays(1, &cap_xcomp->quad_VertexArrayID); - //cap_xcomp->egl.glBindVertexArray(cap_xcomp->quad_VertexArrayID); - - static const float g_quad_vertex_buffer_data[] = { - -1.0f, -1.0f, 0.0f, - 1.0f, -1.0f, 0.0f, - -1.0f, 1.0f, 0.0f, - -1.0f, 1.0f, 0.0f, - 1.0f, -1.0f, 0.0f, - 1.0f, 1.0f, 0.0f, - }; - - //cap_xcomp->egl.glGenBuffers(1, &cap_xcomp->quad_vertexbuffer); - //cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, cap_xcomp->quad_vertexbuffer); - //cap_xcomp->egl.glBufferData(GL_ARRAY_BUFFER, sizeof(g_quad_vertex_buffer_data), g_quad_vertex_buffer_data, GL_STATIC_DRAW); - - // Create and compile our GLSL program from the shaders - LoadShaders(cap_xcomp); - texID = cap_xcomp->egl.glGetUniformLocation(shader_program, "tex"); - fprintf(stderr, "uniform id: %u\n", texID); - - float vVertices[] = { - -1.0f, 1.0f, 0.0f, 1.0f, - -1.0f, -1.0f, 0.0f, 0.0f, - 1.0f, -1.0f, 1.0f, 0.0f, - - -1.0f, 1.0f, 0.0f, 1.0f, - 1.0f, -1.0f, 1.0f, 0.0f, - 1.0f, 1.0f, 1.0f, 1.0f - }; - - unsigned int quadVBO; - cap_xcomp->egl.glGenVertexArrays(1, &cap_xcomp->quadVAO); - cap_xcomp->egl.glGenBuffers(1, &quadVBO); - cap_xcomp->egl.glBindVertexArray(cap_xcomp->quadVAO); - cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, quadVBO); - cap_xcomp->egl.glBufferData(GL_ARRAY_BUFFER, sizeof(vVertices), &vVertices, GL_STATIC_DRAW); - - cap_xcomp->egl.glEnableVertexAttribArray(0); - cap_xcomp->egl.glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); - - cap_xcomp->egl.glEnableVertexAttribArray(1); - cap_xcomp->egl.glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); - - 
cap_xcomp->egl.glBindVertexArray(0); - - //cap_xcomp->egl.glUniform1i(texID, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); - - //cap_xcomp->egl.glViewport(0, 0, 1920, 1080); - - //cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, 0); - //cap_xcomp->egl.glBindVertexArray(0); - if(!drm_create_codec_context(cap_xcomp, video_codec_context)) { fprintf(stderr, "failed to create hw codec context\n"); gsr_egl_unload(&cap_xcomp->egl); return -1; } - fprintf(stderr, "sneed: %u\n", cap_xcomp->FramebufferName); + //fprintf(stderr, "sneed: %u\n", cap_xcomp->FramebufferName); return 0; #endif } -// TODO: -static void free_desc(void *opaque, uint8_t *data) { - AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)data; - int i; - - //for (i = 0; i < desc->nb_objects; i++) - // close(desc->objects[i].fd); - - av_free(desc); -} - - static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *video_codec_context, AVFrame **frame) { gsr_capture_xcomposite_drm *cap_xcomp = cap->priv; @@ -606,77 +653,6 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi if(!cap_xcomp->created_hw_frame) { cap_xcomp->created_hw_frame = true; - /*if(av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, *frame, 0) < 0) { - fprintf(stderr, "gsr error: gsr_capture_xcomposite_drm_tick: av_hwframe_get_buffer failed\n"); - return; - }*/ - - AVDRMFrameDescriptor *desc = av_malloc(sizeof(AVDRMFrameDescriptor)); - if(!desc) { - fprintf(stderr, "poop\n"); - return; - } - - fprintf(stderr, "tick fd: %d\n", cap_xcomp->dmabuf_fd); - - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, cap_xcomp->target_texture_id); - int xx = 0; - int yy = 0; - cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xx); - cap_xcomp->egl.glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &yy); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); - - *desc = (AVDRMFrameDescriptor) { - .nb_objects = 1, - .objects[0] = { - .fd = cap_xcomp->dmabuf_fd, - .size = yy * cap_xcomp->stride, - .format_modifier = cap_xcomp->modifiers, - }, - .nb_layers = 1, - .layers[0] = { - .format = cap_xcomp->fourcc, // DRM_FORMAT_NV12 - .nb_planes = 1, //cap_xcomp->num_planes, // TODO: Ensure this is 1, otherwise ffmpeg cant handle it in av_hwframe_map - .planes[0] = { - .object_index = 0, - .offset = cap_xcomp->offset, - .pitch = cap_xcomp->stride, - }, - }, - }; - - #if 0 - AVBufferRef *device_ctx; - if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_DRM, "/dev/dri/card0", NULL, 0) < 0) { - fprintf(stderr, "Error: Failed to create hardware device context\n"); - return; - } - - AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx); - if(!frame_context) { - fprintf(stderr, "Error: Failed to create hwframe context\n"); - av_buffer_unref(&device_ctx); - return; - } - - AVHWFramesContext *hw_frame_context = - (AVHWFramesContext *)frame_context->data; - hw_frame_context->width = video_codec_context->width; - hw_frame_context->height = video_codec_context->height; - hw_frame_context->sw_format = AV_PIX_FMT_0RGB32; - hw_frame_context->format = AV_PIX_FMT_DRM_PRIME; - hw_frame_context->device_ref = device_ctx; - hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data; - - if (av_hwframe_ctx_init(frame_context) < 0) { - fprintf(stderr, "Error: Failed to initialize hardware frame context " - "(note: ffmpeg version needs to be > 4.0)\n"); - av_buffer_unref(&device_ctx); - av_buffer_unref(&frame_context); - return; - } - #endif - av_frame_free(frame); *frame = av_frame_alloc(); 
if(!frame) { @@ -694,32 +670,184 @@ static void gsr_capture_xcomposite_drm_tick(gsr_capture *cap, AVCodecContext *vi return; } - AVFrame *src_frame = av_frame_alloc(); - assert(src_frame); - src_frame->format = AV_PIX_FMT_DRM_PRIME; - src_frame->width = video_codec_context->width; - src_frame->height = video_codec_context->height; - src_frame->color_range = AVCOL_RANGE_JPEG; - - src_frame->buf[0] = av_buffer_create((uint8_t*)desc, sizeof(*desc), - &free_desc, video_codec_context, 0); - if (!src_frame->buf[0]) { - fprintf(stderr, "failed to create buffer!\n"); + fprintf(stderr, "fourcc: %u\n", cap_xcomp->fourcc); + fprintf(stderr, "va surface id: %u\n", (VASurfaceID)(uintptr_t)(*frame)->data[3]); + + VADRMPRIMESurfaceDescriptor prime; + + VASurfaceID surface_id = (uintptr_t)(*frame)->data[3]; + VAStatus va_status = cap_xcomp->vaapi.vaExportSurfaceHandle(cap_xcomp->va_dpy, surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_READ_WRITE | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &prime); // TODO: Composed layers + if(va_status != VA_STATUS_SUCCESS) { + fprintf(stderr, "vaExportSurfaceHandle failed\n"); return; } + cap_xcomp->vaapi.vaSyncSurface(cap_xcomp->va_dpy, surface_id); + + fprintf(stderr, "fourcc: %u, width: %u, height: %u\n", prime.fourcc, prime.width, prime.height); + for(int i = 0; i < prime.num_layers; ++i) { + fprintf(stderr, " drm format: %u, num planes: %u\n", prime.layers[i].drm_format, prime.layers[i].num_planes); + for(int j = 0; j < prime.layers[i].num_planes; ++j) { + const uint32_t object_index = prime.layers[i].object_index[j]; + fprintf(stderr, " object index: %u, offset: %u, pitch: %u, fd: %d, size: %u, drm format mod: %lu\n", object_index, prime.layers[i].offset[j], prime.layers[i].pitch[j], prime.objects[object_index].fd, prime.objects[object_index].size, prime.objects[object_index].drm_format_modifier); + } + } - src_frame->data[0] = (uint8_t*)desc; - src_frame->extended_data = src_frame->data; - src_frame->format = AV_PIX_FMT_DRM_PRIME; + #define EGL_LINUX_DRM_FOURCC_EXT 0x3271 + #define EGL_WIDTH 0x3057 + #define EGL_HEIGHT 0x3056 + #define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 + #define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 + #define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 + #define EGL_LINUX_DMA_BUF_EXT 0x3270 + + #define GL_TEXTURE0 0x84C0 + #define GL_COLOR_ATTACHMENT1 0x8CE1 + + #define FOURCC_NV12 842094158 + + if(prime.fourcc == FOURCC_NV12) { // This happens on AMD + while(cap_xcomp->egl.eglGetError() != EGL_SUCCESS){} + + EGLImage images[2]; + cap_xcomp->egl.glGenTextures(2, cap_xcomp->target_textures); + assert(cap_xcomp->egl.glGetError() == 0); + for(int i = 0; i < 2; ++i) { + const uint32_t formats[2] = { fourcc('R', '8', ' ', ' '), fourcc('G', 'R', '8', '8') }; + const int layer = i; + const int plane = 0; + + const intptr_t img_attr[] = { + EGL_LINUX_DRM_FOURCC_EXT, formats[i], + EGL_WIDTH, prime.width / (1 + i), // half size + EGL_HEIGHT, prime.height / (1 + i), // for chroma + EGL_DMA_BUF_PLANE0_FD_EXT, prime.objects[prime.layers[layer].object_index[plane]].fd, + EGL_DMA_BUF_PLANE0_OFFSET_EXT, prime.layers[layer].offset[plane], + EGL_DMA_BUF_PLANE0_PITCH_EXT, prime.layers[layer].pitch[plane], + EGL_NONE + }; + images[i] = cap_xcomp->egl.eglCreateImage(cap_xcomp->egl.egl_display, 0, EGL_LINUX_DMA_BUF_EXT, NULL, img_attr); // TODO: Cleanup at the end of this for loop + assert(images[i]); + assert(cap_xcomp->egl.eglGetError() == EGL_SUCCESS); + + //cap_xcomp->egl.glActiveTexture(GL_TEXTURE0 + i); + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 
cap_xcomp->target_textures[i]);
+                assert(cap_xcomp->egl.glGetError() == 0);
+
+                cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+                cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+                cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+                cap_xcomp->egl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+                assert(cap_xcomp->egl.glGetError() == 0);
+
+                cap_xcomp->egl.glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, images[i]);
+                assert(cap_xcomp->egl.glGetError() == 0);
+                assert(cap_xcomp->egl.eglGetError() == EGL_SUCCESS);
+            }
+            //cap_xcomp->egl.glActiveTexture(GL_TEXTURE0);
+            cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0);
+
+
+
+            cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->FramebufferNameY);
+            cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->FramebufferNameY);
+
+            cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_textures[0], 0);
+            // cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, cap_xcomp->target_textures[1], 0);
+
+            // Set the list of draw buffers.
+            unsigned int DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
+            cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers
+
+            if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+                fprintf(stderr, "Failed to setup framebuffer\n");
+                return;
+            }
+
+            cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0);
+
+            cap_xcomp->egl.glGenFramebuffers(1, &cap_xcomp->FramebufferNameUV);
+            cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->FramebufferNameUV);
+
+            cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, cap_xcomp->target_textures[1], 0);
+            // cap_xcomp->egl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, cap_xcomp->target_textures[1], 0);
+
+            // Set the list of draw buffers.
+ cap_xcomp->egl.glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers + + if(cap_xcomp->egl.glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + fprintf(stderr, "Failed to setup framebuffer\n"); + return; + } + + cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0); + + //cap_xcomp->egl.glGenVertexArrays(1, &cap_xcomp->quad_VertexArrayID); + //cap_xcomp->egl.glBindVertexArray(cap_xcomp->quad_VertexArrayID); + + static const float g_quad_vertex_buffer_data[] = { + -1.0f, -1.0f, 0.0f, + 1.0f, -1.0f, 0.0f, + -1.0f, 1.0f, 0.0f, + -1.0f, 1.0f, 0.0f, + 1.0f, -1.0f, 0.0f, + 1.0f, 1.0f, 0.0f, + }; + + //cap_xcomp->egl.glGenBuffers(1, &cap_xcomp->quad_vertexbuffer); + //cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, cap_xcomp->quad_vertexbuffer); + //cap_xcomp->egl.glBufferData(GL_ARRAY_BUFFER, sizeof(g_quad_vertex_buffer_data), g_quad_vertex_buffer_data, GL_STATIC_DRAW); + + // Create and compile our GLSL program from the shaders + cap_xcomp->shader_y = LoadShadersY(cap_xcomp); + cap_xcomp->shader_uv = LoadShadersUV(cap_xcomp); + //int tex1 = cap_xcomp->egl.glGetUniformLocation(cap_xcomp->shader_y, "tex1"); + //cap_xcomp->egl.glUniform1i(tex1, 0); + //tex1 = cap_xcomp->egl.glGetUniformLocation(cap_xcomp->shader_uv, "tex1"); + //cap_xcomp->egl.glUniform1i(tex1, 0); + //int tex2 = cap_xcomp->egl.glGetUniformLocation(shader_program, "tex2"); + //fprintf(stderr, "uniform id: %u\n", tex1); + + float vVertices[] = { + -1.0f, 1.0f, 0.0f, 1.0f, + -1.0f, -1.0f, 0.0f, 0.0f, + 1.0f, -1.0f, 1.0f, 0.0f, + + -1.0f, 1.0f, 0.0f, 1.0f, + 1.0f, -1.0f, 1.0f, 0.0f, + 1.0f, 1.0f, 1.0f, 1.0f + }; + + unsigned int quadVBO; + cap_xcomp->egl.glGenVertexArrays(1, &cap_xcomp->quadVAO); + cap_xcomp->egl.glGenBuffers(1, &quadVBO); + cap_xcomp->egl.glBindVertexArray(cap_xcomp->quadVAO); + cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, quadVBO); + cap_xcomp->egl.glBufferData(GL_ARRAY_BUFFER, sizeof(vVertices), &vVertices, GL_STATIC_DRAW); + + cap_xcomp->egl.glEnableVertexAttribArray(0); + cap_xcomp->egl.glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); + + cap_xcomp->egl.glEnableVertexAttribArray(1); + cap_xcomp->egl.glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float))); + + cap_xcomp->egl.glBindVertexArray(0); - res = av_hwframe_map(*frame, src_frame, AV_HWFRAME_MAP_DIRECT); - if(res < 0) { - fprintf(stderr, "av_hwframe_map failed: %d\n", res); + //cap_xcomp->egl.glUniform1i(tex1, 0); + //cap_xcomp->egl.glUniform1i(tex2, 1); + + //cap_xcomp->egl.glViewport(0, 0, 1920, 1080); + + //cap_xcomp->egl.glBindBuffer(GL_ARRAY_BUFFER, 0); + //cap_xcomp->egl.glBindVertexArray(0); + } else { // This happens on intel + fprintf(stderr, "unexpected fourcc: %u, expected nv12\n", prime.fourcc); + abort(); } // Clear texture with black background because the source texture (window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)) // might be smaller than cap_xcomp->target_texture_id - cap_xcomp->egl.glClearTexImage(cap_xcomp->target_texture_id, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + // TODO: + //cap_xcomp->egl.glClearTexImage(cap_xcomp->target_texture_id, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); } } @@ -732,92 +860,35 @@ static bool gsr_capture_xcomposite_drm_should_stop(gsr_capture *cap, bool *err) #define GL_TRUE 1 #define GL_TRIANGLES 0x0004 -void FBO_2_PPM_file(gsr_capture_xcomposite_drm *cap_xcomp, int output_width, int output_height) -{ - FILE *output_image; - - /// READ THE PIXELS VALUES from FBO AND SAVE TO A .PPM FILE - int i, j, k; - 
unsigned char *pixels = (unsigned char*)malloc(output_width*output_height*3); - - unsigned int err = cap_xcomp->egl.glGetError(); - fprintf(stderr, "opengl err 1: %u\n", err); - - /// READ THE CONTENT FROM THE FBO - cap_xcomp->egl.glReadBuffer(GL_COLOR_ATTACHMENT0); - - err = cap_xcomp->egl.glGetError(); - fprintf(stderr, "opengl err 2: %u\n", err); - - cap_xcomp->egl.glReadPixels(0, 0, output_width, output_height, GL_RGBA, GL_UNSIGNED_BYTE, pixels); - - err = cap_xcomp->egl.glGetError(); - fprintf(stderr, "opengl err 3: %u\n", err); - - output_image = fopen("output.ppm", "wb"); - fprintf(output_image,"P3\n"); - fprintf(output_image,"# Created by Ricao\n"); - fprintf(output_image,"%d %d\n",output_width,output_height); - fprintf(output_image,"255\n"); - - k = 0; - for(i=0; ipriv; vec2i source_size = cap_xcomp->texture_size; - #if 1 - /* TODO: Remove this copy, which is only possible by using nvenc directly and encoding window_pixmap.target_texture_id */ - cap_xcomp->egl.glCopyImageSubData( - window_texture_get_opengl_texture_id(&cap_xcomp->window_texture), GL_TEXTURE_2D, 0, 0, 0, 0, - cap_xcomp->target_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, - source_size.x, source_size.y, 1); - unsigned int err = cap_xcomp->egl.glGetError(); - if(err != 0) { - static bool error_shown = false; - if(!error_shown) { - error_shown = true; - fprintf(stderr, "Error: glCopyImageSubData failed, gl error: %d\n", err); - } + cap_xcomp->egl.glBindVertexArray(cap_xcomp->quadVAO); + cap_xcomp->egl.glViewport(0, 0, source_size.x, source_size.y); + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); + + { + cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->FramebufferNameY); + //cap_xcomp->egl.glClear(GL_COLOR_BUFFER_BIT); + + cap_xcomp->egl.glUseProgram(cap_xcomp->shader_y); + cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6); } - #elif 0 - cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->FramebufferName); - cap_xcomp->egl.glViewport(0, 0, 1920, 1080); - //cap_xcomp->egl.glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - cap_xcomp->egl.glClear(GL_COLOR_BUFFER_BIT); - cap_xcomp->egl.glUseProgram(shader_program); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, window_texture_get_opengl_texture_id(&cap_xcomp->window_texture)); - cap_xcomp->egl.glBindVertexArray(cap_xcomp->quadVAO); - cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6); - cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); + { + cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, cap_xcomp->FramebufferNameUV); + //cap_xcomp->egl.glClear(GL_COLOR_BUFFER_BIT); - static int counter = 0; - ++counter; - static bool image_saved = false; - if(!image_saved && counter == 5) { - image_saved = true; - FBO_2_PPM_file(cap_xcomp, 1920, 1080); - fprintf(stderr, "saved image!\n"); + cap_xcomp->egl.glUseProgram(cap_xcomp->shader_uv); + cap_xcomp->egl.glDrawArrays(GL_TRIANGLES, 0, 6); } cap_xcomp->egl.glBindVertexArray(0); cap_xcomp->egl.glUseProgram(0); + cap_xcomp->egl.glBindTexture(GL_TEXTURE_2D, 0); cap_xcomp->egl.glBindFramebuffer(GL_FRAMEBUFFER, 0); - #endif + cap_xcomp->egl.eglSwapBuffers(cap_xcomp->egl.egl_display, cap_xcomp->egl.egl_surface); return 0; @@ -825,10 +896,15 @@ static int gsr_capture_xcomposite_drm_capture(gsr_capture *cap, AVFrame *frame) static void gsr_capture_xcomposite_drm_destroy(gsr_capture *cap, AVCodecContext *video_codec_context) { (void)video_codec_context; + gsr_capture_xcomposite_drm *cap_xcomp = cap->priv; if(cap->priv) { free(cap->priv); cap->priv = NULL; } + if(cap_xcomp->dpy) { + 
XCloseDisplay(cap_xcomp->dpy); + cap_xcomp->dpy = NULL; + } free(cap); } diff --git a/src/cuda.c b/src/cuda.c index 0c42d74..3076ebe 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -37,15 +37,13 @@ bool gsr_cuda_load(gsr_cuda *self) { { NULL, NULL } }; + CUresult res; + if(!dlsym_load_list(lib, required_dlsym)) { fprintf(stderr, "gsr error: gsr_cuda_load failed: missing required symbols in libcuda.so/libcuda.so.1\n"); - dlclose(lib); - memset(self, 0, sizeof(gsr_cuda)); - return false; + goto fail; } - CUresult res; - res = self->cuInit(0); if(res != CUDA_SUCCESS) { const char *err_str = "unknown"; diff --git a/src/egl.c b/src/egl.c index dbc8928..325a06f 100644 --- a/src/egl.c +++ b/src/egl.c @@ -57,7 +57,10 @@ static bool gsr_egl_create_window(gsr_egl *self) { goto fail; } - self->eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); + if(!self->eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context)) { + fprintf(stderr, "gsr error: gsr_egl_create_window failed: failed to make context current\n"); + goto fail; + } self->egl_display = egl_display; self->egl_surface = egl_surface; @@ -79,6 +82,7 @@ static bool gsr_egl_create_window(gsr_egl *self) { static bool gsr_egl_load_egl(gsr_egl *self, void *library) { dlsym_assign required_dlsym[] = { + { (void**)&self->eglGetError, "eglGetError" }, { (void**)&self->eglGetDisplay, "eglGetDisplay" }, { (void**)&self->eglInitialize, "eglInitialize" }, { (void**)&self->eglTerminate, "eglTerminate" }, @@ -182,52 +186,45 @@ bool gsr_egl_load(gsr_egl *self, Display *dpy) { memset(self, 0, sizeof(gsr_egl)); self->dpy = dpy; + void *egl_lib = NULL; + void *gl_lib = NULL; + dlerror(); /* clear */ - void *egl_lib = dlopen("libEGL.so.1", RTLD_LAZY); + egl_lib = dlopen("libEGL.so.1", RTLD_LAZY); if(!egl_lib) { fprintf(stderr, "gsr error: gsr_egl_load: failed to load libEGL.so.1, error: %s\n", dlerror()); - return false; + goto fail; } - void *gl_lib = dlopen("libGL.so.1", RTLD_LAZY); + gl_lib = dlopen("libGL.so.1", RTLD_LAZY); if(!egl_lib) { fprintf(stderr, "gsr error: gsr_egl_load: failed to load libGL.so.1, error: %s\n", dlerror()); - dlclose(egl_lib); - memset(self, 0, sizeof(gsr_egl)); - return false; + goto fail; } - if(!gsr_egl_load_egl(self, egl_lib)) { - dlclose(egl_lib); - dlclose(gl_lib); - memset(self, 0, sizeof(gsr_egl)); - return false; - } + if(!gsr_egl_load_egl(self, egl_lib)) + goto fail; - if(!gsr_egl_load_gl(self, gl_lib)) { - dlclose(egl_lib); - dlclose(gl_lib); - memset(self, 0, sizeof(gsr_egl)); - return false; - } + if(!gsr_egl_load_gl(self, gl_lib)) + goto fail; - if(!gsr_egl_proc_load_egl(self)) { - dlclose(egl_lib); - dlclose(gl_lib); - memset(self, 0, sizeof(gsr_egl)); - return false; - } + if(!gsr_egl_proc_load_egl(self)) + goto fail; - if(!gsr_egl_create_window(self)) { - dlclose(egl_lib); - dlclose(gl_lib); - memset(self, 0, sizeof(gsr_egl)); - return false; - } + if(!gsr_egl_create_window(self)) + goto fail; self->egl_library = egl_lib; self->gl_library = gl_lib; return true; + + fail: + if(egl_lib) + dlclose(egl_lib); + if(gl_lib) + dlclose(gl_lib); + memset(self, 0, sizeof(gsr_egl)); + return false; } void gsr_egl_unload(gsr_egl *self) { diff --git a/src/main.cpp b/src/main.cpp index 5581e77..1c6dad9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -335,7 +335,7 @@ static AVCodecContext* create_audio_codec_context(int fps, AudioCodec audio_code static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt, VideoQuality video_quality, - int fps, const AVCodec *codec, bool is_livestream) { + int fps, 
const AVCodec *codec, bool is_livestream, gpu_vendor vendor) { AVCodecContext *codec_context = avcodec_alloc_context3(codec); @@ -415,6 +415,13 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt, av_opt_set_int(codec_context->priv_data, "b_ref_mode", 0, 0); + if(vendor != GPU_VENDOR_NVIDIA) { + // TODO: More options, better options + //codec_context->bit_rate = codec_context->width * codec_context->height; + av_opt_set(codec_context->priv_data, "rc_mode", "CQP", 0); + codec_context->global_quality = 4; + } + //codec_context->rc_max_rate = codec_context->bit_rate; //codec_context->rc_min_rate = codec_context->bit_rate; //codec_context->rc_buffer_size = codec_context->bit_rate / 10; @@ -424,10 +431,14 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt, return codec_context; } -static bool check_if_codec_valid_for_hardware(const AVCodec *codec) { +static bool check_if_codec_valid_for_hardware(const AVCodec *codec, gpu_vendor vendor) { + // TODO: For now we assume that amd and intel always support h264 and hevc, but we default to h264 + if(vendor != GPU_VENDOR_NVIDIA) + return true; + bool success = false; // Do not use AV_PIX_FMT_CUDA because we dont want to do full check with hardware context - AVCodecContext *codec_context = create_video_codec_context(AV_PIX_FMT_YUV420P, VideoQuality::VERY_HIGH, 60, codec, false); + AVCodecContext *codec_context = create_video_codec_context(AV_PIX_FMT_YUV420P, VideoQuality::VERY_HIGH, 60, codec, false, vendor); codec_context->width = 1920; codec_context->height = 1080; if(codec_context) { @@ -446,7 +457,7 @@ static const AVCodec* find_h264_encoder(gpu_vendor vendor) { static bool checked_success = true; if(!checked) { checked = true; - if(!check_if_codec_valid_for_hardware(codec)) + if(!check_if_codec_valid_for_hardware(codec, vendor)) checked_success = false; } return checked_success ? codec : nullptr; @@ -466,7 +477,7 @@ static const AVCodec* find_h265_encoder(gpu_vendor vendor) { static bool checked_success = true; if(!checked) { checked = true; - if(!check_if_codec_valid_for_hardware(codec)) + if(!check_if_codec_valid_for_hardware(codec, vendor)) checked_success = false; } return checked_success ? 
codec : nullptr; @@ -508,36 +519,78 @@ static AVFrame* open_audio(AVCodecContext *audio_codec_context) { return frame; } -static void open_video(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu) { - bool supports_p4 = false; - bool supports_p6 = false; - - const AVOption *opt = nullptr; - while((opt = av_opt_next(codec_context->priv_data, opt))) { - if(opt->type == AV_OPT_TYPE_CONST) { - if(strcmp(opt->name, "p4") == 0) - supports_p4 = true; - else if(strcmp(opt->name, "p6") == 0) - supports_p6 = true; +static void open_video(AVCodecContext *codec_context, VideoQuality video_quality, bool very_old_gpu, gpu_vendor vendor) { + AVDictionary *options = nullptr; + if(vendor == GPU_VENDOR_NVIDIA) { + bool supports_p4 = false; + bool supports_p6 = false; + + const AVOption *opt = nullptr; + while((opt = av_opt_next(codec_context->priv_data, opt))) { + if(opt->type == AV_OPT_TYPE_CONST) { + if(strcmp(opt->name, "p4") == 0) + supports_p4 = true; + else if(strcmp(opt->name, "p6") == 0) + supports_p6 = true; + } } - } - AVDictionary *options = nullptr; - if(very_old_gpu) { - switch(video_quality) { - case VideoQuality::MEDIUM: - av_dict_set_int(&options, "qp", 37, 0); - break; - case VideoQuality::HIGH: - av_dict_set_int(&options, "qp", 32, 0); - break; - case VideoQuality::VERY_HIGH: - av_dict_set_int(&options, "qp", 27, 0); - break; - case VideoQuality::ULTRA: - av_dict_set_int(&options, "qp", 21, 0); - break; + if(very_old_gpu) { + switch(video_quality) { + case VideoQuality::MEDIUM: + av_dict_set_int(&options, "qp", 37, 0); + break; + case VideoQuality::HIGH: + av_dict_set_int(&options, "qp", 32, 0); + break; + case VideoQuality::VERY_HIGH: + av_dict_set_int(&options, "qp", 27, 0); + break; + case VideoQuality::ULTRA: + av_dict_set_int(&options, "qp", 21, 0); + break; + } + } else { + switch(video_quality) { + case VideoQuality::MEDIUM: + av_dict_set_int(&options, "qp", 40, 0); + break; + case VideoQuality::HIGH: + av_dict_set_int(&options, "qp", 35, 0); + break; + case VideoQuality::VERY_HIGH: + av_dict_set_int(&options, "qp", 30, 0); + break; + case VideoQuality::ULTRA: + av_dict_set_int(&options, "qp", 24, 0); + break; + } } + + if(!supports_p4 && !supports_p6) + fprintf(stderr, "Info: your ffmpeg version is outdated. It's recommended that you use the flatpak version of gpu-screen-recorder version instead, which you can find at https://flathub.org/apps/details/com.dec05eba.gpu_screen_recorder\n"); + + //if(is_livestream) { + // av_dict_set_int(&options, "zerolatency", 1, 0); + // //av_dict_set(&options, "preset", "llhq", 0); + //} + + // Fuck nvidia and ffmpeg, I want to use a good preset for the gpu but all gpus prefer different + // presets. Nvidia and ffmpeg used to support "hq" preset that chose the best preset for the gpu + // with pretty good performance but you now have to choose p1-p7, which are gpu agnostic and on + // older gpus p5-p7 slow the gpu down to a crawl... + // "hq" is now just an alias for p7 in ffmpeg :( + // TODO: Temporary disable because of stuttering? + if(very_old_gpu) + av_dict_set(&options, "preset", supports_p4 ? "p4" : "medium", 0); + else + av_dict_set(&options, "preset", supports_p6 ? 
"p6" : "slow", 0); + + av_dict_set(&options, "tune", "hq", 0); + av_dict_set(&options, "rc", "constqp", 0); + + if(codec_context->codec_id == AV_CODEC_ID_H264) + av_dict_set(&options, "profile", "high", 0); } else { switch(video_quality) { case VideoQuality::MEDIUM: @@ -553,32 +606,19 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality av_dict_set_int(&options, "qp", 24, 0); break; } - } - - if(!supports_p4 && !supports_p6) - fprintf(stderr, "Info: your ffmpeg version is outdated. It's recommended that you use the flatpak version of gpu-screen-recorder version instead, which you can find at https://flathub.org/apps/details/com.dec05eba.gpu_screen_recorder\n"); - //if(is_livestream) { - // av_dict_set_int(&options, "zerolatency", 1, 0); - // //av_dict_set(&options, "preset", "llhq", 0); - //} + // TODO: More quality options + av_dict_set(&options, "rc_mode", "CQP", 0); + //av_dict_set_int(&options, "low_power", 1, 0); - // Fuck nvidia and ffmpeg, I want to use a good preset for the gpu but all gpus prefer different - // presets. Nvidia and ffmpeg used to support "hq" preset that chose the best preset for the gpu - // with pretty good performance but you now have to choose p1-p7, which are gpu agnostic and on - // older gpus p5-p7 slow the gpu down to a crawl... - // "hq" is now just an alias for p7 in ffmpeg :( - // TODO: Temporary disable because of stuttering? - if(very_old_gpu) - av_dict_set(&options, "preset", supports_p4 ? "p4" : "medium", 0); - else - av_dict_set(&options, "preset", supports_p6 ? "p6" : "slow", 0); - - av_dict_set(&options, "tune", "hq", 0); - av_dict_set(&options, "rc", "constqp", 0); - - if(codec_context->codec_id == AV_CODEC_ID_H264) - av_dict_set(&options, "profile", "high", 0); + if(codec_context->codec_id == AV_CODEC_ID_H264) { + av_dict_set(&options, "profile", "high", 0); + av_dict_set(&options, "coder", "cavlc", 0);// TODO: cavlc is faster than cabac but worse compression. Which to use? + av_dict_set_int(&options, "quality", 50, 0); + } else { + av_dict_set(&options, "profile", "main", 0); + } + } av_dict_set(&options, "strict", "experimental", 0); @@ -602,7 +642,7 @@ static void usage() { fprintf(stderr, " -r Replay buffer size in seconds. If this is set, then only the last seconds as set by this option will be stored" " and the video will only be saved when the gpu-screen-recorder is closed. This feature is similar to Nvidia's instant replay feature." " This option has be between 5 and 1200. Note that the replay buffer size will not always be precise, because of keyframes. Optional, disabled by default.\n"); - fprintf(stderr, " -k Video codec to use. Should be either 'auto', 'h264' or 'h265'. Defaults to 'auto' which defaults to 'h265' unless recording at a higher resolution than 3840x2160. Forcefully set to 'h264' if -c is 'flv'.\n"); + fprintf(stderr, " -k Video codec to use. Should be either 'auto', 'h264' or 'h265'. Defaults to 'auto' which defaults to 'h265' on nvidia unless recording at a higher resolution than 3840x2160. On AMD/Intel this defaults to 'auto' which defaults to 'h264'. Forcefully set to 'h264' if -c is 'flv'.\n"); fprintf(stderr, " -ac Audio codec to use. Should be either 'aac', 'opus' or 'flac'. Defaults to 'opus' for .mp4/.mkv files, otherwise defaults to 'aac'. 'opus' and 'flac' is only supported by .mp4/.mkv files. 'opus' is recommended for best performance and smallest audio size.\n"); fprintf(stderr, " -o The output file path. If omitted then the encoded data is sent to stdout. 
Required in replay mode (when using -r). In replay mode this has to be an existing directory instead of a file.\n"); fprintf(stderr, "NOTES:\n"); @@ -1387,23 +1427,41 @@ int main(int argc, char **argv) { const double target_fps = 1.0 / (double)fps; if(strcmp(video_codec_to_use, "auto") == 0) { - const AVCodec *h265_codec = find_h265_encoder(gpu_inf.vendor); - - // h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k. - // Another important info is that when recording at a higher fps than.. 60? h265 has very bad performance. For example when recording at 144 fps the fps drops to 1 - // while with h264 the fps doesn't drop. - if(!h265_codec) { - fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n"); - video_codec_to_use = "h264"; - video_codec = VideoCodec::H264; - } else if(fps > 60) { - fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); - video_codec_to_use = "h264"; - video_codec = VideoCodec::H264; + if(gpu_inf.vendor == GPU_VENDOR_NVIDIA) { + const AVCodec *h265_codec = find_h265_encoder(gpu_inf.vendor); + + // h265 generally allows recording at a higher resolution than h264 on nvidia cards. On a gtx 1080 4k is the max resolution for h264 but for h265 it's 8k. + // Another important info is that when recording at a higher fps than.. 60? h265 has very bad performance. For example when recording at 144 fps the fps drops to 1 + // while with h264 the fps doesn't drop. + if(!h265_codec) { + fprintf(stderr, "Info: using h264 encoder because a codec was not specified and your gpu does not support h265\n"); + video_codec_to_use = "h264"; + video_codec = VideoCodec::H264; + } else if(fps > 60) { + fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); + video_codec_to_use = "h264"; + video_codec = VideoCodec::H264; + } else { + fprintf(stderr, "Info: using h265 encoder because a codec was not specified\n"); + video_codec_to_use = "h265"; + video_codec = VideoCodec::H265; + } } else { - fprintf(stderr, "Info: using h265 encoder because a codec was not specified\n"); - video_codec_to_use = "h265"; - video_codec = VideoCodec::H265; + const AVCodec *h264_codec = find_h264_encoder(gpu_inf.vendor); + + if(!h264_codec) { + fprintf(stderr, "Info: using h265 encoder because a codec was not specified and your gpu does not support h264\n"); + video_codec_to_use = "h265"; + video_codec = VideoCodec::H265; + //} else if(fps > 60) { + // fprintf(stderr, "Info: using h264 encoder because a codec was not specified and fps is more than 60\n"); + // video_codec_to_use = "h264"; + // video_codec = VideoCodec::H264; + } else { + fprintf(stderr, "Info: using h264 encoder because a codec was not specified\n"); + video_codec_to_use = "h264"; + video_codec = VideoCodec::H264; + } } } @@ -1442,7 +1500,7 @@ int main(int argc, char **argv) { AVStream *video_stream = nullptr; std::vector audio_tracks; - AVCodecContext *video_codec_context = create_video_codec_context(gpu_inf.vendor == GPU_VENDOR_NVIDIA ? AV_PIX_FMT_CUDA : AV_PIX_FMT_VAAPI, quality, fps, video_codec_f, is_livestream); + AVCodecContext *video_codec_context = create_video_codec_context(gpu_inf.vendor == GPU_VENDOR_NVIDIA ? 
AV_PIX_FMT_CUDA : AV_PIX_FMT_VAAPI, quality, fps, video_codec_f, is_livestream, gpu_inf.vendor);
 
     if(replay_buffer_size_secs == -1)
         video_stream = create_stream(av_format_context, video_codec_context);
@@ -1451,7 +1509,7 @@ int main(int argc, char **argv) {
         return 1;
     }
 
-    open_video(video_codec_context, quality, very_old_gpu);
+    open_video(video_codec_context, quality, very_old_gpu, gpu_inf.vendor);
     if(video_stream)
         avcodec_parameters_from_context(video_stream->codecpar, video_codec_context);
 
diff --git a/src/vaapi.c b/src/vaapi.c
new file mode 100644
index 0000000..bb1b1fd
--- /dev/null
+++ b/src/vaapi.c
@@ -0,0 +1,41 @@
+#include "../include/vaapi.h"
+#include "../include/library_loader.h"
+#include 
+
+bool gsr_vaapi_load(gsr_vaapi *self) {
+    memset(self, 0, sizeof(gsr_vaapi));
+
+    dlerror(); /* clear */
+    void *lib = dlopen("libva.so.2", RTLD_LAZY);
+    if(!lib) {
+        fprintf(stderr, "gsr error: gsr_vaapi_load failed: failed to load libva.so.2, error: %s\n", dlerror());
+        return false;
+    }
+
+    dlsym_assign required_dlsym[] = {
+        { (void**)&self->vaExportSurfaceHandle, "vaExportSurfaceHandle" },
+        { (void**)&self->vaSyncSurface, "vaSyncSurface" },
+
+        { NULL, NULL }
+    };
+
+    if(!dlsym_load_list(lib, required_dlsym)) {
+        fprintf(stderr, "gsr error: gsr_vaapi_load failed: missing required symbols in libva.so.2\n");
+        goto fail;
+    }
+
+    self->library = lib;
+    return true;
+
+    fail:
+    dlclose(lib);
+    memset(self, 0, sizeof(gsr_vaapi));
+    return false;
+}
+
+void gsr_vaapi_unload(gsr_vaapi *self) {
+    if(self->library) {
+        dlclose(self->library);
+        memset(self, 0, sizeof(gsr_vaapi));
+    }
+}
diff --git a/src/window_texture.c b/src/window_texture.c
index 72a2474..df34a37 100644
--- a/src/window_texture.c
+++ b/src/window_texture.c
@@ -85,8 +85,10 @@ int window_texture_on_resize(WindowTexture *self) {
     self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-    self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    self->egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
+    while(self->egl->eglGetError() != EGL_SUCCESS) {}
 
     image = self->egl->eglCreateImage(self->egl->egl_display, NULL, EGL_NATIVE_PIXMAP_KHR, (EGLClientBuffer)pixmap, pixmap_attrs);
     if(!image) {
@@ -95,7 +97,7 @@ int window_texture_on_resize(WindowTexture *self) {
     }
 
     self->egl->glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image);
-    if(self->egl->glGetError() != 0) {
+    if(self->egl->glGetError() != 0 || self->egl->eglGetError() != EGL_SUCCESS) {
         result = 5;
         goto cleanup;
     }
-- cgit v1.2.3
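
Note on the capture path added above: the encoder frame is a VAAPI surface, so its VASurfaceID (stored in (*frame)->data[3]) is exported with vaExportSurfaceHandle as DRM PRIME file descriptors, the two NV12 planes are imported back into GL as R8/GR88 textures through dma-buf EGL images, and the captured window texture is then rendered into those planes with the RGB-to-YUV shaders. Below is a minimal standalone sketch of just that export/import step. It mirrors the calls used in gsr_capture_xcomposite_drm_tick, but it is only a sketch: it assumes libva, EGL and GLES are linked directly rather than dlopen'd as the patch does, the helper name import_nv12_surface() is illustrative, and modifier attributes plus most error cleanup are omitted.

    /* Sketch: wrap an NV12 VAAPI surface's planes as GL textures via dma-buf import. */
    #include <stdint.h>
    #include <va/va.h>
    #include <va/va_drmcommon.h>
    #include <EGL/egl.h>
    #include <EGL/eglext.h>
    #include <GLES3/gl3.h>
    #include <GLES2/gl2ext.h>

    static uint32_t fourcc_code4(char a, char b, char c, char d) {
        return ((uint32_t)d << 24) | ((uint32_t)c << 16) | ((uint32_t)b << 8) | (uint32_t)a;
    }

    /* textures_out[0] receives the Y plane (R8), textures_out[1] the UV plane (GR88). */
    static int import_nv12_surface(VADisplay va_dpy, VASurfaceID surface_id,
                                   EGLDisplay egl_display, unsigned int textures_out[2]) {
        VADRMPRIMESurfaceDescriptor prime;
        if(vaExportSurfaceHandle(va_dpy, surface_id,
                VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
                VA_EXPORT_SURFACE_READ_WRITE | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
                &prime) != VA_STATUS_SUCCESS)
            return -1;
        vaSyncSurface(va_dpy, surface_id);

        /* Same assumption the patch makes: NV12 exported as separate layers. */
        if(prime.fourcc != fourcc_code4('N', 'V', '1', '2'))
            return -1;

        PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glEGLImageTargetTexture2DOES =
            (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)eglGetProcAddress("glEGLImageTargetTexture2DOES");

        const uint32_t formats[2] = { fourcc_code4('R', '8', ' ', ' '), fourcc_code4('G', 'R', '8', '8') };
        glGenTextures(2, textures_out);
        for(int i = 0; i < 2; ++i) {
            const uint32_t object = prime.layers[i].object_index[0];
            const EGLAttrib img_attr[] = {
                EGL_LINUX_DRM_FOURCC_EXT,      formats[i],
                EGL_WIDTH,                     prime.width  / (1 + i), /* UV plane is half size */
                EGL_HEIGHT,                    prime.height / (1 + i),
                EGL_DMA_BUF_PLANE0_FD_EXT,     prime.objects[object].fd,
                EGL_DMA_BUF_PLANE0_OFFSET_EXT, prime.layers[i].offset[0],
                EGL_DMA_BUF_PLANE0_PITCH_EXT,  prime.layers[i].pitch[0],
                EGL_NONE
            };
            EGLImage image = eglCreateImage(egl_display, EGL_NO_CONTEXT,
                                            EGL_LINUX_DMA_BUF_EXT, NULL, img_attr);
            if(!image)
                return -1;

            glBindTexture(GL_TEXTURE_2D, textures_out[i]);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
            /* The texture now aliases the VAAPI surface memory: rendering into an
               FBO with this texture attached writes straight into the encoder's frame,
               which is why the capture path above needs no extra copy per frame. */
            glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image);
            glBindTexture(GL_TEXTURE_2D, 0);
        }
        /* The exported fds in prime.objects[] should be closed once the EGL images exist. */
        return 0;
    }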