diff options
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | TODO | 4 | ||||
-rw-r--r-- | include/defs.h | 5 | ||||
-rw-r--r-- | include/egl.h | 15 | ||||
-rw-r--r-- | project.conf | 2 | ||||
-rw-r--r-- | src/egl.c | 8 | ||||
-rw-r--r-- | src/encoder/video/vulkan.c | 267 | ||||
-rw-r--r-- | src/main.cpp | 36 | ||||
-rw-r--r-- | src/utils.c | 19 |
9 files changed, 183 insertions, 177 deletions
@@ -39,7 +39,7 @@ For you as a user this only means that if you installed GPU Screen Recorder as a On a system with a i5 4690k CPU and a GTX 1080 GPU:\ When recording Legend of Zelda Breath of the Wild at 4k, fps drops from 30 to 7 when using OBS Studio + nvenc, however when using this screen recorder the fps remains at 30.\ When recording GTA V at 4k on highest settings, fps drops from 60 to 23 when using obs-nvfbc + nvenc, however when using this screen recorder the fps only drops to 58.\ -GPU Screen Recorder also produces much smoother videos than OBS when GPU utilization is close to 100%, see comparison here: [https://www.youtube.com/watch?v=zfj4sNVLLLg](https://www.youtube.com/watch?v=zfj4sNVLLLg).\ +GPU Screen Recorder also produces much smoother videos than OBS when GPU utilization is close to 100%, see comparison here: [https://www.youtube.com/watch?v=zfj4sNVLLLg](https://www.youtube.com/watch?v=zfj4sNVLLLg) and [https://www.youtube.com/watch?v=aK67RSZw2ZQ](https://www.youtube.com/watch?v=aK67RSZw2ZQ).\ GPU Screen Recorder has much better performance than OBS Studio even with version 30.2 that does "zero-copy" recording and encoding, see: [https://www.youtube.com/watch?v=jdroRjibsDw](https://www.youtube.com/watch?v=jdroRjibsDw).\ It is recommended to save the video to a SSD because of the large file size, which a slow HDD might not be fast enough to handle. Using variable framerate mode (-fm vfr) which is the default is also recommended as this reduces encoding load. Ultra quality is also overkill most of the time, very high (the default) or lower quality is usually enough.\ Note that for best performance you should close other screen recorders such as OBS Studio when using GPU Screen Recorder even if they are not recording, since they can affect performance even when idle. This is the case with OBS Studio. @@ -187,4 +187,4 @@ To fix this you can either record the video in .mkv format or constant frame rat KDE Plasma version 6.2 broke HDR and ICC profiles for screen recorders. This was changed in KDE plasma version 6.3 and recording HDR works now, as long as you set HDR brightness to 100% (which means setting "Maximum SDR Brightness" in KDE plasma display settings to 203) and set color accuracy to "Prefer color accuracy". If you want to convert HDR to SDR then record with desktop portal option (`-w portal`) instead. I don't know how well recording HDR works in wayland compositors other than KDE plasma. ## GPU Screen Recorder starts lagging after 30-40 minutes when launching GPU Screen Recorder from steam command launcher -This is a [steam issue](https://github.com/ValveSoftware/steam-for-linux/issues/11446). Prepend the gpu-screen-recorder command with `LD_PREFIX=""`, for example `LD_PREFIX="" gpu-screen-recorder -w screen -o video.mp4`.
\ No newline at end of file +This is a [steam issue](https://github.com/ValveSoftware/steam-for-linux/issues/11446). Prepend the gpu-screen-recorder command with `LD_PREFIX=""`, for example `LD_PREFIX="" gpu-screen-recorder -w screen -o video.mp4`. @@ -265,3 +265,7 @@ nvfbc capture cursor with cursor.h instead and composite that on top. This allow Maybe remove external shader code and make a simple external to internal texture converter (compute shader), to reduce texture sampling. Maybe this is faster? Fix opengl context broken after suspend on nvidia by using this: https://registry.khronos.org/OpenGL/extensions/NV/NV_robustness_video_memory_purge.txt requires glx context creation flags and GetGraphicsResetStatusARB() == PURGED_CONTEXT_RESET_NV check to recreate all graphics. + +HDR looks incorrect, brightest point gets cut off. + +Make "screen" capture the preferred monitor. diff --git a/include/defs.h b/include/defs.h index 365a6e2..cbb5618 100644 --- a/include/defs.h +++ b/include/defs.h @@ -14,11 +14,6 @@ typedef struct { gsr_gpu_vendor vendor; int gpu_version; /* 0 if unknown */ bool is_steam_deck; - - /* Only currently set for Mesa. 0 if unknown format */ - int driver_major; - int driver_minor; - int driver_patch; } gsr_gpu_info; typedef enum { diff --git a/include/egl.h b/include/egl.h index 730502f..8e7496c 100644 --- a/include/egl.h +++ b/include/egl.h @@ -169,6 +169,13 @@ typedef void (*GLDEBUGPROC)(unsigned int source, unsigned int type, unsigned int typedef int (*FUNC_eglQueryDisplayAttribEXT)(EGLDisplay dpy, int32_t attribute, intptr_t *value); typedef const char* (*FUNC_eglQueryDeviceStringEXT)(void *device, int32_t name); typedef int (*FUNC_eglQueryDmaBufModifiersEXT)(EGLDisplay dpy, int32_t format, int32_t max_modifiers, uint64_t *modifiers, int *external_only, int32_t *num_modifiers); +typedef void (*FUNC_glCreateMemoryObjectsEXT)(int n, unsigned int *memoryObjects); +typedef void (*FUNC_glImportMemoryFdEXT)(unsigned int memory, uint64_t size, unsigned int handleType, int fd); +typedef unsigned char (*FUNC_glIsMemoryObjectEXT)(unsigned int memoryObject); +typedef void (*FUNC_glTexStorageMem2DEXT)(unsigned int target, int levels, unsigned int internalFormat, int width, int height, unsigned int memory, uint64_t offset); +typedef void (*FUNC_glBufferStorageMemEXT)(unsigned int target, ssize_t size, unsigned int memory, uint64_t offset); +typedef void (*FUNC_glNamedBufferStorageMemEXT)(unsigned int buffer, ssize_t size, unsigned int memory, uint64_t offset); +typedef void (*FUNC_glMemoryObjectParameterivEXT)(unsigned int memoryObject, unsigned int pname, const int *params); typedef enum { GSR_GL_CONTEXT_TYPE_EGL, @@ -219,6 +226,13 @@ struct gsr_egl { FUNC_eglQueryDisplayAttribEXT eglQueryDisplayAttribEXT; FUNC_eglQueryDeviceStringEXT eglQueryDeviceStringEXT; FUNC_eglQueryDmaBufModifiersEXT eglQueryDmaBufModifiersEXT; + FUNC_glCreateMemoryObjectsEXT glCreateMemoryObjectsEXT; + FUNC_glImportMemoryFdEXT glImportMemoryFdEXT; + FUNC_glIsMemoryObjectEXT glIsMemoryObjectEXT; + FUNC_glTexStorageMem2DEXT glTexStorageMem2DEXT; + FUNC_glBufferStorageMemEXT glBufferStorageMemEXT; + FUNC_glNamedBufferStorageMemEXT glNamedBufferStorageMemEXT; + FUNC_glMemoryObjectParameterivEXT glMemoryObjectParameterivEXT; __GLXextFuncPtr (*glXGetProcAddress)(const unsigned char *procName); GLXFBConfig* (*glXChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems); @@ -298,6 +312,7 @@ struct gsr_egl { void (*glUniformMatrix2fv)(int location, int count, unsigned char transpose, const float *value); void (*glDebugMessageCallback)(GLDEBUGPROC callback, const void *userParam); void (*glScissor)(int x, int y, int width, int height); + void (*glCreateBuffers)(int n, unsigned int *buffers); void (*glReadPixels)(int x, int y, int width, int height, unsigned int format, unsigned int type, void *pixels); void* (*glMapBuffer)(unsigned int target, unsigned int access); unsigned char (*glUnmapBuffer)(unsigned int target); diff --git a/project.conf b/project.conf index bc8ec9f..376918d 100644 --- a/project.conf +++ b/project.conf @@ -33,4 +33,4 @@ wayland-client = ">=1" dbus-1 = ">=1" libpipewire-0.3 = ">=1" libspa-0.2 = ">=0" -#vulkan = ">=1" +vulkan = ">=1" @@ -225,6 +225,14 @@ static bool gsr_egl_proc_load_egl(gsr_egl *self) { self->eglQueryDeviceStringEXT = (FUNC_eglQueryDeviceStringEXT)self->eglGetProcAddress("eglQueryDeviceStringEXT"); self->eglQueryDmaBufModifiersEXT = (FUNC_eglQueryDmaBufModifiersEXT)self->eglGetProcAddress("eglQueryDmaBufModifiersEXT"); + self->glCreateMemoryObjectsEXT = (FUNC_glCreateMemoryObjectsEXT)self->eglGetProcAddress("glCreateMemoryObjectsEXT"); + self->glImportMemoryFdEXT = (FUNC_glImportMemoryFdEXT)self->eglGetProcAddress("glImportMemoryFdEXT"); + self->glIsMemoryObjectEXT = (FUNC_glIsMemoryObjectEXT)self->eglGetProcAddress("glIsMemoryObjectEXT"); + self->glTexStorageMem2DEXT = (FUNC_glTexStorageMem2DEXT)self->eglGetProcAddress("glTexStorageMem2DEXT"); + self->glBufferStorageMemEXT = (FUNC_glBufferStorageMemEXT)self->eglGetProcAddress("glBufferStorageMemEXT"); + self->glNamedBufferStorageMemEXT = (FUNC_glNamedBufferStorageMemEXT)self->eglGetProcAddress("glNamedBufferStorageMemEXT"); + self->glMemoryObjectParameterivEXT = (FUNC_glMemoryObjectParameterivEXT)self->eglGetProcAddress("glMemoryObjectParameterivEXT"); + if(!self->eglExportDMABUFImageQueryMESA) { fprintf(stderr, "gsr error: gsr_egl_load failed: could not find eglExportDMABUFImageQueryMESA\n"); return false; diff --git a/src/encoder/video/vulkan.c b/src/encoder/video/vulkan.c index 7210870..7643ada 100644 --- a/src/encoder/video/vulkan.c +++ b/src/encoder/video/vulkan.c @@ -8,26 +8,15 @@ //#include <vulkan/vulkan_core.h> +#define GL_HANDLE_TYPE_OPAQUE_FD_EXT 0x9586 #define GL_TEXTURE_TILING_EXT 0x9580 #define GL_OPTIMAL_TILING_EXT 0x9584 #define GL_LINEAR_TILING_EXT 0x9585 -#define GL_PIXEL_PACK_BUFFER 0x88EB -#define GL_PIXEL_UNPACK_BUFFER 0x88EC -#define GL_STREAM_READ 0x88E1 -#define GL_STREAM_DRAW 0x88E0 -#define GL_READ_ONLY 0x88B8 -#define GL_WRITE_ONLY 0x88B9 -#define GL_READ_FRAMEBUFFER 0x8CA8 - typedef struct { gsr_video_encoder_vulkan_params params; unsigned int target_textures[2]; AVBufferRef *device_ctx; - AVVulkanDeviceContext* vv; - unsigned int pbo_y[2]; - unsigned int pbo_uv[2]; - AVFrame *sw_frame; } gsr_video_encoder_vulkan; static bool gsr_video_encoder_vulkan_setup_context(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context) { @@ -84,6 +73,24 @@ static AVVulkanDeviceContext* video_codec_context_get_vulkan_data(AVCodecContext return (AVVulkanDeviceContext*)device_context->hwctx; } +static uint32_t get_memory_type_idx(VkPhysicalDevice pdev, const VkMemoryRequirements *mem_reqs, VkMemoryPropertyFlagBits prop_flags, PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties) { + VkPhysicalDeviceMemoryProperties pdev_mem_props; + uint32_t i; + + vkGetPhysicalDeviceMemoryProperties(pdev, &pdev_mem_props); + + for (i = 0; i < pdev_mem_props.memoryTypeCount; i++) { + const VkMemoryType *type = &pdev_mem_props.memoryTypes[i]; + + if ((mem_reqs->memoryTypeBits & (1 << i)) && + (type->propertyFlags & prop_flags) == prop_flags) { + return i; + break; + } + } + return UINT32_MAX; +} + static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context, AVFrame *frame) { const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0); if(res < 0) { @@ -91,56 +98,133 @@ static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *se return false; } - //AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0]; - self->vv = video_codec_context_get_vulkan_data(video_codec_context); + while(self->params.egl->glGetError()) {} + + AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0]; + AVVulkanDeviceContext* vv = video_codec_context_get_vulkan_data(video_codec_context); + const size_t luma_size = frame->width * frame->height; + if(vv) { + PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vv->get_proc_addr(vv->inst, "vkGetImageMemoryRequirements"); + PFN_vkAllocateMemory vkAllocateMemory = (PFN_vkAllocateMemory)vv->get_proc_addr(vv->inst, "vkAllocateMemory"); + PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vv->get_proc_addr(vv->inst, "vkGetPhysicalDeviceMemoryProperties"); + PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vv->get_proc_addr(vv->inst, "vkGetMemoryFdKHR"); + + VkMemoryRequirements mem_reqs = {0}; + vkGetImageMemoryRequirements(vv->act_dev, target_surface_id->img[0], &mem_reqs); + + fprintf(stderr, "size: %lu, alignment: %lu, memory bits: 0x%08x\n", mem_reqs.size, mem_reqs.alignment, mem_reqs.memoryTypeBits); + VkDeviceMemory mem; + { + VkExportMemoryAllocateInfo exp_mem_info; + VkMemoryAllocateInfo mem_alloc_info; + VkMemoryDedicatedAllocateInfoKHR ded_info; + + memset(&exp_mem_info, 0, sizeof(exp_mem_info)); + exp_mem_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + exp_mem_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + memset(&ded_info, 0, sizeof(ded_info)); + ded_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + ded_info.image = target_surface_id->img[0]; + + exp_mem_info.pNext = &ded_info; + + memset(&mem_alloc_info, 0, sizeof(mem_alloc_info)); + mem_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + mem_alloc_info.pNext = &exp_mem_info; + mem_alloc_info.allocationSize = target_surface_id->size[0]; + mem_alloc_info.memoryTypeIndex = get_memory_type_idx(vv->phys_dev, &mem_reqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vkGetPhysicalDeviceMemoryProperties); + + if (mem_alloc_info.memoryTypeIndex == UINT32_MAX) { + fprintf(stderr, "No suitable memory type index found.\n"); + return VK_NULL_HANDLE; + } + + if (vkAllocateMemory(vv->act_dev, &mem_alloc_info, 0, &mem) != + VK_SUCCESS) + return VK_NULL_HANDLE; + + fprintf(stderr, "memory: %p\n", (void*)mem); - const unsigned int internal_formats_nv12[2] = { GL_RGBA8, GL_RGBA8 }; // TODO: GL_R8, GL_R16 - const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; - const unsigned int formats[2] = { GL_RED, GL_RG }; - const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + } - for(int i = 0; i < 2; ++i) { - self->target_textures[i] = gl_create_texture(self->params.egl, video_codec_context->width / div[i], video_codec_context->height / div[i], self->params.color_depth == GSR_COLOR_DEPTH_8_BITS ? internal_formats_nv12[i] : internal_formats_p010[i], formats[i], GL_NEAREST); - if(self->target_textures[i] == 0) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_textures: failed to create opengl texture\n"); - return false; + fprintf(stderr, "target surface id: %p, %zu, %zu\n", (void*)target_surface_id->mem[0], target_surface_id->offset[0], target_surface_id->offset[1]); + fprintf(stderr, "vkGetMemoryFdKHR: %p\n", (void*)vkGetMemoryFdKHR); + + int fd = 0; + VkMemoryGetFdInfoKHR fd_info; + memset(&fd_info, 0, sizeof(fd_info)); + fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info.memory = target_surface_id->mem[0]; + fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + if(vkGetMemoryFdKHR(vv->act_dev, &fd_info, &fd) != VK_SUCCESS) { + fprintf(stderr, "failed!\n"); + } else { + fprintf(stderr, "fd: %d\n", fd); } - } - self->params.egl->glGenBuffers(2, self->pbo_y); + fprintf(stderr, "glImportMemoryFdEXT: %p, size: %zu\n", (void*)self->params.egl->glImportMemoryFdEXT, target_surface_id->size[0]); + const int tiling = target_surface_id->tiling == VK_IMAGE_TILING_LINEAR ? GL_LINEAR_TILING_EXT : GL_OPTIMAL_TILING_EXT; - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[0]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); + if(tiling != GL_OPTIMAL_TILING_EXT) { + fprintf(stderr, "tiling %d is not supported, only GL_OPTIMAL_TILING_EXT (%d) is supported\n", tiling, GL_OPTIMAL_TILING_EXT); + } - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[1]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + unsigned int gl_memory_obj = 0; + self->params.egl->glCreateMemoryObjectsEXT(1, &gl_memory_obj); - self->params.egl->glGenBuffers(2, self->pbo_uv); + //const int dedicated = GL_TRUE; + //self->params.egl->glMemoryObjectParameterivEXT(gl_memory_obj, GL_DEDICATED_MEMORY_OBJECT_EXT, &dedicated); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[0]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + self->params.egl->glImportMemoryFdEXT(gl_memory_obj, target_surface_id->size[0], GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd); + if(!self->params.egl->glIsMemoryObjectEXT(gl_memory_obj)) + fprintf(stderr, "failed to create object!\n"); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[1]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + fprintf(stderr, "gl memory obj: %u, error: %d\n", gl_memory_obj, self->params.egl->glGetError()); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + // fprintf(stderr, "0 gl error: %d\n", self->params.egl->glGetError()); + // unsigned int vertex_buffer = 0; + // self->params.egl->glGenBuffers(1, &vertex_buffer); + // self->params.egl->glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + // self->params.egl->glBufferStorageMemEXT(GL_ARRAY_BUFFER, target_surface_id->size[0], gl_memory_obj, target_surface_id->offset[0]); + // fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); - self->sw_frame = av_frame_alloc(); - self->sw_frame->format = AV_PIX_FMT_NV12; - self->sw_frame->width = frame->width; - self->sw_frame->height = frame->height; + // fprintf(stderr, "0 gl error: %d\n", self->params.egl->glGetError()); + // unsigned int buffer = 0; + // self->params.egl->glCreateBuffers(1, &buffer); + // self->params.egl->glNamedBufferStorageMemEXT(buffer, target_surface_id->size[0], gl_memory_obj, target_surface_id->offset[0]); + // fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); - // TODO: Remove - if(av_frame_get_buffer(self->sw_frame, 0) < 0) { - fprintf(stderr, "failed to allocate sw frame\n"); - } + self->params.egl->glGenTextures(1, &self->target_textures[0]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[0]); + + fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, tiling); + + fprintf(stderr, "tiling: %d\n", tiling); + + fprintf(stderr, "2 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_R8, frame->width, frame->height, gl_memory_obj, target_surface_id->offset[0]); + + fprintf(stderr, "3 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + self->params.egl->glGenTextures(1, &self->target_textures[1]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[1]); + + fprintf(stderr, "1 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, tiling); + + fprintf(stderr, "tiling: %d\n", tiling); + + fprintf(stderr, "2 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_RG8, frame->width/2, frame->height/2, gl_memory_obj, target_surface_id->offset[0] + luma_size); + + fprintf(stderr, "3 gl error: %d\n", self->params.egl->glGetError()); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + } - // TODO: Remove - if(av_frame_make_writable(self->sw_frame) < 0) { - fprintf(stderr, "failed to make writable\n"); - } return true; } @@ -185,91 +269,6 @@ void gsr_video_encoder_vulkan_stop(gsr_video_encoder_vulkan *self, AVCodecContex av_buffer_unref(&self->device_ctx); } -static void nop_free(void *opaque, uint8_t *data) { - -} - -static void gsr_video_encoder_vulkan_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) { - gsr_video_encoder_vulkan *self = encoder->priv; - - static int counter = 0; - ++counter; - - // AVBufferRef *av_buffer_create(uint8_t *data, size_t size, - // void (*free)(void *opaque, uint8_t *data), - // void *opaque, int flags); - - while(self->params.egl->glGetError()){} - self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[0]); - //fprintf(stderr, "1 gl err: %d\n", self->params.egl->glGetError()); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[counter % 2]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); - self->params.egl->glReadPixels(0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, 0); - //fprintf(stderr, "2 gl err: %d\n", self->params.egl->glGetError()); - - const int next_pbo_y = (counter + 1) % 2; - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_y]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); - //fprintf(stderr, "3 gl err: %d\n", self->params.egl->glGetError()); - uint8_t *ptr_y = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - //fprintf(stderr, "4 gl err: %d\n", self->params.egl->glGetError()); - if(!ptr_y) { - fprintf(stderr, "failed to map buffer y!\n"); - } - - while(self->params.egl->glGetError()){} - self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[1]); - //fprintf(stderr, "5 gl err: %d\n", self->params.egl->glGetError()); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[counter % 2]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); - //fprintf(stderr, "5.5 gl err: %d\n", self->params.egl->glGetError()); - self->params.egl->glReadPixels(0, 0, frame->width/2, frame->height/2, GL_RG, GL_UNSIGNED_BYTE, 0); - //fprintf(stderr, "6 gl err: %d\n", self->params.egl->glGetError()); - - const int next_pbo_uv = (counter + 1) % 2; - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[next_pbo_uv]); - self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); - //fprintf(stderr, "7 gl err: %d\n", self->params.egl->glGetError()); - uint8_t *ptr_uv = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - //fprintf(stderr, "8 gl err: %d\n", self->params.egl->glGetError()); - if(!ptr_uv) { - fprintf(stderr, "failed to map buffer uv!\n"); - } - - //self->sw_frame->buf[0] = av_buffer_create(ptr_y, 3840 * 2160, nop_free, NULL, 0); - //self->sw_frame->buf[1] = av_buffer_create(ptr_uv, 1920 * 1080 * 2, nop_free, NULL, 0); - //self->sw_frame->data[0] = self->sw_frame->buf[0]->data; - //self->sw_frame->data[1] = self->sw_frame->buf[1]->data; - //self->sw_frame->extended_data[0] = self->sw_frame->data[0]; - //self->sw_frame->extended_data[1] = self->sw_frame->data[1]; - - self->sw_frame->data[0] = ptr_y; - self->sw_frame->data[1] = ptr_uv; - - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - - //self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[1]); - //self->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, sw_frame->data[1]); - - //self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); - - int ret = av_hwframe_transfer_data(frame, self->sw_frame, 0); - if(ret < 0) { - fprintf(stderr, "transfer data failed, error: %s\n", av_err2str(ret)); - } - - //av_buffer_unref(&self->sw_frame->buf[0]); - //av_buffer_unref(&self->sw_frame->buf[1]); - - //av_frame_free(&sw_frame); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_y]); - self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_uv]); - self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); -} - static void gsr_video_encoder_vulkan_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { gsr_video_encoder_vulkan *self = encoder->priv; textures[0] = self->target_textures[0]; @@ -299,7 +298,7 @@ gsr_video_encoder* gsr_video_encoder_vulkan_create(const gsr_video_encoder_vulka *encoder = (gsr_video_encoder) { .start = gsr_video_encoder_vulkan_start, - .copy_textures_to_frame = gsr_video_encoder_vulkan_copy_textures_to_frame, + .copy_textures_to_frame = NULL, .get_textures = gsr_video_encoder_vulkan_get_textures, .destroy = gsr_video_encoder_vulkan_destroy, .priv = encoder_vulkan diff --git a/src/main.cpp b/src/main.cpp index b5f95db..75ed903 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -634,16 +634,16 @@ static AVCodecContext *create_video_codec_context(AVPixelFormat pix_fmt, if(codec_context->codec_id == AV_CODEC_ID_AV1 || codec_context->codec_id == AV_CODEC_ID_H264 || codec_context->codec_id == AV_CODEC_ID_HEVC) { switch(video_quality) { case VideoQuality::MEDIUM: - codec_context->global_quality = 150 * quality_multiply; + codec_context->global_quality = 130 * quality_multiply; break; case VideoQuality::HIGH: - codec_context->global_quality = 120 * quality_multiply; + codec_context->global_quality = 110 * quality_multiply; break; case VideoQuality::VERY_HIGH: - codec_context->global_quality = 115 * quality_multiply; + codec_context->global_quality = 95 * quality_multiply; break; case VideoQuality::ULTRA: - codec_context->global_quality = 90 * quality_multiply; + codec_context->global_quality = 85 * quality_multiply; break; } } else if(codec_context->codec_id == AV_CODEC_ID_VP8) { @@ -763,7 +763,7 @@ static AVFrame* create_audio_frame(AVCodecContext *audio_codec_context) { return frame; } -static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendor, gsr_color_depth color_depth, AVDictionary **options) { +static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendor, gsr_color_depth color_depth, VideoCodec video_codec, AVDictionary **options) { #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(61, 17, 100) if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: Only for vaapi @@ -785,14 +785,15 @@ static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendo av_dict_set(options, "profile", "main", 0); } #else + const bool use_nvidia_values = vendor == GSR_GPU_VENDOR_NVIDIA && !video_codec_is_vulkan(video_codec); if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: Only for vaapi //if(color_depth == GSR_COLOR_DEPTH_10_BITS) // av_dict_set_int(options, "profile", AV_PROFILE_H264_HIGH_10, 0); //else - av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 2 : AV_PROFILE_H264_HIGH, 0); + av_dict_set_int(options, "profile", use_nvidia_values ? 2 : AV_PROFILE_H264_HIGH, 0); } else if(codec_context->codec_id == AV_CODEC_ID_AV1) { - if(vendor == GSR_GPU_VENDOR_NVIDIA) { + if(use_nvidia_values) { if(color_depth == GSR_COLOR_DEPTH_10_BITS) av_dict_set_int(options, "highbitdepth", 1, 0); } else { @@ -800,9 +801,9 @@ static void dict_set_profile(AVCodecContext *codec_context, gsr_gpu_vendor vendo } } else if(codec_context->codec_id == AV_CODEC_ID_HEVC) { if(color_depth == GSR_COLOR_DEPTH_10_BITS) - av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 1 : AV_PROFILE_HEVC_MAIN_10, 0); + av_dict_set_int(options, "profile", use_nvidia_values ? 1 : AV_PROFILE_HEVC_MAIN_10, 0); else - av_dict_set_int(options, "profile", vendor == GSR_GPU_VENDOR_NVIDIA ? 0 : AV_PROFILE_HEVC_MAIN, 0); + av_dict_set_int(options, "profile", use_nvidia_values ? 0 : AV_PROFILE_HEVC_MAIN, 0); } #endif } @@ -867,7 +868,7 @@ static void open_video_software(AVCodecContext *codec_context, VideoQuality vide av_dict_set(&options, "preset", "veryfast", 0); av_dict_set(&options, "tune", "film", 0); - dict_set_profile(codec_context, GSR_GPU_VENDOR_INTEL, color_depth, &options); + dict_set_profile(codec_context, GSR_GPU_VENDOR_INTEL, color_depth, VideoCodec::H264, &options); if(codec_context->codec_id == AV_CODEC_ID_H264) { av_dict_set(&options, "coder", "cabac", 0); // TODO: cavlc is faster than cabac but worse compression. Which to use? @@ -1043,9 +1044,14 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide // TODO: Enable multipass - // TODO: Set "usage" option to "record"/"stream" and "content" option to "rendered" for vulkan encoding + dict_set_profile(codec_context, vendor, color_depth, video_codec, &options); - if(vendor == GSR_GPU_VENDOR_NVIDIA) { + if(video_codec_is_vulkan(video_codec)) { + av_dict_set_int(&options, "async_depth", 3, 0); + av_dict_set(&options, "tune", "hq", 0); + av_dict_set(&options, "usage", "record", 0); // TODO: Set to stream when streaming + av_dict_set(&options, "content", "rendered", 0); + } else if(vendor == GSR_GPU_VENDOR_NVIDIA) { // TODO: These dont seem to be necessary // av_dict_set_int(&options, "zerolatency", 1, 0); // if(codec_context->codec_id == AV_CODEC_ID_AV1) { @@ -1067,8 +1073,6 @@ static void open_video_hardware(AVCodecContext *codec_context, VideoQuality vide break; } - dict_set_profile(codec_context, vendor, color_depth, &options); - if(codec_context->codec_id == AV_CODEC_ID_H264) { // TODO: h264 10bit? // TODO: @@ -3465,9 +3469,9 @@ int main(int argc, char **argv) { video_codec = VideoCodec::VP8; } else if(strcmp(video_codec_to_use, "vp9") == 0) { video_codec = VideoCodec::VP9; - //} else if(strcmp(video_codec_to_use, "h264_vulkan") == 0) { + // } else if(strcmp(video_codec_to_use, "h264_vulkan") == 0) { // video_codec = VideoCodec::H264_VULKAN; - //} else if(strcmp(video_codec_to_use, "hevc_vulkan") == 0) { + // } else if(strcmp(video_codec_to_use, "hevc_vulkan") == 0) { // video_codec = VideoCodec::HEVC_VULKAN; } else if(strcmp(video_codec_to_use, "auto") != 0) { fprintf(stderr, "Error: -k should either be 'auto', 'h264', 'hevc', 'av1', 'vp8', 'vp9', 'hevc_hdr', 'av1_hdr', 'hevc_10bit' or 'av1_10bit', got: '%s'\n", video_codec_to_use); diff --git a/src/utils.c b/src/utils.c index 4aab138..9b4a4b6 100644 --- a/src/utils.c +++ b/src/utils.c @@ -362,13 +362,9 @@ bool gl_get_gpu_info(gsr_egl *egl, gsr_gpu_info *info) { bool supported = true; const unsigned char *gl_vendor = egl->glGetString(GL_VENDOR); const unsigned char *gl_renderer = egl->glGetString(GL_RENDERER); - const unsigned char *gl_version = egl->glGetString(GL_VERSION); info->gpu_version = 0; info->is_steam_deck = false; - info->driver_major = 0; - info->driver_minor = 0; - info->driver_patch = 0; if(!gl_vendor) { fprintf(stderr, "gsr error: failed to get gpu vendor\n"); @@ -408,21 +404,6 @@ bool gl_get_gpu_info(gsr_egl *egl, gsr_gpu_info *info) { info->is_steam_deck = strstr((const char*)gl_renderer, "vangogh") != NULL; } - if(gl_version) { - const char *mesa_p = strstr((const char*)gl_version, "Mesa "); - if(mesa_p) { - mesa_p += 5; - int major = 0; - int minor = 0; - int patch = 0; - if(sscanf(mesa_p, "%d.%d.%d", &major, &minor, &patch) == 3) { - info->driver_major = major; - info->driver_minor = minor; - info->driver_patch = patch; - } - } - } - end: return supported; } |