diff options
Diffstat (limited to 'src/encoder')
-rw-r--r-- | src/encoder/video/cuda.c | 460 | ||||
-rw-r--r-- | src/encoder/video/nvenc.c | 237 | ||||
-rw-r--r-- | src/encoder/video/software.c | 45 | ||||
-rw-r--r-- | src/encoder/video/vaapi.c | 227 | ||||
-rw-r--r-- | src/encoder/video/video.c | 8 | ||||
-rw-r--r-- | src/encoder/video/vulkan.c | 313 |
6 files changed, 600 insertions, 690 deletions
diff --git a/src/encoder/video/cuda.c b/src/encoder/video/cuda.c deleted file mode 100644 index a323940..0000000 --- a/src/encoder/video/cuda.c +++ /dev/null @@ -1,460 +0,0 @@ -#include "../../../include/encoder/video/cuda.h" -#include "../../../include/egl.h" -#include "../../../include/cuda.h" -#include "../../../external/nvEncodeAPI.h" - -#include <libavcodec/avcodec.h> -#include <libavutil/hwcontext_cuda.h> - -#include <stdlib.h> -#include <dlfcn.h> - -typedef struct { - gsr_video_encoder_cuda_params params; - - unsigned int target_textures[2]; - - AVBufferRef *device_ctx; - - gsr_cuda cuda; - CUgraphicsResource cuda_graphics_resources[2]; - CUarray mapped_arrays[2]; - CUstream cuda_stream; -} gsr_video_encoder_cuda; - -static bool gsr_video_encoder_cuda_setup_context(gsr_video_encoder_cuda *self, AVCodecContext *video_codec_context) { - self->device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); - if(!self->device_ctx) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_context failed: failed to create hardware device context\n"); - return false; - } - - AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)self->device_ctx->data; - AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; - cuda_device_context->cuda_ctx = self->cuda.cu_ctx; - if(av_hwdevice_ctx_init(self->device_ctx) < 0) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_context failed: failed to create hardware device context\n"); - av_buffer_unref(&self->device_ctx); - return false; - } - - AVBufferRef *frame_context = av_hwframe_ctx_alloc(self->device_ctx); - if(!frame_context) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_context failed: failed to create hwframe context\n"); - av_buffer_unref(&self->device_ctx); - return false; - } - - AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data; - hw_frame_context->width = video_codec_context->width; - hw_frame_context->height = video_codec_context->height; - hw_frame_context->sw_format = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? AV_PIX_FMT_P010LE : AV_PIX_FMT_NV12; - hw_frame_context->format = video_codec_context->pix_fmt; - hw_frame_context->device_ctx = (AVHWDeviceContext*)self->device_ctx->data; - - if (av_hwframe_ctx_init(frame_context) < 0) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_context failed: failed to initialize hardware frame context " - "(note: ffmpeg version needs to be > 4.0)\n"); - av_buffer_unref(&self->device_ctx); - //av_buffer_unref(&frame_context); - return false; - } - - self->cuda_stream = cuda_device_context->stream; - video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); - av_buffer_unref(&frame_context); - return true; -} - -static unsigned int gl_create_texture(gsr_egl *egl, int width, int height, int internal_format, unsigned int format) { - unsigned int texture_id = 0; - egl->glGenTextures(1, &texture_id); - egl->glBindTexture(GL_TEXTURE_2D, texture_id); - egl->glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, NULL); - - egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - - egl->glBindTexture(GL_TEXTURE_2D, 0); - return texture_id; -} - -static bool cuda_register_opengl_texture(gsr_cuda *cuda, CUgraphicsResource *cuda_graphics_resource, CUarray *mapped_array, unsigned int texture_id) { - CUresult res; - res = cuda->cuGraphicsGLRegisterImage(cuda_graphics_resource, texture_id, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_NONE); - if (res != CUDA_SUCCESS) { - const char *err_str = "unknown"; - cuda->cuGetErrorString(res, &err_str); - fprintf(stderr, "gsr error: cuda_register_opengl_texture: cuGraphicsGLRegisterImage failed, error: %s, texture " "id: %u\n", err_str, texture_id); - return false; - } - - res = cuda->cuGraphicsResourceSetMapFlags(*cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); - res = cuda->cuGraphicsMapResources(1, cuda_graphics_resource, 0); - - res = cuda->cuGraphicsSubResourceGetMappedArray(mapped_array, *cuda_graphics_resource, 0, 0); - return true; -} - -static bool gsr_video_encoder_cuda_setup_textures(gsr_video_encoder_cuda *self, AVCodecContext *video_codec_context, AVFrame *frame) { - const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0); - if(res < 0) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_textures: av_hwframe_get_buffer failed: %d\n", res); - return false; - } - - const unsigned int internal_formats_nv12[2] = { GL_R8, GL_RG8 }; - const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; - const unsigned int formats[2] = { GL_RED, GL_RG }; - const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size - - for(int i = 0; i < 2; ++i) { - self->target_textures[i] = gl_create_texture(self->params.egl, video_codec_context->width / div[i], video_codec_context->height / div[i], self->params.color_depth == GSR_COLOR_DEPTH_8_BITS ? internal_formats_nv12[i] : internal_formats_p010[i], formats[i]); - if(self->target_textures[i] == 0) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_textures: failed to create opengl texture\n"); - return false; - } - - if(!cuda_register_opengl_texture(&self->cuda, &self->cuda_graphics_resources[i], &self->mapped_arrays[i], self->target_textures[i])) { - return false; - } - } - - return true; -} - -static void* open_nvenc_library(void) { - dlerror(); /* clear */ - void *lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY); - if(!lib) { - lib = dlopen("libnvidia-encode.so", RTLD_LAZY); - if(!lib) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs failed: failed to load libnvidia-encode.so/libnvidia-encode.so.1, error: %s\n", dlerror()); - return NULL; - } - } - return lib; -} - -static bool profile_is_h264(const GUID *profile_guid) { - const GUID *h264_guids[] = { - &NV_ENC_H264_PROFILE_BASELINE_GUID, - &NV_ENC_H264_PROFILE_MAIN_GUID, - &NV_ENC_H264_PROFILE_HIGH_GUID, - &NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID, - &NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID - }; - - for(int i = 0; i < 5; ++i) { - if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) - return true; - } - - return false; -} - -static bool profile_is_hevc(const GUID *profile_guid) { - const GUID *h264_guids[] = { - &NV_ENC_HEVC_PROFILE_MAIN_GUID, - }; - - for(int i = 0; i < 1; ++i) { - if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) - return true; - } - - return false; -} - -static bool profile_is_hevc_10bit(const GUID *profile_guid) { - const GUID *h264_guids[] = { - &NV_ENC_HEVC_PROFILE_MAIN10_GUID, - }; - - for(int i = 0; i < 1; ++i) { - if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) - return true; - } - - return false; -} - -static bool profile_is_av1(const GUID *profile_guid) { - const GUID *h264_guids[] = { - &NV_ENC_AV1_PROFILE_MAIN_GUID, - }; - - for(int i = 0; i < 1; ++i) { - if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) - return true; - } - - return false; -} - -static bool encoder_get_supported_profiles(const NV_ENCODE_API_FUNCTION_LIST *function_list, void *nvenc_encoder, const GUID *encoder_guid, gsr_supported_video_codecs *supported_video_codecs) { - bool success = false; - GUID *profile_guids = NULL; - - uint32_t profile_guid_count = 0; - if(function_list->nvEncGetEncodeProfileGUIDCount(nvenc_encoder, *encoder_guid, &profile_guid_count) != NV_ENC_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeProfileGUIDCount failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); - goto fail; - } - - if(profile_guid_count == 0) - goto fail; - - profile_guids = calloc(profile_guid_count, sizeof(GUID)); - if(!profile_guids) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to allocate %d guids\n", (int)profile_guid_count); - goto fail; - } - - if(function_list->nvEncGetEncodeProfileGUIDs(nvenc_encoder, *encoder_guid, profile_guids, profile_guid_count, &profile_guid_count) != NV_ENC_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeProfileGUIDs failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); - goto fail; - } - - for(uint32_t i = 0; i < profile_guid_count; ++i) { - if(profile_is_h264(&profile_guids[i])) { - supported_video_codecs->h264 = true; - } else if(profile_is_hevc(&profile_guids[i])) { - supported_video_codecs->hevc = true; - } else if(profile_is_hevc_10bit(&profile_guids[i])) { - supported_video_codecs->hevc_hdr = true; - supported_video_codecs->hevc_10bit = true; - } else if(profile_is_av1(&profile_guids[i])) { - supported_video_codecs->av1 = true; - supported_video_codecs->av1_hdr = true; - supported_video_codecs->av1_10bit = true; - } - } - - success = true; - fail: - - if(profile_guids) - free(profile_guids); - - return success; -} - -static bool get_supported_video_codecs(const NV_ENCODE_API_FUNCTION_LIST *function_list, void *nvenc_encoder, gsr_supported_video_codecs *supported_video_codecs) { - bool success = false; - GUID *encoder_guids = NULL; - *supported_video_codecs = (gsr_supported_video_codecs){0}; - - uint32_t encode_guid_count = 0; - if(function_list->nvEncGetEncodeGUIDCount(nvenc_encoder, &encode_guid_count) != NV_ENC_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeGUIDCount failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); - goto fail; - } - - if(encode_guid_count == 0) - goto fail; - - encoder_guids = calloc(encode_guid_count, sizeof(GUID)); - if(!encoder_guids) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to allocate %d guids\n", (int)encode_guid_count); - goto fail; - } - - if(function_list->nvEncGetEncodeGUIDs(nvenc_encoder, encoder_guids, encode_guid_count, &encode_guid_count) != NV_ENC_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeGUIDs failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); - goto fail; - } - - for(uint32_t i = 0; i < encode_guid_count; ++i) { - encoder_get_supported_profiles(function_list, nvenc_encoder, &encoder_guids[i], supported_video_codecs); - } - - success = true; - fail: - - if(encoder_guids) - free(encoder_guids); - - return success; -} - -#define NVENCAPI_VERSION_470 (11 | (1 << 24)) - -static gsr_supported_video_codecs gsr_video_encoder_cuda_get_supported_codecs(gsr_video_encoder *encoder, bool cleanup) { - (void)encoder; - - void *nvenc_lib = NULL; - gsr_cuda cuda; - memset(&cuda, 0, sizeof(cuda)); - gsr_supported_video_codecs supported_video_codecs = {0}; - - if(!gsr_cuda_load(&cuda, NULL, false)) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to load cuda\n"); - goto done; - } - - nvenc_lib = open_nvenc_library(); - if(!nvenc_lib) - goto done; - - typedef NVENCSTATUS NVENCAPI (*FUNC_NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList); - FUNC_NvEncodeAPICreateInstance nvEncodeAPICreateInstance = (FUNC_NvEncodeAPICreateInstance)dlsym(nvenc_lib, "NvEncodeAPICreateInstance"); - if(!nvEncodeAPICreateInstance) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to find NvEncodeAPICreateInstance in libnvidia-encode.so\n"); - goto done; - } - - NV_ENCODE_API_FUNCTION_LIST function_list; - memset(&function_list, 0, sizeof(function_list)); - function_list.version = NV_ENCODE_API_FUNCTION_LIST_VER; - if(nvEncodeAPICreateInstance(&function_list) != NV_ENC_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncodeAPICreateInstance failed\n"); - goto done; - } - - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params; - memset(¶ms, 0, sizeof(params)); - params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; - params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; - params.device = cuda.cu_ctx; - params.apiVersion = NVENCAPI_VERSION; - - void *nvenc_encoder = NULL; - if(function_list.nvEncOpenEncodeSessionEx(¶ms, &nvenc_encoder) != NV_ENC_SUCCESS) { - // Old nvidia gpus dont support the new nvenc api (which is required for av1). - // In such cases fallback to old api version if possible and try again. - params.apiVersion = NVENCAPI_VERSION_470; - if(function_list.nvEncOpenEncodeSessionEx(¶ms, &nvenc_encoder) != NV_ENC_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncOpenEncodeSessionEx failed\n"); - goto done; - } - } - - get_supported_video_codecs(&function_list, nvenc_encoder, &supported_video_codecs); - - done: - if(cleanup) { - function_list.nvEncDestroyEncoder(nvenc_encoder); - if(nvenc_lib) - dlclose(nvenc_lib); - gsr_cuda_unload(&cuda); - } - - return supported_video_codecs; -} - -static void gsr_video_encoder_cuda_stop(gsr_video_encoder_cuda *self, AVCodecContext *video_codec_context); - -static bool gsr_video_encoder_cuda_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { - gsr_video_encoder_cuda *encoder_cuda = encoder->priv; - - const bool overclock = gsr_egl_get_display_server(encoder_cuda->params.egl) == GSR_DISPLAY_SERVER_X11 ? encoder_cuda->params.overclock : false; - if(!gsr_cuda_load(&encoder_cuda->cuda, encoder_cuda->params.egl->x11.dpy, overclock)) { - fprintf(stderr, "gsr error: gsr_video_encoder_cuda_start: failed to load cuda\n"); - gsr_video_encoder_cuda_stop(encoder_cuda, video_codec_context); - return false; - } - - if(!gsr_video_encoder_cuda_setup_context(encoder_cuda, video_codec_context)) { - gsr_video_encoder_cuda_stop(encoder_cuda, video_codec_context); - return false; - } - - if(!gsr_video_encoder_cuda_setup_textures(encoder_cuda, video_codec_context, frame)) { - gsr_video_encoder_cuda_stop(encoder_cuda, video_codec_context); - return false; - } - - return true; -} - -void gsr_video_encoder_cuda_stop(gsr_video_encoder_cuda *self, AVCodecContext *video_codec_context) { - self->params.egl->glDeleteTextures(2, self->target_textures); - self->target_textures[0] = 0; - self->target_textures[1] = 0; - - if(video_codec_context->hw_frames_ctx) - av_buffer_unref(&video_codec_context->hw_frames_ctx); - if(self->device_ctx) - av_buffer_unref(&self->device_ctx); - - if(self->cuda.cu_ctx) { - for(int i = 0; i < 2; ++i) { - if(self->cuda_graphics_resources[i]) { - self->cuda.cuGraphicsUnmapResources(1, &self->cuda_graphics_resources[i], 0); - self->cuda.cuGraphicsUnregisterResource(self->cuda_graphics_resources[i]); - self->cuda_graphics_resources[i] = 0; - } - } - } - - gsr_cuda_unload(&self->cuda); -} - -static void gsr_video_encoder_cuda_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame) { - gsr_video_encoder_cuda *encoder_cuda = encoder->priv; - const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size - for(int i = 0; i < 2; ++i) { - CUDA_MEMCPY2D memcpy_struct; - memcpy_struct.srcXInBytes = 0; - memcpy_struct.srcY = 0; - memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; - - memcpy_struct.dstXInBytes = 0; - memcpy_struct.dstY = 0; - memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; - - memcpy_struct.srcArray = encoder_cuda->mapped_arrays[i]; - memcpy_struct.srcPitch = frame->width / div[i]; - memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; - memcpy_struct.dstPitch = frame->linesize[i]; - memcpy_struct.WidthInBytes = frame->width * (encoder_cuda->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? 2 : 1); - memcpy_struct.Height = frame->height / div[i]; - // TODO: Remove this copy if possible - encoder_cuda->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, encoder_cuda->cuda_stream); - } - - // TODO: needed? - encoder_cuda->cuda.cuStreamSynchronize(encoder_cuda->cuda_stream); -} - -static void gsr_video_encoder_cuda_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { - gsr_video_encoder_cuda *encoder_cuda = encoder->priv; - textures[0] = encoder_cuda->target_textures[0]; - textures[1] = encoder_cuda->target_textures[1]; - *num_textures = 2; - *destination_color = encoder_cuda->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; -} - -static void gsr_video_encoder_cuda_destroy(gsr_video_encoder *encoder, AVCodecContext *video_codec_context) { - gsr_video_encoder_cuda_stop(encoder->priv, video_codec_context); - free(encoder->priv); - free(encoder); -} - -gsr_video_encoder* gsr_video_encoder_cuda_create(const gsr_video_encoder_cuda_params *params) { - gsr_video_encoder *encoder = calloc(1, sizeof(gsr_video_encoder)); - if(!encoder) - return NULL; - - gsr_video_encoder_cuda *encoder_cuda = calloc(1, sizeof(gsr_video_encoder_cuda)); - if(!encoder_cuda) { - free(encoder); - return NULL; - } - - encoder_cuda->params = *params; - - *encoder = (gsr_video_encoder) { - .get_supported_codecs = gsr_video_encoder_cuda_get_supported_codecs, - .start = gsr_video_encoder_cuda_start, - .copy_textures_to_frame = gsr_video_encoder_cuda_copy_textures_to_frame, - .get_textures = gsr_video_encoder_cuda_get_textures, - .destroy = gsr_video_encoder_cuda_destroy, - .priv = encoder_cuda - }; - - return encoder; -} diff --git a/src/encoder/video/nvenc.c b/src/encoder/video/nvenc.c new file mode 100644 index 0000000..718560d --- /dev/null +++ b/src/encoder/video/nvenc.c @@ -0,0 +1,237 @@ +#include "../../../include/encoder/video/nvenc.h" +#include "../../../include/egl.h" +#include "../../../include/cuda.h" + +#include <libavcodec/avcodec.h> +#include <libavutil/hwcontext_cuda.h> + +#include <stdlib.h> + +typedef struct { + gsr_video_encoder_nvenc_params params; + + unsigned int target_textures[2]; + + AVBufferRef *device_ctx; + + gsr_cuda cuda; + CUgraphicsResource cuda_graphics_resources[2]; + CUarray mapped_arrays[2]; + CUstream cuda_stream; +} gsr_video_encoder_nvenc; + +static bool gsr_video_encoder_nvenc_setup_context(gsr_video_encoder_nvenc *self, AVCodecContext *video_codec_context) { + self->device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if(!self->device_ctx) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_setup_context failed: failed to create hardware device context\n"); + return false; + } + + AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)self->device_ctx->data; + AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx; + cuda_device_context->cuda_ctx = self->cuda.cu_ctx; + if(av_hwdevice_ctx_init(self->device_ctx) < 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_setup_context failed: failed to create hardware device context\n"); + av_buffer_unref(&self->device_ctx); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(self->device_ctx); + if(!frame_context) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_setup_context failed: failed to create hwframe context\n"); + av_buffer_unref(&self->device_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? AV_PIX_FMT_P010LE : AV_PIX_FMT_NV12; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ctx = (AVHWDeviceContext*)self->device_ctx->data; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_setup_context failed: failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&self->device_ctx); + //av_buffer_unref(&frame_context); + return false; + } + + self->cuda_stream = cuda_device_context->stream; + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); + av_buffer_unref(&frame_context); + return true; +} + +static unsigned int gl_create_texture(gsr_egl *egl, int width, int height, int internal_format, unsigned int format) { + unsigned int texture_id = 0; + egl->glGenTextures(1, &texture_id); + egl->glBindTexture(GL_TEXTURE_2D, texture_id); + egl->glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, NULL); + + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + egl->glBindTexture(GL_TEXTURE_2D, 0); + return texture_id; +} + +static bool cuda_register_opengl_texture(gsr_cuda *cuda, CUgraphicsResource *cuda_graphics_resource, CUarray *mapped_array, unsigned int texture_id) { + CUresult res; + res = cuda->cuGraphicsGLRegisterImage(cuda_graphics_resource, texture_id, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_NONE); + if (res != CUDA_SUCCESS) { + const char *err_str = "unknown"; + cuda->cuGetErrorString(res, &err_str); + fprintf(stderr, "gsr error: cuda_register_opengl_texture: cuGraphicsGLRegisterImage failed, error: %s, texture " "id: %u\n", err_str, texture_id); + return false; + } + + res = cuda->cuGraphicsResourceSetMapFlags(*cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); + res = cuda->cuGraphicsMapResources(1, cuda_graphics_resource, 0); + + res = cuda->cuGraphicsSubResourceGetMappedArray(mapped_array, *cuda_graphics_resource, 0, 0); + return true; +} + +static bool gsr_video_encoder_nvenc_setup_textures(gsr_video_encoder_nvenc *self, AVCodecContext *video_codec_context, AVFrame *frame) { + const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0); + if(res < 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_setup_textures: av_hwframe_get_buffer failed: %d\n", res); + return false; + } + + const unsigned int internal_formats_nv12[2] = { GL_R8, GL_RG8 }; + const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; + const unsigned int formats[2] = { GL_RED, GL_RG }; + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + + for(int i = 0; i < 2; ++i) { + self->target_textures[i] = gl_create_texture(self->params.egl, video_codec_context->width / div[i], video_codec_context->height / div[i], self->params.color_depth == GSR_COLOR_DEPTH_8_BITS ? internal_formats_nv12[i] : internal_formats_p010[i], formats[i]); + if(self->target_textures[i] == 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_setup_textures: failed to create opengl texture\n"); + return false; + } + + if(!cuda_register_opengl_texture(&self->cuda, &self->cuda_graphics_resources[i], &self->mapped_arrays[i], self->target_textures[i])) { + return false; + } + } + + return true; +} + +static void gsr_video_encoder_nvenc_stop(gsr_video_encoder_nvenc *self, AVCodecContext *video_codec_context); + +static bool gsr_video_encoder_nvenc_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { + gsr_video_encoder_nvenc *self = encoder->priv; + + const bool overclock = gsr_egl_get_display_server(self->params.egl) == GSR_DISPLAY_SERVER_X11 ? self->params.overclock : false; + if(!gsr_cuda_load(&self->cuda, self->params.egl->x11.dpy, overclock)) { + fprintf(stderr, "gsr error: gsr_video_encoder_nvenc_start: failed to load cuda\n"); + gsr_video_encoder_nvenc_stop(self, video_codec_context); + return false; + } + + if(!gsr_video_encoder_nvenc_setup_context(self, video_codec_context)) { + gsr_video_encoder_nvenc_stop(self, video_codec_context); + return false; + } + + if(!gsr_video_encoder_nvenc_setup_textures(self, video_codec_context, frame)) { + gsr_video_encoder_nvenc_stop(self, video_codec_context); + return false; + } + + return true; +} + +void gsr_video_encoder_nvenc_stop(gsr_video_encoder_nvenc *self, AVCodecContext *video_codec_context) { + self->params.egl->glDeleteTextures(2, self->target_textures); + self->target_textures[0] = 0; + self->target_textures[1] = 0; + + if(video_codec_context->hw_frames_ctx) + av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(self->device_ctx) + av_buffer_unref(&self->device_ctx); + + if(self->cuda.cu_ctx) { + for(int i = 0; i < 2; ++i) { + if(self->cuda_graphics_resources[i]) { + self->cuda.cuGraphicsUnmapResources(1, &self->cuda_graphics_resources[i], 0); + self->cuda.cuGraphicsUnregisterResource(self->cuda_graphics_resources[i]); + self->cuda_graphics_resources[i] = 0; + } + } + } + + gsr_cuda_unload(&self->cuda); +} + +static void gsr_video_encoder_nvenc_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) { + gsr_video_encoder_nvenc *self = encoder->priv; + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + for(int i = 0; i < 2; ++i) { + CUDA_MEMCPY2D memcpy_struct; + memcpy_struct.srcXInBytes = 0; + memcpy_struct.srcY = 0; + memcpy_struct.srcMemoryType = CU_MEMORYTYPE_ARRAY; + + memcpy_struct.dstXInBytes = 0; + memcpy_struct.dstY = 0; + memcpy_struct.dstMemoryType = CU_MEMORYTYPE_DEVICE; + + memcpy_struct.srcArray = self->mapped_arrays[i]; + memcpy_struct.srcPitch = frame->width / div[i]; + memcpy_struct.dstDevice = (CUdeviceptr)frame->data[i]; + memcpy_struct.dstPitch = frame->linesize[i]; + memcpy_struct.WidthInBytes = frame->width * (self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? 2 : 1); + memcpy_struct.Height = frame->height / div[i]; + // TODO: Remove this copy if possible + self->cuda.cuMemcpy2DAsync_v2(&memcpy_struct, self->cuda_stream); + } + + // TODO: needed? + self->cuda.cuStreamSynchronize(self->cuda_stream); +} + +static void gsr_video_encoder_nvenc_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { + gsr_video_encoder_nvenc *self = encoder->priv; + textures[0] = self->target_textures[0]; + textures[1] = self->target_textures[1]; + *num_textures = 2; + *destination_color = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; +} + +static void gsr_video_encoder_nvenc_destroy(gsr_video_encoder *encoder, AVCodecContext *video_codec_context) { + gsr_video_encoder_nvenc_stop(encoder->priv, video_codec_context); + free(encoder->priv); + free(encoder); +} + +gsr_video_encoder* gsr_video_encoder_nvenc_create(const gsr_video_encoder_nvenc_params *params) { + gsr_video_encoder *encoder = calloc(1, sizeof(gsr_video_encoder)); + if(!encoder) + return NULL; + + gsr_video_encoder_nvenc *encoder_cuda = calloc(1, sizeof(gsr_video_encoder_nvenc)); + if(!encoder_cuda) { + free(encoder); + return NULL; + } + + encoder_cuda->params = *params; + + *encoder = (gsr_video_encoder) { + .start = gsr_video_encoder_nvenc_start, + .copy_textures_to_frame = gsr_video_encoder_nvenc_copy_textures_to_frame, + .get_textures = gsr_video_encoder_nvenc_get_textures, + .destroy = gsr_video_encoder_nvenc_destroy, + .priv = encoder_cuda + }; + + return encoder; +} diff --git a/src/encoder/video/software.c b/src/encoder/video/software.c index 4a4b78a..be227f2 100644 --- a/src/encoder/video/software.c +++ b/src/encoder/video/software.c @@ -58,26 +58,10 @@ static bool gsr_video_encoder_software_setup_textures(gsr_video_encoder_software return true; } -static gsr_supported_video_codecs gsr_video_encoder_software_get_supported_codecs(gsr_video_encoder *encoder, bool cleanup) { - (void)encoder; - (void)cleanup; - return (gsr_supported_video_codecs) { - .h264 = true, - .hevc = false, - .hevc_hdr = false, - .hevc_10bit = false, - .av1 = false, - .av1_hdr = false, - .av1_10bit = false, - .vp8 = false, - .vp9 = false - }; -} - static void gsr_video_encoder_software_stop(gsr_video_encoder_software *self, AVCodecContext *video_codec_context); static bool gsr_video_encoder_software_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { - gsr_video_encoder_software *encoder_software = encoder->priv; + gsr_video_encoder_software *self = encoder->priv; video_codec_context->width = FFALIGN(video_codec_context->width, LINESIZE_ALIGNMENT); video_codec_context->height = FFALIGN(video_codec_context->height, 2); @@ -85,8 +69,8 @@ static bool gsr_video_encoder_software_start(gsr_video_encoder *encoder, AVCodec frame->width = video_codec_context->width; frame->height = video_codec_context->height; - if(!gsr_video_encoder_software_setup_textures(encoder_software, video_codec_context, frame)) { - gsr_video_encoder_software_stop(encoder_software, video_codec_context); + if(!gsr_video_encoder_software_setup_textures(self, video_codec_context, frame)) { + gsr_video_encoder_software_stop(self, video_codec_context); return false; } @@ -100,29 +84,29 @@ void gsr_video_encoder_software_stop(gsr_video_encoder_software *self, AVCodecCo self->target_textures[1] = 0; } -static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame) { - gsr_video_encoder_software *encoder_software = encoder->priv; +static void gsr_video_encoder_software_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) { + gsr_video_encoder_software *self = encoder->priv; // TODO: hdr support const unsigned int formats[2] = { GL_RED, GL_RG }; for(int i = 0; i < 2; ++i) { - encoder_software->params.egl->glBindTexture(GL_TEXTURE_2D, encoder_software->target_textures[i]); + self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[i]); // We could use glGetTexSubImage and then we wouldn't have to use a specific linesize (LINESIZE_ALIGNMENT) that adds padding, // but glGetTexSubImage is only available starting from opengl 4.5. - encoder_software->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]); + self->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, formats[i], GL_UNSIGNED_BYTE, frame->data[i]); } - encoder_software->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); // cap_kms->kms.base.egl->eglSwapBuffers(cap_kms->kms.base.egl->egl_display, cap_kms->kms.base.egl->egl_surface); - encoder_software->params.egl->glFlush(); - encoder_software->params.egl->glFinish(); + self->params.egl->glFlush(); + self->params.egl->glFinish(); } static void gsr_video_encoder_software_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { - gsr_video_encoder_software *encoder_software = encoder->priv; - textures[0] = encoder_software->target_textures[0]; - textures[1] = encoder_software->target_textures[1]; + gsr_video_encoder_software *self = encoder->priv; + textures[0] = self->target_textures[0]; + textures[1] = self->target_textures[1]; *num_textures = 2; - *destination_color = encoder_software->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; + *destination_color = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; } static void gsr_video_encoder_software_destroy(gsr_video_encoder *encoder, AVCodecContext *video_codec_context) { @@ -145,7 +129,6 @@ gsr_video_encoder* gsr_video_encoder_software_create(const gsr_video_encoder_sof encoder_software->params = *params; *encoder = (gsr_video_encoder) { - .get_supported_codecs = gsr_video_encoder_software_get_supported_codecs, .start = gsr_video_encoder_software_start, .copy_textures_to_frame = gsr_video_encoder_software_copy_textures_to_frame, .get_textures = gsr_video_encoder_software_get_textures, diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c index 03218cb..d558785 100644 --- a/src/encoder/video/vaapi.c +++ b/src/encoder/video/vaapi.c @@ -4,9 +4,9 @@ #include <libavcodec/avcodec.h> #include <libavutil/hwcontext_vaapi.h> +#include <libavutil/intreadwrite.h> #include <va/va_drmcommon.h> -#include <va/va_drm.h> #include <stdlib.h> #include <unistd.h> @@ -102,6 +102,7 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self uint32_t pitches[4]; uint64_t modifiers[4]; for(uint32_t j = 0; j < self->prime.layers[layer].num_planes; ++j) { + // TODO: Close these? in _stop, using self->prime fds[j] = self->prime.objects[self->prime.layers[layer].object_index[j]].fd; offsets[j] = self->prime.layers[layer].offset[j]; pitches[j] = self->prime.layers[layer].pitch[j]; @@ -147,194 +148,16 @@ static bool gsr_video_encoder_vaapi_setup_textures(gsr_video_encoder_vaapi *self } } -static bool profile_is_h264(VAProfile profile) { - switch(profile) { - case 5: // VAProfileH264Baseline - case VAProfileH264Main: - case VAProfileH264High: - case VAProfileH264ConstrainedBaseline: - return true; - default: - return false; - } -} - -static bool profile_is_hevc_8bit(VAProfile profile) { - switch(profile) { - case VAProfileHEVCMain: - return true; - default: - return false; - } -} - -static bool profile_is_hevc_10bit(VAProfile profile) { - switch(profile) { - case VAProfileHEVCMain10: - //case VAProfileHEVCMain12: - //case VAProfileHEVCMain422_10: - //case VAProfileHEVCMain422_12: - //case VAProfileHEVCMain444: - //case VAProfileHEVCMain444_10: - //case VAProfileHEVCMain444_12: - return true; - default: - return false; - } -} - -static bool profile_is_av1(VAProfile profile) { - switch(profile) { - case VAProfileAV1Profile0: - case VAProfileAV1Profile1: - return true; - default: - return false; - } -} - -static bool profile_is_vp8(VAProfile profile) { - switch(profile) { - case VAProfileVP8Version0_3: - return true; - default: - return false; - } -} - -static bool profile_is_vp9(VAProfile profile) { - switch(profile) { - case VAProfileVP9Profile0: - case VAProfileVP9Profile1: - case VAProfileVP9Profile2: - case VAProfileVP9Profile3: - return true; - default: - return false; - } -} - -static bool profile_supports_video_encoding(VADisplay va_dpy, VAProfile profile) { - int num_entrypoints = vaMaxNumEntrypoints(va_dpy); - if(num_entrypoints <= 0) - return false; - - VAEntrypoint *entrypoint_list = calloc(num_entrypoints, sizeof(VAEntrypoint)); - if(!entrypoint_list) - return false; - - bool supported = false; - if(vaQueryConfigEntrypoints(va_dpy, profile, entrypoint_list, &num_entrypoints) == VA_STATUS_SUCCESS) { - for(int i = 0; i < num_entrypoints; ++i) { - if(entrypoint_list[i] == VAEntrypointEncSlice) { - supported = true; - break; - } - } - } - - free(entrypoint_list); - return supported; -} - -static bool get_supported_video_codecs(VADisplay va_dpy, gsr_supported_video_codecs *video_codecs, bool cleanup) { - *video_codecs = (gsr_supported_video_codecs){0}; - bool success = false; - VAProfile *profile_list = NULL; - - vaSetInfoCallback(va_dpy, NULL, NULL); - - int va_major = 0; - int va_minor = 0; - if(vaInitialize(va_dpy, &va_major, &va_minor) != VA_STATUS_SUCCESS) { - fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: vaInitialize failed\n"); - goto fail; - } - - int num_profiles = vaMaxNumProfiles(va_dpy); - if(num_profiles <= 0) - goto fail; - - profile_list = calloc(num_profiles, sizeof(VAProfile)); - if(!profile_list || vaQueryConfigProfiles(va_dpy, profile_list, &num_profiles) != VA_STATUS_SUCCESS) - goto fail; - - for(int i = 0; i < num_profiles; ++i) { - if(profile_is_h264(profile_list[i])) { - if(profile_supports_video_encoding(va_dpy, profile_list[i])) - video_codecs->h264 = true; - } else if(profile_is_hevc_8bit(profile_list[i])) { - if(profile_supports_video_encoding(va_dpy, profile_list[i])) - video_codecs->hevc = true; - } else if(profile_is_hevc_10bit(profile_list[i])) { - if(profile_supports_video_encoding(va_dpy, profile_list[i])) { - video_codecs->hevc_hdr = true; - video_codecs->hevc_10bit = true; - } - } else if(profile_is_av1(profile_list[i])) { - if(profile_supports_video_encoding(va_dpy, profile_list[i])) { - video_codecs->av1 = true; - video_codecs->av1_hdr = true; - video_codecs->av1_10bit = true; - } - } else if(profile_is_vp8(profile_list[i])) { - if(profile_supports_video_encoding(va_dpy, profile_list[i])) - video_codecs->vp8 = true; - } else if(profile_is_vp9(profile_list[i])) { - if(profile_supports_video_encoding(va_dpy, profile_list[i])) - video_codecs->vp9 = true; - } - } - - success = true; - fail: - if(profile_list) - free(profile_list); - - if(cleanup) - vaTerminate(va_dpy); - - return success; -} - -static gsr_supported_video_codecs gsr_video_encoder_vaapi_get_supported_codecs(gsr_video_encoder *encoder, bool cleanup) { - gsr_video_encoder_vaapi *encoder_vaapi = encoder->priv; - gsr_supported_video_codecs supported_video_codecs = {0}; - - char render_path[128]; - if(!gsr_card_path_get_render_path(encoder_vaapi->params.egl->card_path, render_path)) { - fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to get /dev/dri/renderDXXX file from %s\n", encoder_vaapi->params.egl->card_path); - return supported_video_codecs; - } - - const int drm_fd = open(render_path, O_RDWR); - if(drm_fd == -1) { - fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to open device %s\n", render_path); - return supported_video_codecs; - } - - VADisplay va_dpy = vaGetDisplayDRM(drm_fd); - if(va_dpy) { - if(!get_supported_video_codecs(va_dpy, &supported_video_codecs, cleanup)) - fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to query supported video codecs for device %s\n", render_path); - } - - if(cleanup) - close(drm_fd); - - return supported_video_codecs; -} - static void gsr_video_encoder_vaapi_stop(gsr_video_encoder_vaapi *self, AVCodecContext *video_codec_context); static bool gsr_video_encoder_vaapi_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { - gsr_video_encoder_vaapi *encoder_vaapi = encoder->priv; + gsr_video_encoder_vaapi *self = encoder->priv; - if(encoder_vaapi->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && video_codec_context->codec_id == AV_CODEC_ID_HEVC) { + if(self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && video_codec_context->codec_id == AV_CODEC_ID_HEVC) { // TODO: dont do this if using ffmpeg reports that this is not needed (AMD driver bug that was fixed recently) video_codec_context->width = FFALIGN(video_codec_context->width, 64); video_codec_context->height = FFALIGN(video_codec_context->height, 16); - } else if(encoder_vaapi->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && video_codec_context->codec_id == AV_CODEC_ID_AV1) { + } else if(self->params.egl->gpu_info.vendor == GSR_GPU_VENDOR_AMD && video_codec_context->codec_id == AV_CODEC_ID_AV1) { // TODO: Dont do this for VCN 5 and forward which should fix this hardware bug video_codec_context->width = FFALIGN(video_codec_context->width, 64); // AMD driver has special case handling for 1080 height to set it to 1082 instead of 1088 (1080 aligned to 16). @@ -346,20 +169,40 @@ static bool gsr_video_encoder_vaapi_start(gsr_video_encoder *encoder, AVCodecCon } } - if(video_codec_context->width != frame->width || video_codec_context->height != frame->height) { + const int crop_top = (video_codec_context->height - frame->height) / 2; + const int crop_left = (video_codec_context->width - frame->width) / 2; + if(crop_top != 0 || crop_left != 0) { fprintf(stderr, "gsr warning: gsr_video_encoder_vaapi_start: black bars have been added to the video because of a bug in AMD drivers/hardware. Record with h264 codec instead (-k h264) to get around this issue\n"); +#if 0 + #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 10, 100) + const int crop_bottom = crop_top; + const int crop_right = crop_left; + fprintf(stderr, "gsr info: cropping metadata has been added to the file to try and workaround this issue. Video players that support this will remove the black bars when the video is playing\n"); + const int frame_cropping_data_size = 4 * sizeof(uint32_t); + uint8_t *frame_cropping = av_malloc(frame_cropping_data_size); + if(frame_cropping) { + AV_WL32(frame_cropping + 0, crop_top); + AV_WL32(frame_cropping + 4, crop_bottom); + AV_WL32(frame_cropping + 8, crop_left); + AV_WL32(frame_cropping + 12, crop_right); + const bool sidedata_added = av_packet_side_data_add(&video_stream->codecpar->coded_side_data, &video_stream->codecpar->nb_coded_side_data, AV_PKT_DATA_FRAME_CROPPING, frame_cropping, frame_cropping_data_size, 0) != NULL; + if(!sidedata_added) + av_free(frame_cropping); + } + #endif +#endif } frame->width = video_codec_context->width; frame->height = video_codec_context->height; - if(!gsr_video_encoder_vaapi_setup_context(encoder_vaapi, video_codec_context)) { - gsr_video_encoder_vaapi_stop(encoder_vaapi, video_codec_context); + if(!gsr_video_encoder_vaapi_setup_context(self, video_codec_context)) { + gsr_video_encoder_vaapi_stop(self, video_codec_context); return false; } - if(!gsr_video_encoder_vaapi_setup_textures(encoder_vaapi, video_codec_context, frame)) { - gsr_video_encoder_vaapi_stop(encoder_vaapi, video_codec_context); + if(!gsr_video_encoder_vaapi_setup_textures(self, video_codec_context, frame)) { + gsr_video_encoder_vaapi_stop(self, video_codec_context); return false; } @@ -385,11 +228,11 @@ void gsr_video_encoder_vaapi_stop(gsr_video_encoder_vaapi *self, AVCodecContext } static void gsr_video_encoder_vaapi_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { - gsr_video_encoder_vaapi *encoder_vaapi = encoder->priv; - textures[0] = encoder_vaapi->target_textures[0]; - textures[1] = encoder_vaapi->target_textures[1]; + gsr_video_encoder_vaapi *self = encoder->priv; + textures[0] = self->target_textures[0]; + textures[1] = self->target_textures[1]; *num_textures = 2; - *destination_color = encoder_vaapi->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; + *destination_color = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; } static void gsr_video_encoder_vaapi_destroy(gsr_video_encoder *encoder, AVCodecContext *video_codec_context) { @@ -412,9 +255,7 @@ gsr_video_encoder* gsr_video_encoder_vaapi_create(const gsr_video_encoder_vaapi_ encoder_vaapi->params = *params; *encoder = (gsr_video_encoder) { - .get_supported_codecs = gsr_video_encoder_vaapi_get_supported_codecs, .start = gsr_video_encoder_vaapi_start, - .copy_textures_to_frame = NULL, .get_textures = gsr_video_encoder_vaapi_get_textures, .destroy = gsr_video_encoder_vaapi_destroy, .priv = encoder_vaapi diff --git a/src/encoder/video/video.c b/src/encoder/video/video.c index daaf537..76d53b0 100644 --- a/src/encoder/video/video.c +++ b/src/encoder/video/video.c @@ -1,10 +1,6 @@ #include "../../../include/encoder/video/video.h" #include <assert.h> -gsr_supported_video_codecs gsr_video_encoder_get_supported_codecs(gsr_video_encoder *encoder, bool cleanup) { - return encoder->get_supported_codecs(encoder, cleanup); -} - bool gsr_video_encoder_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { assert(!encoder->started); bool res = encoder->start(encoder, video_codec_context, frame); @@ -13,10 +9,10 @@ bool gsr_video_encoder_start(gsr_video_encoder *encoder, AVCodecContext *video_c return res; } -void gsr_video_encoder_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame) { +void gsr_video_encoder_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) { assert(encoder->started); if(encoder->copy_textures_to_frame) - encoder->copy_textures_to_frame(encoder, frame); + encoder->copy_textures_to_frame(encoder, frame, color_conversion); } void gsr_video_encoder_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { diff --git a/src/encoder/video/vulkan.c b/src/encoder/video/vulkan.c new file mode 100644 index 0000000..0b6c380 --- /dev/null +++ b/src/encoder/video/vulkan.c @@ -0,0 +1,313 @@ +#include "../../../include/encoder/video/vulkan.h" +#include "../../../include/utils.h" +#include "../../../include/egl.h" + +#include <libavcodec/avcodec.h> +#define VK_NO_PROTOTYPES +#include <libavutil/hwcontext_vulkan.h> + +//#include <vulkan/vulkan_core.h> + +#define GL_TEXTURE_TILING_EXT 0x9580 +#define GL_OPTIMAL_TILING_EXT 0x9584 +#define GL_LINEAR_TILING_EXT 0x9585 + +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_DRAW 0x88E0 +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_FRAMEBUFFER 0x8CA8 + +typedef struct { + gsr_video_encoder_vulkan_params params; + unsigned int target_textures[2]; + AVBufferRef *device_ctx; + AVVulkanDeviceContext* vv; + unsigned int pbo_y[2]; + unsigned int pbo_uv[2]; + AVFrame *sw_frame; +} gsr_video_encoder_vulkan; + +static bool gsr_video_encoder_vulkan_setup_context(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context) { + AVDictionary *options = NULL; + //av_dict_set(&options, "linear_images", "1", 0); + //av_dict_set(&options, "disable_multiplane", "1", 0); + + // TODO: Use correct device + if(av_hwdevice_ctx_create(&self->device_ctx, AV_HWDEVICE_TYPE_VULKAN, NULL, options, 0) < 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_vulkan_setup_context: failed to create hardware device context\n"); + return false; + } + + AVBufferRef *frame_context = av_hwframe_ctx_alloc(self->device_ctx); + if(!frame_context) { + fprintf(stderr, "gsr error: gsr_video_encoder_vulkan_setup_context: failed to create hwframe context\n"); + av_buffer_unref(&self->device_ctx); + return false; + } + + AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data; + hw_frame_context->width = video_codec_context->width; + hw_frame_context->height = video_codec_context->height; + hw_frame_context->sw_format = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? AV_PIX_FMT_P010LE : AV_PIX_FMT_NV12; + hw_frame_context->format = video_codec_context->pix_fmt; + hw_frame_context->device_ctx = (AVHWDeviceContext*)self->device_ctx->data; + + //AVVulkanFramesContext *vk_frame_ctx = (AVVulkanFramesContext*)hw_frame_context->hwctx; + //hw_frame_context->initial_pool_size = 20; + + if (av_hwframe_ctx_init(frame_context) < 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_vulkan_setup_context: failed to initialize hardware frame context " + "(note: ffmpeg version needs to be > 4.0)\n"); + av_buffer_unref(&self->device_ctx); + //av_buffer_unref(&frame_context); + return false; + } + + video_codec_context->hw_frames_ctx = av_buffer_ref(frame_context); + av_buffer_unref(&frame_context); + return true; +} + +static unsigned int gl_create_texture(gsr_egl *egl, int width, int height, int internal_format, unsigned int format) { + unsigned int texture_id = 0; + egl->glGenTextures(1, &texture_id); + egl->glBindTexture(GL_TEXTURE_2D, texture_id); + //egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, GL_OPTIMAL_TILING_EXT); + egl->glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, NULL); + + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + egl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + + egl->glBindTexture(GL_TEXTURE_2D, 0); + return texture_id; +} + +static AVVulkanDeviceContext* video_codec_context_get_vulkan_data(AVCodecContext *video_codec_context) { + AVBufferRef *hw_frames_ctx = video_codec_context->hw_frames_ctx; + if(!hw_frames_ctx) + return NULL; + + AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)hw_frames_ctx->data; + AVHWDeviceContext *device_context = (AVHWDeviceContext*)hw_frame_context->device_ctx; + if(device_context->type != AV_HWDEVICE_TYPE_VULKAN) + return NULL; + + return (AVVulkanDeviceContext*)device_context->hwctx; +} + +static bool gsr_video_encoder_vulkan_setup_textures(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context, AVFrame *frame) { + const int res = av_hwframe_get_buffer(video_codec_context->hw_frames_ctx, frame, 0); + if(res < 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_vulkan_setup_textures: av_hwframe_get_buffer failed: %d\n", res); + return false; + } + + //AVVkFrame *target_surface_id = (AVVkFrame*)frame->data[0]; + self->vv = video_codec_context_get_vulkan_data(video_codec_context); + + const unsigned int internal_formats_nv12[2] = { GL_RGBA8, GL_RGBA8 }; + const unsigned int internal_formats_p010[2] = { GL_R16, GL_RG16 }; + const unsigned int formats[2] = { GL_RED, GL_RG }; + const int div[2] = {1, 2}; // divide UV texture size by 2 because chroma is half size + + for(int i = 0; i < 2; ++i) { + self->target_textures[i] = gl_create_texture(self->params.egl, video_codec_context->width / div[i], video_codec_context->height / div[i], self->params.color_depth == GSR_COLOR_DEPTH_8_BITS ? internal_formats_nv12[i] : internal_formats_p010[i], formats[i]); + if(self->target_textures[i] == 0) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_setup_textures: failed to create opengl texture\n"); + return false; + } + } + + self->params.egl->glGenBuffers(2, self->pbo_y); + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[0]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[1]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + self->params.egl->glGenBuffers(2, self->pbo_uv); + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[0]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[1]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + self->sw_frame = av_frame_alloc(); + self->sw_frame->format = AV_PIX_FMT_NV12; + self->sw_frame->width = frame->width; + self->sw_frame->height = frame->height; + + // TODO: Remove + if(av_frame_get_buffer(self->sw_frame, 0) < 0) { + fprintf(stderr, "failed to allocate sw frame\n"); + } + + // TODO: Remove + if(av_frame_make_writable(self->sw_frame) < 0) { + fprintf(stderr, "failed to make writable\n"); + } + return true; +} + +static void gsr_video_encoder_vulkan_stop(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context); + +static bool gsr_video_encoder_vulkan_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { + gsr_video_encoder_vulkan *self = encoder->priv; + + if(!gsr_video_encoder_vulkan_setup_context(self, video_codec_context)) { + gsr_video_encoder_vulkan_stop(self, video_codec_context); + return false; + } + + if(!gsr_video_encoder_vulkan_setup_textures(self, video_codec_context, frame)) { + gsr_video_encoder_vulkan_stop(self, video_codec_context); + return false; + } + + return true; +} + +void gsr_video_encoder_vulkan_stop(gsr_video_encoder_vulkan *self, AVCodecContext *video_codec_context) { + self->params.egl->glDeleteTextures(2, self->target_textures); + self->target_textures[0] = 0; + self->target_textures[1] = 0; + + if(video_codec_context->hw_frames_ctx) + av_buffer_unref(&video_codec_context->hw_frames_ctx); + if(self->device_ctx) + av_buffer_unref(&self->device_ctx); +} + +static void nop_free(void *opaque, uint8_t *data) { + +} + +static void gsr_video_encoder_vulkan_copy_textures_to_frame(gsr_video_encoder *encoder, AVFrame *frame, gsr_color_conversion *color_conversion) { + gsr_video_encoder_vulkan *self = encoder->priv; + + static int counter = 0; + ++counter; + + // AVBufferRef *av_buffer_create(uint8_t *data, size_t size, + // void (*free)(void *opaque, uint8_t *data), + // void *opaque, int flags); + + while(self->params.egl->glGetError()){} + self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[0]); + //fprintf(stderr, "1 gl err: %d\n", self->params.egl->glGetError()); + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[counter % 2]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); + self->params.egl->glReadPixels(0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, 0); + //fprintf(stderr, "2 gl err: %d\n", self->params.egl->glGetError()); + + const int next_pbo_y = (counter + 1) % 2; + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_y]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, frame->width * frame->height, 0, GL_STREAM_READ); + //fprintf(stderr, "3 gl err: %d\n", self->params.egl->glGetError()); + uint8_t *ptr_y = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + //fprintf(stderr, "4 gl err: %d\n", self->params.egl->glGetError()); + if(!ptr_y) { + fprintf(stderr, "failed to map buffer y!\n"); + } + + while(self->params.egl->glGetError()){} + self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, color_conversion->framebuffers[1]); + //fprintf(stderr, "5 gl err: %d\n", self->params.egl->glGetError()); + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[counter % 2]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + //fprintf(stderr, "5.5 gl err: %d\n", self->params.egl->glGetError()); + self->params.egl->glReadPixels(0, 0, frame->width/2, frame->height/2, GL_RG, GL_UNSIGNED_BYTE, 0); + //fprintf(stderr, "6 gl err: %d\n", self->params.egl->glGetError()); + + const int next_pbo_uv = (counter + 1) % 2; + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_uv[next_pbo_uv]); + self->params.egl->glBufferData(GL_PIXEL_PACK_BUFFER, (frame->width/2 * frame->height/2) * 2, 0, GL_STREAM_READ); + //fprintf(stderr, "7 gl err: %d\n", self->params.egl->glGetError()); + uint8_t *ptr_uv = (uint8_t*)self->params.egl->glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + //fprintf(stderr, "8 gl err: %d\n", self->params.egl->glGetError()); + if(!ptr_uv) { + fprintf(stderr, "failed to map buffer uv!\n"); + } + + //self->sw_frame->buf[0] = av_buffer_create(ptr_y, 3840 * 2160, nop_free, NULL, 0); + //self->sw_frame->buf[1] = av_buffer_create(ptr_uv, 1920 * 1080 * 2, nop_free, NULL, 0); + //self->sw_frame->data[0] = self->sw_frame->buf[0]->data; + //self->sw_frame->data[1] = self->sw_frame->buf[1]->data; + //self->sw_frame->extended_data[0] = self->sw_frame->data[0]; + //self->sw_frame->extended_data[1] = self->sw_frame->data[1]; + + self->sw_frame->data[0] = ptr_y; + self->sw_frame->data[1] = ptr_uv; + + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + self->params.egl->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + + //self->params.egl->glBindTexture(GL_TEXTURE_2D, self->target_textures[1]); + //self->params.egl->glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, sw_frame->data[1]); + + //self->params.egl->glBindTexture(GL_TEXTURE_2D, 0); + + int ret = av_hwframe_transfer_data(frame, self->sw_frame, 0); + if(ret < 0) { + fprintf(stderr, "transfer data failed, error: %s\n", av_err2str(ret)); + } + + //av_buffer_unref(&self->sw_frame->buf[0]); + //av_buffer_unref(&self->sw_frame->buf[1]); + + //av_frame_free(&sw_frame); + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_y]); + self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, self->pbo_y[next_pbo_uv]); + self->params.egl->glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + self->params.egl->glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +} + +static void gsr_video_encoder_vulkan_get_textures(gsr_video_encoder *encoder, unsigned int *textures, int *num_textures, gsr_destination_color *destination_color) { + gsr_video_encoder_vulkan *self = encoder->priv; + textures[0] = self->target_textures[0]; + textures[1] = self->target_textures[1]; + *num_textures = 2; + *destination_color = self->params.color_depth == GSR_COLOR_DEPTH_10_BITS ? GSR_DESTINATION_COLOR_P010 : GSR_DESTINATION_COLOR_NV12; +} + +static void gsr_video_encoder_vulkan_destroy(gsr_video_encoder *encoder, AVCodecContext *video_codec_context) { + gsr_video_encoder_vulkan_stop(encoder->priv, video_codec_context); + free(encoder->priv); + free(encoder); +} + +gsr_video_encoder* gsr_video_encoder_vulkan_create(const gsr_video_encoder_vulkan_params *params) { + gsr_video_encoder *encoder = calloc(1, sizeof(gsr_video_encoder)); + if(!encoder) + return NULL; + + gsr_video_encoder_vulkan *encoder_vulkan = calloc(1, sizeof(gsr_video_encoder_vulkan)); + if(!encoder_vulkan) { + free(encoder); + return NULL; + } + + encoder_vulkan->params = *params; + + *encoder = (gsr_video_encoder) { + .start = gsr_video_encoder_vulkan_start, + .copy_textures_to_frame = gsr_video_encoder_vulkan_copy_textures_to_frame, + .get_textures = gsr_video_encoder_vulkan_get_textures, + .destroy = gsr_video_encoder_vulkan_destroy, + .priv = encoder_vulkan + }; + + return encoder; +} |