From b7c5df3c4bbd30bbca91f6e9bdb2bf4d606c46f5 Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Sat, 31 Aug 2024 03:17:18 +0200
Subject: Faster startup (faster video codec query), fix some video codec callback logic

---
 src/encoder/video/cuda.c | 246 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 230 insertions(+), 16 deletions(-)

(limited to 'src/encoder/video/cuda.c')

diff --git a/src/encoder/video/cuda.c b/src/encoder/video/cuda.c
index 53abe78..69e570f 100644
--- a/src/encoder/video/cuda.c
+++ b/src/encoder/video/cuda.c
@@ -1,11 +1,13 @@
 #include "../../../include/encoder/video/cuda.h"
 #include "../../../include/egl.h"
 #include "../../../include/cuda.h"
+#include "../../../external/nvEncodeAPI.h"
 
 #include <libavcodec/avcodec.h>
 #include <libavutil/hwcontext_cuda.h>
 
 #include <stdlib.h>
+#include <dlfcn.h>
 
 typedef struct {
     gsr_video_encoder_cuda_params params;
@@ -122,22 +124,234 @@ static bool gsr_video_encoder_cuda_setup_textures(gsr_video_encoder_cuda *self,
     return true;
 }
 
+/*
+ * Loads the NVENC shared library, trying the versioned soname first and then the unversioned one.
+ * Returns the dlopen() handle, or NULL after logging dlerror() if neither could be loaded.
+ * The returned handle is released by the caller with dlclose().
+ */
+static void* open_nvenc_library(void) {
+    dlerror(); /* clear */
+    void *lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
+    if(!lib) {
+        lib = dlopen("libnvidia-encode.so", RTLD_LAZY);
+        if(!lib) {
+            fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs failed: failed to load libnvidia-encode.so/libnvidia-encode.so.1, error: %s\n", dlerror());
+            return NULL;
+        }
+    }
+    return lib;
+}
+
+/* Returns true if the two GUIDs are byte-wise identical */
+static bool guid_equals(const GUID *a, const GUID *b) {
+    return memcmp(a, b, sizeof(GUID)) == 0;
+}
+
+/* Returns true if profile_guid is one of the H.264 profile GUIDs listed in the table below */
+static bool profile_is_h264(const GUID *profile_guid) {
+    const GUID *const h264_guids[] = {
+        &NV_ENC_H264_PROFILE_BASELINE_GUID,
+        &NV_ENC_H264_PROFILE_MAIN_GUID,
+        &NV_ENC_H264_PROFILE_HIGH_GUID,
+        &NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID,
+        &NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID
+    };
+    const size_t num_h264_guids = sizeof(h264_guids) / sizeof(h264_guids[0]);
+
+    for(size_t i = 0; i < num_h264_guids; ++i) {
+        if(guid_equals(profile_guid, h264_guids[i]))
+            return true;
+    }
+
+    return false;
+}
+
+/* Returns true if profile_guid is the HEVC main profile GUID */
+static bool profile_is_hevc(const GUID *profile_guid) {
+    return guid_equals(profile_guid, &NV_ENC_HEVC_PROFILE_MAIN_GUID);
+}
+
+/* Returns true if profile_guid is the HEVC main10 profile GUID */
+static bool profile_is_hevc_10bit(const GUID *profile_guid) {
+    return guid_equals(profile_guid, &NV_ENC_HEVC_PROFILE_MAIN10_GUID);
+}
+
+/* Returns true if profile_guid is the AV1 main profile GUID */
+static bool profile_is_av1(const GUID *profile_guid) {
+    return guid_equals(profile_guid, &NV_ENC_AV1_PROFILE_MAIN_GUID);
+}
+
+/*
+ * Queries the profile GUIDs that nvenc_encoder reports for the codec encoder_guid and sets the matching
+ * flags in supported_video_codecs. Flags are only ever set to true here, never cleared, so the function
+ * can be called once per codec GUID on the same struct.
+ * Returns false if a query or the allocation fails, or if the codec reports no profiles.
+ */
+static bool encoder_get_supported_profiles(const NV_ENCODE_API_FUNCTION_LIST *function_list, void *nvenc_encoder, const GUID *encoder_guid, gsr_supported_video_codecs *supported_video_codecs) {
+    bool success = false;
+    GUID *profile_guids = NULL;
+
+    uint32_t profile_guid_count = 0;
+    if(function_list->nvEncGetEncodeProfileGUIDCount(nvenc_encoder, *encoder_guid, &profile_guid_count) != NV_ENC_SUCCESS) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeProfileGUIDCount failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder));
+        goto done;
+    }
+
+    if(profile_guid_count == 0)
+        goto done;
+
+    profile_guids = calloc(profile_guid_count, sizeof(*profile_guids));
+    if(!profile_guids) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to allocate %d guids\n", (int)profile_guid_count);
+        goto done;
+    }
+
+    if(function_list->nvEncGetEncodeProfileGUIDs(nvenc_encoder, *encoder_guid, profile_guids, profile_guid_count, &profile_guid_count) != NV_ENC_SUCCESS) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeProfileGUIDs failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder));
+        goto done;
+    }
+
+    for(uint32_t i = 0; i < profile_guid_count; ++i) {
+        if(profile_is_h264(&profile_guids[i])) {
+            supported_video_codecs->h264 = true;
+        } else if(profile_is_hevc(&profile_guids[i])) {
+            supported_video_codecs->hevc = true;
+        } else if(profile_is_hevc_10bit(&profile_guids[i])) {
+            supported_video_codecs->hevc_hdr = true;
+            supported_video_codecs->hevc_10bit = true;
+        } else if(profile_is_av1(&profile_guids[i])) {
+            supported_video_codecs->av1 = true;
+            supported_video_codecs->av1_hdr = true;
+            supported_video_codecs->av1_10bit = true;
+        }
+    }
+
+    success = true;
+
+    done:
+    free(profile_guids);
+    return success;
+}
+
+/*
+ * Resets supported_video_codecs and then fills it in by walking every codec GUID that nvenc_encoder
+ * reports and querying the profiles of each one.
+ * Returns false if the list of codec GUIDs could not be retrieved (or is empty). A failure to query the
+ * profiles of a single codec is ignored so that the remaining codecs are still reported.
+ */
+static bool get_supported_video_codecs(const NV_ENCODE_API_FUNCTION_LIST *function_list, void *nvenc_encoder, gsr_supported_video_codecs *supported_video_codecs) {
+    bool success = false;
+    GUID *encoder_guids = NULL;
+    *supported_video_codecs = (gsr_supported_video_codecs){0};
+
+    uint32_t encode_guid_count = 0;
+    if(function_list->nvEncGetEncodeGUIDCount(nvenc_encoder, &encode_guid_count) != NV_ENC_SUCCESS) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeGUIDCount failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder));
+        goto done;
+    }
+
+    if(encode_guid_count == 0)
+        goto done;
+
+    encoder_guids = calloc(encode_guid_count, sizeof(*encoder_guids));
+    if(!encoder_guids) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to allocate %d guids\n", (int)encode_guid_count);
+        goto done;
+    }
+
+    if(function_list->nvEncGetEncodeGUIDs(nvenc_encoder, encoder_guids, encode_guid_count, &encode_guid_count) != NV_ENC_SUCCESS) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeGUIDs failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder));
+        goto done;
+    }
+
+    for(uint32_t i = 0; i < encode_guid_count; ++i) {
+        encoder_get_supported_profiles(function_list, nvenc_encoder, &encoder_guids[i], supported_video_codecs);
+    }
+
+    success = true;
+
+    done:
+    free(encoder_guids);
+    return success;
+}
+
+/*
+ * Fallback NVENC api version to request when opening a session with NVENCAPI_VERSION fails.
+ * Packs major 11, minor 1, assuming the same (major | minor << 24) layout that nvEncodeAPI.h uses for NVENCAPI_VERSION.
+ */
+#define NVENCAPI_VERSION_470 (11 | (1 << 24))
+
+/*
+ * Reports which video codecs the gpu can encode by loading cuda and the nvenc library, opening an
+ * encode session and querying its codec and profile GUIDs. Errors are logged to stderr.
+ * If cuda, the nvenc library or the session can't be set up then every codec is reported as unsupported.
+ * When cleanup is false nothing that was opened here is released again.
+ */
 static gsr_supported_video_codecs gsr_video_encoder_cuda_get_supported_codecs(gsr_video_encoder *encoder, bool cleanup) {
     (void)encoder;
-    (void)cleanup;
-    //gsr_video_encoder_cuda *encoder_cuda = encoder->priv;
-    // TODO: Query support
-    return (gsr_supported_video_codecs) {
-        .h264 = true,
-        .hevc = true,
-        .hevc_hdr = true,
-        .hevc_10bit = true,
-        .av1 = true,
-        .av1_hdr = true,
-        .av1_10bit = true,
-        .vp8 = false,
-        .vp9 = false
-    };
+
+    void *nvenc_lib = NULL;
+    void *nvenc_encoder = NULL;
+    gsr_supported_video_codecs supported_video_codecs = {0};
+    gsr_cuda cuda;
+    memset(&cuda, 0, sizeof(cuda));
+
+    /* Initialized before the first "goto done" so that the cleanup code never reads indeterminate values */
+    NV_ENCODE_API_FUNCTION_LIST function_list;
+    memset(&function_list, 0, sizeof(function_list));
+    function_list.version = NV_ENCODE_API_FUNCTION_LIST_VER;
+
+    if(!gsr_cuda_load(&cuda, NULL, false)) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to load cuda\n");
+        goto done;
+    }
+
+    nvenc_lib = open_nvenc_library();
+    if(!nvenc_lib)
+        goto done;
+
+    typedef NVENCSTATUS NVENCAPI (*FUNC_NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList);
+    FUNC_NvEncodeAPICreateInstance NvEncodeAPICreateInstance = (FUNC_NvEncodeAPICreateInstance)dlsym(nvenc_lib, "NvEncodeAPICreateInstance");
+    if(!NvEncodeAPICreateInstance) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to find NvEncodeAPICreateInstance in libnvidia-encode.so\n");
+        goto done;
+    }
+
+    if(NvEncodeAPICreateInstance(&function_list) != NV_ENC_SUCCESS) {
+        fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: NvEncodeAPICreateInstance failed\n");
+        goto done;
+    }
+
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params;
+    memset(&params, 0, sizeof(params));
+    params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+    params.device = cuda.cu_ctx;
+    params.apiVersion = NVENCAPI_VERSION;
+
+    if(function_list.nvEncOpenEncodeSessionEx(&params, &nvenc_encoder) != NV_ENC_SUCCESS) {
+        // Old nvidia gpus don't support the new nvenc api (which is required for av1).
+        // In such cases fallback to old api version if possible and try again.
+        params.apiVersion = NVENCAPI_VERSION_470;
+        if(function_list.nvEncOpenEncodeSessionEx(&params, &nvenc_encoder) != NV_ENC_SUCCESS) {
+            fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: NvEncOpenEncodeSessionEx failed\n");
+            goto done;
+        }
+    }
+
+    get_supported_video_codecs(&function_list, nvenc_encoder, &supported_video_codecs);
+
+    done:
+    if(cleanup) {
+        /* Only destroy a session that was actually opened; on early failures there is none */
+        if(nvenc_encoder)
+            function_list.nvEncDestroyEncoder(nvenc_encoder);
+        if(nvenc_lib)
+            dlclose(nvenc_lib);
+        gsr_cuda_unload(&cuda);
+    }
+
+    return supported_video_codecs;
 }
 
 static void gsr_video_encoder_cuda_stop(gsr_video_encoder_cuda *self, AVCodecContext *video_codec_context);
@@ -145,8 +359,8 @@ static void gsr_video_encoder_cuda_stop(gsr_video_encoder_cuda *self, AVCodecCon
 static bool gsr_video_encoder_cuda_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) {
     gsr_video_encoder_cuda *encoder_cuda = encoder->priv;
 
-    // TODO: Force set overclock to false if wayland
-    if(!gsr_cuda_load(&encoder_cuda->cuda, encoder_cuda->params.egl->x11.dpy, encoder_cuda->params.overclock)) {
+    const bool overclock = gsr_egl_get_display_server(encoder_cuda->params.egl) == GSR_DISPLAY_SERVER_X11 ? encoder_cuda->params.overclock : false;
+    if(!gsr_cuda_load(&encoder_cuda->cuda, encoder_cuda->params.egl->x11.dpy, overclock)) {
         fprintf(stderr, "gsr error: gsr_video_encoder_cuda_start: failed to load cuda\n");
         gsr_video_encoder_cuda_stop(encoder_cuda, video_codec_context);
         return false;
-- 
cgit v1.2.3