From b7c5df3c4bbd30bbca91f6e9bdb2bf4d606c46f5 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 31 Aug 2024 03:17:18 +0200 Subject: Faster startup (faster video codec query), fix some video codec callback logic --- src/encoder/video/cuda.c | 236 ++++++++++++++++++++++++++++++++++++++++++---- src/encoder/video/vaapi.c | 24 +++-- 2 files changed, 236 insertions(+), 24 deletions(-) (limited to 'src/encoder') diff --git a/src/encoder/video/cuda.c b/src/encoder/video/cuda.c index 53abe78..69e570f 100644 --- a/src/encoder/video/cuda.c +++ b/src/encoder/video/cuda.c @@ -1,11 +1,13 @@ #include "../../../include/encoder/video/cuda.h" #include "../../../include/egl.h" #include "../../../include/cuda.h" +#include "../../../external/nvEncodeAPI.h" #include #include #include +#include typedef struct { gsr_video_encoder_cuda_params params; @@ -122,22 +124,224 @@ static bool gsr_video_encoder_cuda_setup_textures(gsr_video_encoder_cuda *self, return true; } +static void* open_nvenc_library(void) { + dlerror(); /* clear */ + void *lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY); + if(!lib) { + lib = dlopen("libnvidia-encode.so", RTLD_LAZY); + if(!lib) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs failed: failed to load libnvidia-encode.so/libnvidia-encode.so.1, error: %s\n", dlerror()); + return NULL; + } + } + return lib; +} + +static bool profile_is_h264(const GUID *profile_guid) { + const GUID *h264_guids[] = { + &NV_ENC_H264_PROFILE_BASELINE_GUID, + &NV_ENC_H264_PROFILE_MAIN_GUID, + &NV_ENC_H264_PROFILE_HIGH_GUID, + &NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID, + &NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID + }; + + for(int i = 0; i < 5; ++i) { + if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) + return true; + } + + return false; +} + +static bool profile_is_hevc(const GUID *profile_guid) { + const GUID *h264_guids[] = { + &NV_ENC_HEVC_PROFILE_MAIN_GUID, + }; + + for(int i = 0; i < 1; ++i) { + if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) + return true; + } + + return false; +} + +static bool profile_is_hevc_10bit(const GUID *profile_guid) { + const GUID *h264_guids[] = { + &NV_ENC_HEVC_PROFILE_MAIN10_GUID, + }; + + for(int i = 0; i < 1; ++i) { + if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) + return true; + } + + return false; +} + +static bool profile_is_av1(const GUID *profile_guid) { + const GUID *h264_guids[] = { + &NV_ENC_AV1_PROFILE_MAIN_GUID, + }; + + for(int i = 0; i < 1; ++i) { + if(memcmp(profile_guid, h264_guids[i], sizeof(GUID)) == 0) + return true; + } + + return false; +} + +static bool encoder_get_supported_profiles(const NV_ENCODE_API_FUNCTION_LIST *function_list, void *nvenc_encoder, const GUID *encoder_guid, gsr_supported_video_codecs *supported_video_codecs) { + bool success = false; + GUID *profile_guids = NULL; + + uint32_t profile_guid_count = 0; + if(function_list->nvEncGetEncodeProfileGUIDCount(nvenc_encoder, *encoder_guid, &profile_guid_count) != NV_ENC_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeProfileGUIDCount failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); + goto fail; + } + + if(profile_guid_count == 0) + goto fail; + + profile_guids = calloc(profile_guid_count, sizeof(GUID)); + if(!profile_guids) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to allocate %d guids\n", (int)profile_guid_count); + goto fail; + } + + if(function_list->nvEncGetEncodeProfileGUIDs(nvenc_encoder, *encoder_guid, profile_guids, profile_guid_count, &profile_guid_count) != NV_ENC_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeProfileGUIDs failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); + goto fail; + } + + for(uint32_t i = 0; i < profile_guid_count; ++i) { + if(profile_is_h264(&profile_guids[i])) { + supported_video_codecs->h264 = true; + } else if(profile_is_hevc(&profile_guids[i])) { + supported_video_codecs->hevc = true; + } else if(profile_is_hevc_10bit(&profile_guids[i])) { + supported_video_codecs->hevc_hdr = true; + supported_video_codecs->hevc_10bit = true; + } else if(profile_is_av1(&profile_guids[i])) { + supported_video_codecs->av1 = true; + supported_video_codecs->av1_hdr = true; + supported_video_codecs->av1_10bit = true; + } + } + + success = true; + fail: + + if(profile_guids) + free(profile_guids); + + return success; +} + +static bool get_supported_video_codecs(const NV_ENCODE_API_FUNCTION_LIST *function_list, void *nvenc_encoder, gsr_supported_video_codecs *supported_video_codecs) { + bool success = false; + GUID *encoder_guids = NULL; + *supported_video_codecs = (gsr_supported_video_codecs){0}; + + uint32_t encode_guid_count = 0; + if(function_list->nvEncGetEncodeGUIDCount(nvenc_encoder, &encode_guid_count) != NV_ENC_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeGUIDCount failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); + goto fail; + } + + if(encode_guid_count == 0) + goto fail; + + encoder_guids = calloc(encode_guid_count, sizeof(GUID)); + if(!encoder_guids) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to allocate %d guids\n", (int)encode_guid_count); + goto fail; + } + + if(function_list->nvEncGetEncodeGUIDs(nvenc_encoder, encoder_guids, encode_guid_count, &encode_guid_count) != NV_ENC_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: nvEncGetEncodeGUIDs failed, error: %s\n", function_list->nvEncGetLastErrorString(nvenc_encoder)); + goto fail; + } + + for(uint32_t i = 0; i < encode_guid_count; ++i) { + encoder_get_supported_profiles(function_list, nvenc_encoder, &encoder_guids[i], supported_video_codecs); + } + + success = true; + fail: + + if(encoder_guids) + free(encoder_guids); + + return success; +} + +#define NVENCAPI_VERSION_470 (11 | (1 << 24)) + static gsr_supported_video_codecs gsr_video_encoder_cuda_get_supported_codecs(gsr_video_encoder *encoder, bool cleanup) { (void)encoder; - (void)cleanup; - //gsr_video_encoder_cuda *encoder_cuda = encoder->priv; - // TODO: Query support - return (gsr_supported_video_codecs) { - .h264 = true, - .hevc = true, - .hevc_hdr = true, - .hevc_10bit = true, - .av1 = true, - .av1_hdr = true, - .av1_10bit = true, - .vp8 = false, - .vp9 = false - }; + + void *nvenc_lib = NULL; + gsr_cuda cuda; + memset(&cuda, 0, sizeof(cuda)); + gsr_supported_video_codecs supported_video_codecs = {0}; + + if(!gsr_cuda_load(&cuda, NULL, false)) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to load cuda\n"); + goto done; + } + + nvenc_lib = open_nvenc_library(); + if(!nvenc_lib) + goto done; + + typedef NVENCSTATUS NVENCAPI (*FUNC_NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList); + FUNC_NvEncodeAPICreateInstance NvEncodeAPICreateInstance = (FUNC_NvEncodeAPICreateInstance)dlsym(nvenc_lib, "NvEncodeAPICreateInstance"); + if(!NvEncodeAPICreateInstance) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: failed to find NvEncodeAPICreateInstance in libnvidia-encode.so\n"); + goto done; + } + + NV_ENCODE_API_FUNCTION_LIST function_list; + memset(&function_list, 0, sizeof(function_list)); + function_list.version = NV_ENCODE_API_FUNCTION_LIST_VER; + if(NvEncodeAPICreateInstance(&function_list) != NV_ENC_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: NvEncodeAPICreateInstance failed\n"); + goto done; + } + + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params; + memset(¶ms, 0, sizeof(params)); + params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + params.device = cuda.cu_ctx; + params.apiVersion = NVENCAPI_VERSION; + + void *nvenc_encoder = NULL; + if(function_list.nvEncOpenEncodeSessionEx(¶ms, &nvenc_encoder) != NV_ENC_SUCCESS) { + // Old nvidia gpus dont support the new nvenc api (which is required for av1). + // In such cases fallback to old api version if possible and try again. + params.apiVersion = NVENCAPI_VERSION_470; + if(function_list.nvEncOpenEncodeSessionEx(¶ms, &nvenc_encoder) != NV_ENC_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_cuda_get_supported_codecs: NvEncOpenEncodeSessionEx failed\n"); + goto done; + } + } + + get_supported_video_codecs(&function_list, nvenc_encoder, &supported_video_codecs); + + done: + if(cleanup) { + function_list.nvEncDestroyEncoder(nvenc_encoder); + if(nvenc_lib) + dlclose(nvenc_lib); + gsr_cuda_unload(&cuda); + } + + return supported_video_codecs; } static void gsr_video_encoder_cuda_stop(gsr_video_encoder_cuda *self, AVCodecContext *video_codec_context); @@ -145,8 +349,8 @@ static void gsr_video_encoder_cuda_stop(gsr_video_encoder_cuda *self, AVCodecCon static bool gsr_video_encoder_cuda_start(gsr_video_encoder *encoder, AVCodecContext *video_codec_context, AVFrame *frame) { gsr_video_encoder_cuda *encoder_cuda = encoder->priv; - // TODO: Force set overclock to false if wayland - if(!gsr_cuda_load(&encoder_cuda->cuda, encoder_cuda->params.egl->x11.dpy, encoder_cuda->params.overclock)) { + const bool overclock = gsr_egl_get_display_server(encoder_cuda->params.egl) == GSR_DISPLAY_SERVER_X11 ? encoder_cuda->params.overclock : false; + if(!gsr_cuda_load(&encoder_cuda->cuda, encoder_cuda->params.egl->x11.dpy, overclock)) { fprintf(stderr, "gsr error: gsr_video_encoder_cuda_start: failed to load cuda\n"); gsr_video_encoder_cuda_stop(encoder_cuda, video_codec_context); return false; diff --git a/src/encoder/video/vaapi.c b/src/encoder/video/vaapi.c index 2185ebc..a6c45f9 100644 --- a/src/encoder/video/vaapi.c +++ b/src/encoder/video/vaapi.c @@ -246,16 +246,18 @@ static bool get_supported_video_codecs(VADisplay va_dpy, gsr_supported_video_cod int va_major = 0; int va_minor = 0; - if(vaInitialize(va_dpy, &va_major, &va_minor) != VA_STATUS_SUCCESS) - return false; + if(vaInitialize(va_dpy, &va_major, &va_minor) != VA_STATUS_SUCCESS) { + fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: vaInitialize failed\n"); + goto fail; + } int num_profiles = vaMaxNumProfiles(va_dpy); if(num_profiles <= 0) - goto done; + goto fail; profile_list = calloc(num_profiles, sizeof(VAProfile)); if(!profile_list || vaQueryConfigProfiles(va_dpy, profile_list, &num_profiles) != VA_STATUS_SUCCESS) - goto done; + goto fail; for(int i = 0; i < num_profiles; ++i) { if(profile_is_h264(profile_list[i])) { @@ -285,7 +287,7 @@ static bool get_supported_video_codecs(VADisplay va_dpy, gsr_supported_video_cod } success = true; - done: + fail: if(profile_list) free(profile_list); @@ -299,16 +301,22 @@ static gsr_supported_video_codecs gsr_video_encoder_vaapi_get_supported_codecs(g gsr_video_encoder_vaapi *encoder_vaapi = encoder->priv; gsr_supported_video_codecs supported_video_codecs = {0}; - const int drm_fd = open(encoder_vaapi->params.egl->card_path, O_RDWR); + char render_path[128]; + if(!gsr_card_path_get_render_path(encoder_vaapi->params.egl->card_path, render_path)) { + fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to get /dev/dri/renderDXXX file from %s\n", encoder_vaapi->params.egl->card_path); + return supported_video_codecs; + } + + const int drm_fd = open(render_path, O_RDWR); if(drm_fd == -1) { - fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to open device %s\n", encoder_vaapi->params.egl->card_path); + fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to open device %s\n", render_path); return supported_video_codecs; } VADisplay va_dpy = vaGetDisplayDRM(drm_fd); if(va_dpy) { if(!get_supported_video_codecs(va_dpy, &supported_video_codecs, cleanup)) - fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to query supported video codecs for device %s\n", encoder_vaapi->params.egl->card_path); + fprintf(stderr, "gsr error: gsr_video_encoder_vaapi_get_supported_codecs: failed to query supported video codecs for device %s\n", render_path); } if(cleanup) -- cgit v1.2.3