aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2023-03-31 01:11:12 +0200
committerdec05eba <dec05eba@protonmail.com>2023-03-31 01:11:12 +0200
commit82e37200849935beac7213877b526aeaf2170274 (patch)
tree9d972659f2e5b171a978e7b64137443b17aa87de /src
parentf57b641d6db6513d16eb1aef4fcc444a102b6245 (diff)
Install coolbits if using nvidia, add preserve video memory install script
Diffstat (limited to 'src')
-rw-r--r--src/capture/nvfbc.c31
-rw-r--r--src/cuda.c10
-rw-r--r--src/main.cpp3
-rw-r--r--src/overclock.c72
4 files changed, 85 insertions, 31 deletions
diff --git a/src/capture/nvfbc.c b/src/capture/nvfbc.c
index c1c998b..d538b5b 100644
--- a/src/capture/nvfbc.c
+++ b/src/capture/nvfbc.c
@@ -20,6 +20,7 @@ typedef struct {
PNVFBCCREATEINSTANCE nv_fbc_create_instance;
NVFBC_API_FUNCTION_LIST nv_fbc_function_list;
bool fbc_handle_created;
+ bool capture_session_created;
gsr_cuda cuda;
bool frame_initialized;
@@ -225,9 +226,9 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
NVFBCSTATUS status;
NVFBC_TRACKING_TYPE tracking_type;
- bool capture_session_created = false;
uint32_t output_id = 0;
cap_nvfbc->fbc_handle_created = false;
+ cap_nvfbc->capture_session_created = false;
NVFBC_CREATE_HANDLE_PARAMS create_params;
memset(&create_params, 0, sizeof(create_params));
@@ -295,6 +296,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
create_capture_params.dwSamplingRateMs = 1000u / ((uint32_t)cap_nvfbc->params.fps + 1);
create_capture_params.bAllowDirectCapture = direct_capture ? NVFBC_TRUE : NVFBC_FALSE;
create_capture_params.bPushModel = direct_capture ? NVFBC_TRUE : NVFBC_FALSE;
+ //create_capture_params.bDisableAutoModesetRecovery = true; // TODO:
if(tracking_type == NVFBC_TRACKING_OUTPUT)
create_capture_params.dwOutputId = output_id;
@@ -303,7 +305,7 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
fprintf(stderr, "gsr error: gsr_capture_nvfbc_start failed: %s\n", cap_nvfbc->nv_fbc_function_list.nvFBCGetLastErrorStr(cap_nvfbc->nv_fbc_handle));
goto error_cleanup;
}
- capture_session_created = true;
+ cap_nvfbc->capture_session_created = true;
NVFBC_TOCUDA_SETUP_PARAMS setup_params;
memset(&setup_params, 0, sizeof(setup_params));
@@ -331,11 +333,12 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
error_cleanup:
if(cap_nvfbc->fbc_handle_created) {
- if(capture_session_created) {
+ if(cap_nvfbc->capture_session_created) {
NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params;
memset(&destroy_capture_params, 0, sizeof(destroy_capture_params));
destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER;
cap_nvfbc->nv_fbc_function_list.nvFBCDestroyCaptureSession(cap_nvfbc->nv_fbc_handle, &destroy_capture_params);
+ cap_nvfbc->capture_session_created = false;
}
NVFBC_DESTROY_HANDLE_PARAMS destroy_params;
@@ -357,15 +360,21 @@ static int gsr_capture_nvfbc_start(gsr_capture *cap, AVCodecContext *video_codec
static void gsr_capture_nvfbc_destroy_session(gsr_capture *cap) {
gsr_capture_nvfbc *cap_nvfbc = cap->priv;
- NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params;
- memset(&destroy_capture_params, 0, sizeof(destroy_capture_params));
- destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER;
- cap_nvfbc->nv_fbc_function_list.nvFBCDestroyCaptureSession(cap_nvfbc->nv_fbc_handle, &destroy_capture_params);
+ if(cap_nvfbc->fbc_handle_created) {
+ if(cap_nvfbc->capture_session_created) {
+ NVFBC_DESTROY_CAPTURE_SESSION_PARAMS destroy_capture_params;
+ memset(&destroy_capture_params, 0, sizeof(destroy_capture_params));
+ destroy_capture_params.dwVersion = NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER;
+ cap_nvfbc->nv_fbc_function_list.nvFBCDestroyCaptureSession(cap_nvfbc->nv_fbc_handle, &destroy_capture_params);
+ cap_nvfbc->capture_session_created = false;
+ }
- NVFBC_DESTROY_HANDLE_PARAMS destroy_params;
- memset(&destroy_params, 0, sizeof(destroy_params));
- destroy_params.dwVersion = NVFBC_DESTROY_HANDLE_PARAMS_VER;
- cap_nvfbc->nv_fbc_function_list.nvFBCDestroyHandle(cap_nvfbc->nv_fbc_handle, &destroy_params);
+ NVFBC_DESTROY_HANDLE_PARAMS destroy_params;
+ memset(&destroy_params, 0, sizeof(destroy_params));
+ destroy_params.dwVersion = NVFBC_DESTROY_HANDLE_PARAMS_VER;
+ cap_nvfbc->nv_fbc_function_list.nvFBCDestroyHandle(cap_nvfbc->nv_fbc_handle, &destroy_params);
+ cap_nvfbc->fbc_handle_created = false;
+ }
cap_nvfbc->nv_fbc_handle = 0;
}
diff --git a/src/cuda.c b/src/cuda.c
index 470747b..ea14e8b 100644
--- a/src/cuda.c
+++ b/src/cuda.c
@@ -94,6 +94,11 @@ bool gsr_cuda_load(gsr_cuda *self, Display *display, bool do_overclock) {
}
void gsr_cuda_unload(gsr_cuda *self) {
+ if(self->do_overclock && self->overclock.xnvctrl.library) {
+ gsr_overclock_stop(&self->overclock);
+ gsr_overclock_unload(&self->overclock);
+ }
+
if(self->library) {
if(self->cu_ctx) {
self->cuCtxDestroy_v2(self->cu_ctx);
@@ -102,10 +107,5 @@ void gsr_cuda_unload(gsr_cuda *self) {
dlclose(self->library);
}
- if(self->do_overclock && self->overclock.xnvctrl.library) {
- gsr_overclock_stop(&self->overclock);
- gsr_overclock_unload(&self->overclock);
- }
-
memset(self, 0, sizeof(gsr_cuda));
}
diff --git a/src/main.cpp b/src/main.cpp
index 36fcfe5..4cd8682 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -659,7 +659,7 @@ static void open_video(AVCodecContext *codec_context, VideoQuality video_quality
}
static void usage() {
- fprintf(stderr, "usage: gpu-screen-recorder -w <window_id|monitor|focused> [-c <container_format>] [-s WxH] -f <fps> [-a <audio_input>...] [-q <quality>] [-r <replay_buffer_size_sec>] [-k h264|h265] [-ac aac|opus|flac] [-oc yes|no] [-o <output_file>]\n");
+ fprintf(stderr, "usage: gpu-screen-recorder -w <window_id|monitor|focused> [-c <container_format>] [-s WxH] -f <fps> [-a <audio_input>] [-q <quality>] [-r <replay_buffer_size_sec>] [-k h264|h265] [-ac aac|opus|flac] [-oc yes|no] [-o <output_file>]\n");
fprintf(stderr, "\n");
fprintf(stderr, "OPTIONS:\n");
fprintf(stderr, " -w Window to record, a display, \"screen\", \"screen-direct\", \"screen-direct-force\" or \"focused\".\n");
@@ -2005,6 +2005,7 @@ int main(int argc, char **argv) {
frame_time_overflow = std::min(frame_time_overflow, target_fps);
frame_timer_start = time_now - frame_time_overflow;
gsr_capture_capture(capture, frame);
+
std::lock_guard<std::mutex> lock(video_frame_mutex);
if(latest_video_frame) {
av_frame_free(&latest_video_frame);
diff --git a/src/overclock.c b/src/overclock.c
index 8517876..7c0faad 100644
--- a/src/overclock.c
+++ b/src/overclock.c
@@ -8,10 +8,12 @@
// So to get around this we overclock memory transfer rate (maybe this should also be done for graphics clock?) to the best performance level while GPU Screen Recorder is running.
// TODO: Does it always drop to performance level 2?
-// TODO: Also do the same for graphics clock and graphics memory?
-// Fields are 0 if not set
+static int min_int(int a, int b) {
+ return a < b ? a : b;
+}
+// Fields are 0 if not set
typedef struct {
int perf;
@@ -48,31 +50,56 @@ static void split_by_delimiter(const char *str, size_t size, char delimiter, spl
}
}
+typedef enum {
+ NVCTRL_GPU_NVCLOCK,
+ NVCTRL_ATTRIB_GPU_MEM_TRANSFER_RATE,
+} NvCTRLAttributeType;
+
+static unsigned int attribute_type_to_attribute_param(NvCTRLAttributeType attribute_type) {
+ switch(attribute_type) {
+ case NVCTRL_GPU_NVCLOCK:
+ return NV_CTRL_GPU_NVCLOCK_OFFSET;
+ case NVCTRL_ATTRIB_GPU_MEM_TRANSFER_RATE:
+ return NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET;
+ }
+ return 0;
+}
+
+static unsigned int attribute_type_to_attribute_param_all_levels(NvCTRLAttributeType attribute_type) {
+ switch(attribute_type) {
+ case NVCTRL_GPU_NVCLOCK:
+ return NV_CTRL_GPU_NVCLOCK_OFFSET_ALL_PERFORMANCE_LEVELS;
+ case NVCTRL_ATTRIB_GPU_MEM_TRANSFER_RATE:
+ return NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS;
+ }
+ return 0;
+}
+
// Returns 0 on error
-static int xnvctrl_get_memory_transfer_rate_max(gsr_xnvctrl *xnvctrl, const NVCTRLPerformanceLevelQuery *query) {
+static int xnvctrl_get_attribute_max_value(gsr_xnvctrl *xnvctrl, const NVCTRLPerformanceLevelQuery *query, NvCTRLAttributeType attribute_type) {
NVCTRLAttributeValidValuesRec valid;
- if(xnvctrl->XNVCTRLQueryValidTargetAttributeValues(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, 0, NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS, &valid)) {
+ if(xnvctrl->XNVCTRLQueryValidTargetAttributeValues(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, 0, attribute_type_to_attribute_param_all_levels(attribute_type), &valid)) {
return valid.u.range.max;
}
- if(query->num_performance_levels > 0 && xnvctrl->XNVCTRLQueryValidTargetAttributeValues(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, query->num_performance_levels - 1, NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET, &valid)) {
+ if(query->num_performance_levels > 0 && xnvctrl->XNVCTRLQueryValidTargetAttributeValues(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, query->num_performance_levels - 1, attribute_type_to_attribute_param(attribute_type), &valid)) {
return valid.u.range.max;
}
return 0;
}
-static bool xnvctrl_set_memory_transfer_rate_offset(gsr_xnvctrl *xnvctrl, int num_performance_levels, int offset) {
+static bool xnvctrl_set_attribute_offset(gsr_xnvctrl *xnvctrl, int num_performance_levels, int offset, NvCTRLAttributeType attribute_type) {
bool success = false;
// NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS works (or at least used to?) without Xorg running as root
// so we try that first. NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS also only works with GTX 1000+.
// TODO: Reverse engineer NVIDIA Xorg driver so we can set this always without root access.
- if(xnvctrl->XNVCTRLSetTargetAttributeAndGetStatus(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, 0, NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS, offset))
+ if(xnvctrl->XNVCTRLSetTargetAttributeAndGetStatus(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, 0, attribute_type_to_attribute_param_all_levels(attribute_type), offset))
success = true;
for(int i = 0; i < num_performance_levels; ++i) {
- success |= xnvctrl->XNVCTRLSetTargetAttributeAndGetStatus(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, i, NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET, offset);
+ success |= xnvctrl->XNVCTRLSetTargetAttributeAndGetStatus(xnvctrl->display, NV_CTRL_TARGET_TYPE_GPU, 0, i, attribute_type_to_attribute_param(attribute_type), offset);
}
return success;
@@ -207,21 +234,38 @@ bool gsr_overclock_start(gsr_overclock *self) {
}
self->num_performance_levels = query.num_performance_levels;
- int target_transfer_rate_offset = xnvctrl_get_memory_transfer_rate_max(&self->xnvctrl, &query) / 2;
- if(query.num_performance_levels > 3) {
+ int target_transfer_rate_offset = xnvctrl_get_attribute_max_value(&self->xnvctrl, &query, NVCTRL_ATTRIB_GPU_MEM_TRANSFER_RATE) / 2; // Divide by 2 just to be safe that we dont set it too high
+ if(query.num_performance_levels > 2) {
const int transfer_rate_max_diff = query.performance_level[query.num_performance_levels - 1].mem_transfer_rate_max - query.performance_level[2].mem_transfer_rate_max;
- if(transfer_rate_max_diff > 0 && transfer_rate_max_diff < target_transfer_rate_offset)
- target_transfer_rate_offset = transfer_rate_max_diff;
+ target_transfer_rate_offset = min_int(target_transfer_rate_offset, transfer_rate_max_diff);
}
- if(xnvctrl_set_memory_transfer_rate_offset(&self->xnvctrl, self->num_performance_levels, target_transfer_rate_offset)) {
+ if(xnvctrl_set_attribute_offset(&self->xnvctrl, self->num_performance_levels, target_transfer_rate_offset, NVCTRL_ATTRIB_GPU_MEM_TRANSFER_RATE)) {
fprintf(stderr, "gsr info: gsr_overclock_start: sucessfully set memory transfer rate offset to %d\n", target_transfer_rate_offset);
} else {
fprintf(stderr, "gsr info: gsr_overclock_start: failed to overclock memory transfer rate offset to %d\n", target_transfer_rate_offset);
}
+
+
+ // TODO: Enable. Crashes on my system (gtx 1080) so it's disabled for now. Seems to crash even if graphics clock is increasd by 1, let alone 1200
+ /*
+ int target_nv_clock_offset = xnvctrl_get_attribute_max_value(&self->xnvctrl, &query, NVCTRL_GPU_NVCLOCK) / 2; // Divide by 2 just to be safe that we dont set it too high
+ if(query.num_performance_levels > 2) {
+ const int nv_clock_max_diff = query.performance_level[query.num_performance_levels - 1].nv_clock_max - query.performance_level[2].nv_clock_max;
+ target_nv_clock_offset = min_int(target_nv_clock_offset, nv_clock_max_diff);
+ }
+
+ if(xnvctrl_set_attribute_offset(&self->xnvctrl, self->num_performance_levels, target_nv_clock_offset, NVCTRL_GPU_NVCLOCK)) {
+ fprintf(stderr, "gsr info: gsr_overclock_start: sucessfully set nv clock offset to %d\n", target_nv_clock_offset);
+ } else {
+ fprintf(stderr, "gsr info: gsr_overclock_start: failed to overclock nv clock offset to %d\n", target_nv_clock_offset);
+ }
+ */
+
return true;
}
void gsr_overclock_stop(gsr_overclock *self) {
- xnvctrl_set_memory_transfer_rate_offset(&self->xnvctrl, self->num_performance_levels, 0);
+ xnvctrl_set_attribute_offset(&self->xnvctrl, self->num_performance_levels, 0, NVCTRL_ATTRIB_GPU_MEM_TRANSFER_RATE);
+ //xnvctrl_set_attribute_offset(&self->xnvctrl, self->num_performance_levels, 0, NVCTRL_GPU_NVCLOCK);
}