From 93cb6593a642018e3373fb7099e1c0687b367176 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 17 Mar 2023 17:17:14 +0100 Subject: Add overclocking option -oc to work around an NVIDIA driver bug (the GPU is forcibly set to the P2 state when using CUDA) --- include/capture/nvfbc.h | 3 ++- include/capture/xcomposite_cuda.h | 1 + include/cuda.h | 10 ++++++--- include/overclock.h | 17 ++++++++++++++++ include/xnvctrl.h | 43 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 include/overclock.h create mode 100644 include/xnvctrl.h (limited to 'include') diff --git a/include/capture/nvfbc.h b/include/capture/nvfbc.h index 06587d8..431777b 100644 --- a/include/capture/nvfbc.h +++ b/include/capture/nvfbc.h @@ -12,7 +12,8 @@ typedef struct { int fps; vec2i pos; vec2i size; - bool direct_capture; /* temporary disabled */ + bool direct_capture; + bool overclock; } gsr_capture_nvfbc_params; gsr_capture* gsr_capture_nvfbc_create(const gsr_capture_nvfbc_params *params); diff --git a/include/capture/xcomposite_cuda.h b/include/capture/xcomposite_cuda.h index d9b4b10..20d358e 100644 --- a/include/capture/xcomposite_cuda.h +++ b/include/capture/xcomposite_cuda.h @@ -11,6 +11,7 @@ typedef struct { Window window; bool follow_focused; /* If this is set then |window| is ignored */ vec2i region_size; /* This is currently only used with |follow_focused| */ + bool overclock; } gsr_capture_xcomposite_cuda_params; gsr_capture* gsr_capture_xcomposite_cuda_create(const gsr_capture_xcomposite_cuda_params *params); diff --git a/include/cuda.h b/include/cuda.h index cefdcad..ab07429 100644 --- a/include/cuda.h +++ b/include/cuda.h @@ -1,12 +1,15 @@ #ifndef GSR_CUDA_H #define GSR_CUDA_H +#include "overclock.h" #include #include // To prevent hwcontext_cuda.h from including cuda.h #define CUDA_VERSION 11070 +#define CU_CTX_SCHED_AUTO 0 + #if defined(_WIN64) || defined(__LP64__) typedef unsigned long long CUdeviceptr_v2; #else @@ -68,11 +71,12 @@ typedef 
struct CUDA_MEMCPY2D_st { } CUDA_MEMCPY2D_v2; typedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D; -#define CU_CTX_SCHED_AUTO 0 - typedef struct CUgraphicsResource_st *CUgraphicsResource; typedef struct { + gsr_overclock overclock; + bool do_overclock; + void *library; CUcontext cu_ctx; @@ -95,7 +99,7 @@ typedef struct { CUresult (*cuGraphicsSubResourceGetMappedArray)(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); } gsr_cuda; -bool gsr_cuda_load(gsr_cuda *self); +bool gsr_cuda_load(gsr_cuda *self, Display *display, bool overclock); void gsr_cuda_unload(gsr_cuda *self); #endif /* GSR_CUDA_H */ diff --git a/include/overclock.h b/include/overclock.h new file mode 100644 index 0000000..d6ff901 --- /dev/null +++ b/include/overclock.h @@ -0,0 +1,17 @@ +#ifndef GSR_OVERCLOCK_H +#define GSR_OVERCLOCK_H + +#include "xnvctrl.h" + +typedef struct { + gsr_xnvctrl xnvctrl; + int num_performance_levels; +} gsr_overclock; + +bool gsr_overclock_load(gsr_overclock *self, Display *display); +void gsr_overclock_unload(gsr_overclock *self); + +bool gsr_overclock_start(gsr_overclock *self); +void gsr_overclock_stop(gsr_overclock *self); + +#endif /* GSR_OVERCLOCK_H */ diff --git a/include/xnvctrl.h b/include/xnvctrl.h new file mode 100644 index 0000000..8e026c4 --- /dev/null +++ b/include/xnvctrl.h @@ -0,0 +1,43 @@ +#ifndef GSR_XNVCTRL_H +#define GSR_XNVCTRL_H + +#include +#include + +#define NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET 410 +#define NV_CTRL_GPU_MEM_TRANSFER_RATE_OFFSET_ALL_PERFORMANCE_LEVELS 425 + +#define NV_CTRL_TARGET_TYPE_GPU 1 + +#define NV_CTRL_STRING_PERFORMANCE_MODES 29 + +typedef struct _XDisplay Display; + +typedef struct { + int type; + union { + struct { + int64_t min; + int64_t max; + } range; + struct { + unsigned int ints; + } bits; + } u; + unsigned int permissions; +} NVCTRLAttributeValidValuesRec; + +typedef struct { + Display *display; + void *library; + + int (*XNVCTRLQueryExtension)(Display *dpy, int *event_basep, int 
*error_basep); + int (*XNVCTRLSetTargetAttributeAndGetStatus)(Display *dpy, int target_type, int target_id, unsigned int display_mask, unsigned int attribute, int value); + int (*XNVCTRLQueryValidTargetAttributeValues)(Display *dpy, int target_type, int target_id, unsigned int display_mask, unsigned int attribute, NVCTRLAttributeValidValuesRec *values); + int (*XNVCTRLQueryTargetStringAttribute)(Display *dpy, int target_type, int target_id, unsigned int display_mask, unsigned int attribute, char **ptr); +} gsr_xnvctrl; + +bool gsr_xnvctrl_load(gsr_xnvctrl *self, Display *display); +void gsr_xnvctrl_unload(gsr_xnvctrl *self); + +#endif /* GSR_XNVCTRL_H */ -- cgit v1.2.3