3 files changed, 41 insertions, 69 deletions
diff --git a/.gitignore b/.gitignore
index 0cf74c6..7d676bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,5 @@ tests/compile_commands.json
 .clangd/
 .cache/
 
-main.o
-sound.o
+*.o
 gpu-screen-recorder
diff --git a/TODO b/TODO
index e082557..59432d6 100644
--- a/TODO
+++ b/TODO
@@ -1,11 +1,7 @@
 Check for reparent.
 Only add window to list if its the window is a topmost window.
-Use nvEncoder api directly? maybe with this we could copy the window opengl texture directly to the gpu which doesn't work right now for some reason.
-    Right now we are required to copy the opengl texture to another opengl texture first.
-    nvEncRegisterResource allows registering an opengl texture directly with NV_ENC_INPUT_RESOURCE_OPENGL_TEX and using that directly in the encoding.
 Load cuda at runtime with dlopen.
 Track window damages and only update then. That is better for output file size.
-Remove cuda to cuda copy when using nvFBC if possible. ffmpeg is getting in the way.
 Getting the texture of a window when using a compositor is an nvidia specific limitation. When gpu-screen-recorder supports other gpus then this can be ignored.
 Remove dependency on glfw (and glew?).
-Quickly changing workspace and back while recording under i3 breaks the screen recorder. The resize is triggered and it fails to recreate texture (fail to get texture size, texture id probably == 0).
+Quickly switching to another workspace and back while recording under i3 breaks the screen recorder. i3 probably unmaps windows that are on other workspaces.
diff --git a/src/main.cpp b/src/main.cpp
index 280e3f3..ea1f2fc 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -310,23 +310,6 @@ static bool recreate_window_pixmap(Display *dpy, Window window_id,
     return pixmap.texture_id != 0 && pixmap.target_texture_id != 0;
 }
 
-std::vector<std::string> get_hardware_acceleration_device_names() {
-    int iGpu = 0;
-    int nGpu = 0;
-    cuDeviceGetCount(&nGpu);
-    if (iGpu < 0 || iGpu >= nGpu) {
-        fprintf(stderr, "Error: failed...\n");
-        return {};
-    }
-
-    CUdevice cuDevice = 0;
-    cuDeviceGet(&cuDevice, iGpu);
-    char deviceName[80];
-    cuDeviceGetName(deviceName, sizeof(deviceName), cuDevice);
-    fprintf(stderr, "device name: %s\n", deviceName);
-    return {deviceName};
-}
-
 // |stream| is only required for non-replay mode
 static void receive_frames(AVCodecContext *av_codec_context, int stream_index, AVStream *stream, AVFrame *frame,
                            AVFormatContext *av_format_context,
@@ -438,7 +421,6 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
 
     assert(codec->type == AVMEDIA_TYPE_VIDEO);
     codec_context->codec_id = codec->id;
-    fprintf(stderr, "codec id: %d\n", codec->id);
     codec_context->width = record_width & ~1;
     codec_context->height = record_height & ~1;
 	codec_context->bit_rate = 7500000 + (codec_context->width * codec_context->height) / 2;
@@ -464,6 +446,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
             //av_opt_set(codec_context->priv_data, "preset", "slow", 0);
             //av_opt_set(codec_context->priv_data, "profile", "high", 0);
             //codec_context->profile = FF_PROFILE_H264_HIGH;
+            av_opt_set(codec_context->priv_data, "preset", "p4", 0);
             break;
         case VideoQuality::HIGH:
             codec_context->qmin = 12;
@@ -471,6 +454,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
             //av_opt_set(codec_context->priv_data, "preset", "slow", 0);
             //av_opt_set(codec_context->priv_data, "profile", "high", 0);
             //codec_context->profile = FF_PROFILE_H264_HIGH;
+            av_opt_set(codec_context->priv_data, "preset", "p6", 0);
             break;
         case VideoQuality::ULTRA:
 	        codec_context->bit_rate = 10000000 + (codec_context->width * codec_context->height) / 2;
@@ -479,6 +463,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
             //av_opt_set(codec_context->priv_data, "preset", "veryslow", 0);
             //av_opt_set(codec_context->priv_data, "profile", "high", 0);
             //codec_context->profile = FF_PROFILE_H264_HIGH;
+            av_opt_set(codec_context->priv_data, "preset", "p7", 0);
             break;
     }
     if (codec_context->codec_id == AV_CODEC_ID_MPEG1VIDEO)
@@ -486,6 +471,7 @@ static AVCodecContext *create_video_codec_context(AVFormatContext *av_format_con
 
     // stream->time_base = codec_context->time_base;
     // codec_context->ticks_per_frame = 30;
+    av_opt_set(codec_context->priv_data, "tune", "hq", 0);
 
     // Some formats want stream headers to be seperate
     if (av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
@@ -524,24 +510,20 @@ static AVFrame* open_audio(AVCodecContext *audio_codec_context) {
 
 static void open_video(AVCodecContext *codec_context,
                        WindowPixmap &window_pixmap, AVBufferRef **device_ctx,
-                       CUgraphicsResource *cuda_graphics_resource) {
+                       CUgraphicsResource *cuda_graphics_resource, CUcontext cuda_context) {
     int ret;
 
-    std::vector<std::string> hardware_accelerated_devices =
-        get_hardware_acceleration_device_names();
-    if (hardware_accelerated_devices.empty()) {
-        fprintf(
-            stderr,
-            "Error: No hardware accelerated device was found on your system\n");
+    *device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
+    if(!*device_ctx) {
+        fprintf(stderr, "Error: Failed to create hardware device context\n");
         exit(1);
     }
 
-    if (av_hwdevice_ctx_create(device_ctx, AV_HWDEVICE_TYPE_CUDA,
-                               hardware_accelerated_devices[0].c_str(), NULL,
-                               0) < 0) {
-        fprintf(stderr,
-                "Error: Failed to create hardware device context for gpu: %s\n",
-                hardware_accelerated_devices[0].c_str());
+    AVHWDeviceContext *hw_device_context = (AVHWDeviceContext *)(*device_ctx)->data;
+    AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext *)hw_device_context->hwctx;
+    cuda_device_context->cuda_ctx = cuda_context;
+    if(av_hwdevice_ctx_init(*device_ctx) < 0) {
+        fprintf(stderr, "Error: Failed to create hardware device context\n");
         exit(1);
     }
 
@@ -576,21 +558,11 @@ static void open_video(AVCodecContext *codec_context,
         exit(1);
     }
 
-    AVHWDeviceContext *hw_device_context =
-        (AVHWDeviceContext *)(*device_ctx)->data;
-    AVCUDADeviceContext *cuda_device_context =
-        (AVCUDADeviceContext *)hw_device_context->hwctx;
-    CUcontext *cuda_context = &(cuda_device_context->cuda_ctx);
-    if (!cuda_context) {
-        fprintf(stderr, "Error: No cuda context\n");
-        exit(1);
-    }
-
     if(window_pixmap.target_texture_id != 0) {
         CUresult res;
         CUcontext old_ctx;
         res = cuCtxPopCurrent(&old_ctx);
-        res = cuCtxPushCurrent(*cuda_context);
+        res = cuCtxPushCurrent(cuda_context);
         res = cuGraphicsGLRegisterImage(
             cuda_graphics_resource, window_pixmap.target_texture_id, GL_TEXTURE_2D,
             CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
@@ -930,21 +902,34 @@ int main(int argc, char **argv) {
 
     res = cuInit(0);
     if(res != CUDA_SUCCESS) {
-        fprintf(stderr, "Error: cuInit failed (result: %d)\n", res);
-        return {};
+        const char *err_str;
+        cuGetErrorString(res, &err_str);
+        fprintf(stderr, "Error: cuInit failed, error %s (result: %d)\n", err_str, res);
+        return 1;
+    }
+
+    int nGpu = 0;
+    cuDeviceGetCount(&nGpu);
+    if (nGpu <= 0) {
+        fprintf(stderr, "Error: no cuda supported devices found\n");
+        return 1;
     }
 
     CUdevice cu_dev;
     res = cuDeviceGet(&cu_dev, 0);
     if(res != CUDA_SUCCESS) {
-        fprintf(stderr, "Unable to get CUDA device (result: %d)\n", res);
+        const char *err_str;
+        cuGetErrorString(res, &err_str);
+        fprintf(stderr, "Error: unable to get CUDA device, error: %s (result: %d)\n", err_str, res);
         return 1;
     }
 
     CUcontext cu_ctx;
     res = cuCtxCreate_v2(&cu_ctx, CU_CTX_SCHED_AUTO, cu_dev);
     if(res != CUDA_SUCCESS) {
-        fprintf(stderr, "Unable to create CUDA context (result: %d)\n", res);
+        const char *err_str;
+        cuGetErrorString(res, &err_str);
+        fprintf(stderr, "Error: unable to create CUDA context, error: %s (result: %d)\n", err_str, res);
         return 1;
     }
 
@@ -1124,7 +1109,7 @@ int main(int argc, char **argv) {
 
     AVBufferRef *device_ctx;
     CUgraphicsResource cuda_graphics_resource;
-    open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource);
+    open_video(video_codec_context, window_pixmap, &device_ctx, &cuda_graphics_resource, cu_ctx);
     if(video_stream)
         avcodec_parameters_from_context(video_stream->codecpar, video_codec_context);
 
@@ -1161,16 +1146,6 @@ int main(int argc, char **argv) {
         }
     }
 
-    AVHWDeviceContext *hw_device_context =
-        (AVHWDeviceContext *)device_ctx->data;
-    AVCUDADeviceContext *cuda_device_context =
-        (AVCUDADeviceContext *)hw_device_context->hwctx;
-    CUcontext *cuda_context = &(cuda_device_context->cuda_ctx);
-    if (!cuda_context) {
-        fprintf(stderr, "Error: No cuda context\n");
-        exit(1);
-    }
-
     // av_frame_free(&rgb_frame);
     // avcodec_close(av_codec_context);
 
@@ -1195,7 +1170,7 @@ int main(int argc, char **argv) {
     CUarray mapped_array;
     if(src_window_id) {
         res = cuCtxPopCurrent(&old_ctx);
-        res = cuCtxPushCurrent(*cuda_context);
+        res = cuCtxPushCurrent(cu_ctx);
 
         // Get texture
         res = cuGraphicsResourceSetMapFlags(
@@ -1431,6 +1406,8 @@ int main(int argc, char **argv) {
                     // int err = glGetError();
                     // fprintf(stderr, "error: %d\n", err);
 
+                    // TODO: Remove this copy. That is only possible by using the nvenc API directly to encode window_pixmap.target_texture_id.
+
                     CUDA_MEMCPY2D memcpy_struct;
                     memcpy_struct.srcXInBytes = 0;
                     memcpy_struct.srcY = 0;
@@ -1449,11 +1426,11 @@ int main(int argc, char **argv) {
 
                     frame_captured = true;
                 } else {
-                    uint32_t byte_size;
-                    CUdeviceptr src_cu_device_ptr;
+                    // TODO: Detect when src_cu_device_ptr changes and re-register the resource
+                    uint32_t byte_size = 0;
+                    CUdeviceptr src_cu_device_ptr = 0;
                     frame_captured = nv_fbc_library.capture(&src_cu_device_ptr, &byte_size);
-                    if(frame_captured)
-                        cuMemcpyDtoD((CUdeviceptr)frame->data[0], src_cu_device_ptr, byte_size);
+                    frame->data[0] = (uint8_t*)src_cu_device_ptr;
                 }
                 // res = cuCtxPopCurrent(&old_ctx);
             }