Diffstat (limited to 'src/main.cpp')
-rw-r--r-- | src/main.cpp | 564
1 file changed, 564 insertions, 0 deletions
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..28fa725
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,564 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+
+#define GLX_GLXEXT_PROTOTYPES
+#include <GL/glew.h>
+#include <GLFW/glfw3.h>
+#include <GL/glx.h>
+#include <GL/glxext.h>
+
+#include <X11/extensions/Xcomposite.h>
+#include <X11/extensions/Xdamage.h>
+
+// TODO: Use opencl or vulkan instead
+#include <ffnvcodec/nvEncodeAPI.h>
+//#include <ffnvcodec/dynlink_cuda.h>
+extern "C" {
+#include <libavutil/hwcontext_cuda.h>
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+}
+#include <cudaGL.h>
+
+extern "C" {
+#include <libavutil/hwcontext.h>
+}
+
+#include <CL/cl.h>
+
+struct ScopedGLXFBConfig {
+    ~ScopedGLXFBConfig() {
+        if(configs)
+            XFree(configs);
+    }
+
+    GLXFBConfig *configs = nullptr;
+};
+
+struct WindowPixmap {
+    WindowPixmap() : pixmap(None), glx_pixmap(None), texture_id(0), texture_width(0), texture_height(0) {
+
+    }
+
+    Pixmap pixmap;
+    GLXPixmap glx_pixmap;
+    GLuint texture_id;
+
+    GLint texture_width;
+    GLint texture_height;
+
+    GLuint dst_texture_id;
+};
+
+static bool x11_supports_composite_named_window_pixmap(Display *dpy) {
+    int extension_major;
+    int extension_minor;
+    if(!XCompositeQueryExtension(dpy, &extension_major, &extension_minor))
+        return false;
+
+    int major_version;
+    int minor_version;
+    return XCompositeQueryVersion(dpy, &major_version, &minor_version) && (major_version > 0 || minor_version >= 2);
+}
+
+static void cleanup_window_pixmap(Display *dpy, WindowPixmap &pixmap) {
+    if(pixmap.texture_id) {
+        glDeleteTextures(1, &pixmap.texture_id);
+        pixmap.texture_id = 0;
+        pixmap.texture_width = 0;
+        pixmap.texture_height = 0;
+    }
+
+    if(pixmap.glx_pixmap) {
+        glXReleaseTexImageEXT(dpy, pixmap.glx_pixmap, GLX_FRONT_EXT);
+        glXDestroyPixmap(dpy, pixmap.glx_pixmap);
+        pixmap.glx_pixmap = None;
+    }
+
+    if(pixmap.pixmap) {
+        XFreePixmap(dpy, pixmap.pixmap);
+        pixmap.pixmap = None;
+    }
+}
+
+static bool recreate_window_pixmap(Display *dpy, Window window_id, WindowPixmap &pixmap) {
+    cleanup_window_pixmap(dpy, pixmap);
+
+    const int pixmap_config[] = {
+        GLX_BIND_TO_TEXTURE_RGBA_EXT, True,
+        GLX_DRAWABLE_TYPE, GLX_PIXMAP_BIT,
+        GLX_BIND_TO_TEXTURE_TARGETS_EXT, GLX_TEXTURE_2D_BIT_EXT,
+        //GLX_BIND_TO_MIPMAP_TEXTURE_EXT, True,
+        GLX_DOUBLEBUFFER, False,
+        GLX_Y_INVERTED_EXT, (int)GLX_DONT_CARE,
+        None
+    };
+
+    const int pixmap_attribs[] = {
+        GLX_TEXTURE_TARGET_EXT, GLX_TEXTURE_2D_EXT,
+        GLX_TEXTURE_FORMAT_EXT, GLX_TEXTURE_FORMAT_RGB_EXT,
+        GLX_MIPMAP_TEXTURE_EXT, 0,
+        None
+    };
+
+    int c;
+    GLXFBConfig *configs = glXChooseFBConfig(dpy, 0, pixmap_config, &c);
+    if(!configs) {
+        fprintf(stderr, "Failed to choose fb config\n");
+        return false;
+    }
+    ScopedGLXFBConfig scoped_configs;
+    scoped_configs.configs = configs;
+
+    Pixmap new_window_pixmap = XCompositeNameWindowPixmap(dpy, window_id);
+    if(!new_window_pixmap) {
+        fprintf(stderr, "Failed to get pixmap for window %ld\n", window_id);
+        return false;
+    }
+
+    GLXPixmap glx_pixmap = glXCreatePixmap(dpy, *configs, new_window_pixmap, pixmap_attribs);
+    if(!glx_pixmap) {
+        fprintf(stderr, "Failed to create glx pixmap\n");
+        XFreePixmap(dpy, new_window_pixmap);
+        return false;
+    }
+
+    pixmap.pixmap = new_window_pixmap;
+    pixmap.glx_pixmap = glx_pixmap;
+
+    glEnable(GL_TEXTURE_2D);
+    glGenTextures(1, &pixmap.texture_id);
+    glBindTexture(GL_TEXTURE_2D, pixmap.texture_id);
+
+    //glEnable(GL_BLEND);
+    //glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+
+    glXBindTexImageEXT(dpy, pixmap.glx_pixmap, GLX_FRONT_EXT, NULL);
+    glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &pixmap.texture_width);
+    glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &pixmap.texture_height);
+    printf("texture width: %d, height: %d\n", pixmap.texture_width, pixmap.texture_height);
+    //glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, pixmap.texture_width, pixmap.texture_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+    //glXBindTexImageEXT(dpy, pixmap.glx_pixmap, GLX_FRONT_EXT, NULL);
+    //glGenerateTextureMipmapEXT(glxpixmap, GL_TEXTURE_2D);
+
+    //glGenerateMipmap(GL_TEXTURE_2D);
+
+    //glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
+    //glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
+    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);//GL_LINEAR );
+    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);//GL_LINEAR);//GL_LINEAR_MIPMAP_LINEAR );
+    glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
+
+    glBindTexture(GL_TEXTURE_2D, 0);
+
+    glGenTextures(1, &pixmap.dst_texture_id);
+    glBindTexture(GL_TEXTURE_2D, pixmap.dst_texture_id);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, pixmap.texture_width, pixmap.texture_height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
+    glCopyImageSubData(pixmap.texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, pixmap.dst_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, pixmap.texture_width, pixmap.texture_height, 0);
+    glBindTexture(GL_TEXTURE_2D, 0);
+
+    return pixmap.texture_id != 0 && pixmap.dst_texture_id != 0;
+}
+
+std::vector<std::string> get_hardware_acceleration_device_names() {
+    #if 1
+    std::vector<std::string> result;
+
+    cl_uint platform_count = 0;
+    clGetPlatformIDs(0, nullptr, &platform_count);
+    cl_platform_id *platforms = new cl_platform_id[platform_count];
+    clGetPlatformIDs(platform_count, platforms, nullptr);
+
+    for(cl_uint i = 0; i < platform_count; ++i) {
+        cl_uint device_count = 0;
+        clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, nullptr, &device_count);
+        cl_device_id *devices = new cl_device_id[device_count];
+        clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, device_count, devices, nullptr);
+
+        for(cl_uint j = 0; j < device_count; ++j) {
+            size_t value_size = 0;
+            clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, nullptr, &value_size);
+            std::string device_name(value_size, 0);
+            clGetDeviceInfo(devices[j], CL_DEVICE_NAME, value_size, &device_name[0], nullptr);
+            printf("Device: %s\n", device_name.c_str());
+            result.push_back(std::move(device_name));
+        }
+
+        delete []devices;
+    }
+
+    delete []platforms;
+    return result;
+    #else
+    int iGpu = 0;
+    CUresult res;
+    if(cuInit(0) < 0) {
+        fprintf(stderr, "Error: cuInit failed\n");
+        return {};
+    }
+
+    int nGpu = 0;
+    cuDeviceGetCount(&nGpu);
+    if(iGpu < 0 || iGpu >= nGpu) {
+        fprintf(stderr, "Error: failed...\n");
+        return {};
+    }
+
+    CUdevice cuDevice = 0;
+    cuDeviceGet(&cuDevice, iGpu);
+    char deviceName[80];
+    cuDeviceGetName(deviceName, sizeof(deviceName), cuDevice);
+    printf("device name: %s\n", deviceName);
+    return { deviceName };
+    #endif
+}
+
+static inline double ToDouble(const AVRational& r) {
+    return (double) r.num / (double) r.den;
+}
+
+static void receive_frames(AVCodecContext *av_codec_context, AVStream *stream, AVFormatContext *av_format_context) {
+    for( ; ; ) {
+        AVPacket *av_packet = new AVPacket;
+        av_init_packet(av_packet);
+        av_packet->data = NULL;
+        av_packet->size = 0;
+        int res = avcodec_receive_packet(av_codec_context, av_packet);
+        if(res == 0) { // we have a packet, send the packet to the muxer
+            printf("Received packet!\n");
+            printf("data: %p, size: %d, pts: %ld\n", (void*)av_packet->data, av_packet->size, av_packet->pts);
+            printf("timebase: %f\n", ToDouble(stream->time_base));
+
+            // prepare packet
+            av_packet_rescale_ts(av_packet, av_codec_context->time_base, stream->time_base);
+            av_packet->stream_index = stream->index;
+/*
+            if(av_packet->pts != (int64_t) AV_NOPTS_VALUE) {
+                av_packet->pts = av_rescale_q(av_packet->pts, av_codec_context->time_base, stream->time_base);
+            }
+            if(av_packet->dts != (int64_t) AV_NOPTS_VALUE) {
+                av_packet->dts = av_rescale_q(av_packet->dts, av_codec_context->time_base, stream->time_base);
+            }
+*/
+            if(av_interleaved_write_frame(av_format_context, av_packet) < 0) {
+                fprintf(stderr, "Error: Failed to write frame to muxer\n");
+            }
+            //av_packet_unref(&av_packet);
+        } else if(res == AVERROR(EAGAIN)) { // we have no packet
+            //printf("No packet!\n");
+            break;
+        } else if(res == AVERROR_EOF) { // this is the end of the stream
+            printf("End of stream!\n");
+            break;
+        } else {
+            printf("Unexpected error: %d\n", res);
+            break;
+        }
+    }
+    //av_packet_unref(&av_packet);
+}
+
+int main(int argc, char **argv) {
+    if(argc < 2) {
+        fprintf(stderr, "usage: hardware-screen-recorder <window_id>\n");
+        return 1;
+    }
+
+    Window src_window_id = atoi(argv[1]);
+
+    Display *dpy = XOpenDisplay(nullptr);
+    if(!dpy) {
+        fprintf(stderr, "Error: Failed to open display\n");
+        return 1;
+    }
+
+    bool has_name_pixmap = x11_supports_composite_named_window_pixmap(dpy);
+    if(!has_name_pixmap) {
+        fprintf(stderr, "Error: XCompositeNameWindowPixmap is not supported by your X11 server\n");
+        return 1;
+    }
+
+    // TODO: Verify if this is needed
+    int screen_count = ScreenCount(dpy);
+    for(int i = 0; i < screen_count; ++i) {
+        XCompositeRedirectSubwindows(dpy, RootWindow(dpy, i), CompositeRedirectAutomatic);
+    }
+
+    XWindowAttributes attr;
+    if(!XGetWindowAttributes(dpy, src_window_id, &attr)) {
+        fprintf(stderr, "Error: Invalid window id: %lu\n", src_window_id);
+        return 1;
+    }
+
+    //glXMakeContextCurrent(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx)
+    if(!glfwInit()) {
+        fprintf(stderr, "Error: Failed to initialize glfw\n");
+        return 1;
+    }
+
+    GLFWwindow *window = glfwCreateWindow(1920, 1080, "Hello world", nullptr, nullptr);
+    if(!window) {
+        fprintf(stderr, "Error: Failed to create glfw window\n");
+        glfwTerminate();
+        return 1;
+    }
+
+    glfwMakeContextCurrent(window);
+
+    glewExperimental = GL_TRUE;
+    GLenum nGlewError = glewInit();
+    if (nGlewError != GLEW_OK) {
+        fprintf(stderr, "%s - Error initializing GLEW! %s\n", __FUNCTION__, glewGetErrorString(nGlewError));
+        return 1;
+    }
+    glGetError(); // to clear the error caused deep in GLEW
+
+    WindowPixmap window_pixmap;
+    if(!recreate_window_pixmap(dpy, src_window_id, window_pixmap)) {
+        fprintf(stderr, "Error: Failed to create glx pixmap for window: %lu\n", src_window_id);
+        return 1;
+    }
+
+    //avcodec_register_all();
+    AVCodec *av_codec = avcodec_find_encoder_by_name("h264_nvenc"); //avcodec_find_encoder(AV_CODEC_ID_H264);
+    if(!av_codec) {
+        fprintf(stderr, "Error: No encoder was found for codec h264\n");
+        return 1;
+    }
+
+    AVOutputFormat *format = av_guess_format("mp4", nullptr, nullptr);
+    if(!format) {
+        fprintf(stderr, "Error: Invalid format: mp4\n");
+        return 1;
+    }
+    AVFormatContext *av_format_context = avformat_alloc_context();
+    av_format_context->oformat = format;
+    if(avio_open(&av_format_context->pb, "test_new.mp4", AVIO_FLAG_WRITE) < 0) {
+        fprintf(stderr, "Error: Failed to open output file: test_new.mp4\n");
+        return 1;
+    }
+
+    AVStream *stream = avformat_new_stream(av_format_context, av_codec);
+    if(!stream) {
+        fprintf(stderr, "Error: Failed to create stream\n");
+        return 1;
+    }
+    stream->id = av_format_context->nb_streams - 1;
+
+    AVCodecContext *av_codec_context = avcodec_alloc_context3(av_codec);
+    if(avcodec_get_context_defaults3(av_codec_context, av_codec) < 0) {
+        fprintf(stderr, "Error: Failed to get av codec context defaults\n");
+        return 1;
+    }
+    av_codec_context->codec_id = av_codec->id;
+    av_codec_context->codec_type = av_codec->type;
+    if(av_format_context->oformat->flags & AVFMT_GLOBALHEADER)
+        av_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+    av_codec_context->time_base.num = 1;
+    av_codec_context->time_base.den = 60;
+    av_codec_context->gop_size = 12;
+    av_codec_context->bit_rate = 400000;
+    av_codec_context->width = 720; // window_pixmap.texture_width
+    av_codec_context->height = 480; // window_pixmap.texture_height
+#if SSR_USE_AVSTREAM_TIME_BASE
+    stream->time_base = av_codec_context->time_base;
+#endif
+    av_codec_context->sample_aspect_ratio.num = 1;
+    av_codec_context->sample_aspect_ratio.den = 1;
+    av_codec_context->pix_fmt = AV_PIX_FMT_CUDA;
+    av_codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
+    av_codec_context->sw_pix_fmt = AV_PIX_FMT_0BGR32;
+    stream->sample_aspect_ratio = av_codec_context->sample_aspect_ratio;
+
+    std::vector<std::string> hardware_accelerated_devices = get_hardware_acceleration_device_names();
+    if(hardware_accelerated_devices.empty()) {
+        fprintf(stderr, "Error: No hardware accelerated device was found on your system\n");
+        return 1;
+    }
+
+    AVBufferRef *device_ctx = nullptr;
+    if(av_hwdevice_ctx_create(&device_ctx, AV_HWDEVICE_TYPE_CUDA, hardware_accelerated_devices[0].c_str(), NULL, 0) < 0) {
+        fprintf(stderr, "Error: Failed to create hardware device context for gpu: %s\n", hardware_accelerated_devices[0].c_str());
+        return 1;
+    }
+
+    AVHWDeviceContext *hw_device_context = (AVHWDeviceContext*)device_ctx->data;
+    AVCUDADeviceContext *cuda_device_context = (AVCUDADeviceContext*)hw_device_context->hwctx;
+    CUcontext *cuda_context = &(cuda_device_context->cuda_ctx);
+    if(!cuda_context) {
+        fprintf(stderr, "Error: No cuda context\n");
+        return 1;
+    }
+    AVBufferRef *frame_context = av_hwframe_ctx_alloc(device_ctx);
+
+    AVHWFramesContext *hw_frame_context = (AVHWFramesContext*)frame_context->data;
+    hw_frame_context->width = window_pixmap.texture_width;
+    hw_frame_context->height = window_pixmap.texture_height;
+    hw_frame_context->sw_format = AV_PIX_FMT_0BGR32;
+    hw_frame_context->format = AV_PIX_FMT_CUDA;
+    hw_frame_context->device_ref = device_ctx;
+    hw_frame_context->device_ctx = (AVHWDeviceContext*)device_ctx->data;
+
+    if(av_hwframe_ctx_init(frame_context) < 0) {
+        fprintf(stderr, "Error: Failed to initialize hardware frame context (note: ffmpeg version needs to be > 4.0)\n");
+        return 1;
+    }
+
+    av_codec_context->hw_device_ctx = device_ctx;
+    av_codec_context->hw_frames_ctx = frame_context;
+
+    if(avcodec_open2(av_codec_context, av_codec, nullptr) < 0) {
+        fprintf(stderr, "Error: avcodec_open2 failed\n");
+        return 1;
+    }
+
+    if(avcodec_parameters_from_context(stream->codecpar, av_codec_context) < 0) {
+        fprintf(stderr, "Error: Can't copy parameters to stream!\n");
+        return 1;
+    }
+
+    //AVDictionary *opts = nullptr;
+    //av_dict_set(&opts, "b", "2.5M", 0);
+    // if(avcodec_open2(av_codec_context, av_codec, nullptr) < 0) {
+    //     fprintf(stderr, "Error: avcodec_open2 failed\n");
+    //     return 1;
+    // }
+
+    CUresult res;
+    CUcontext old_ctx;
+    CUgraphicsResource cuda_graphics_resource;
+    res = cuCtxPopCurrent(&old_ctx);
+    res = cuCtxPushCurrent(*cuda_context);
+    res = cuGraphicsGLRegisterImage(&cuda_graphics_resource, window_pixmap.dst_texture_id, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY);
+    if(res != CUDA_SUCCESS) {
+        fprintf(stderr, "Error: cuGraphicsGLRegisterImage failed, error %d, texture id: %u\n", res, window_pixmap.texture_id);
+        return 1;
+    }
+    res = cuCtxPopCurrent(&old_ctx);
+
+
+    AVFrame *rgb_frame = av_frame_alloc();
+    if(av_hwframe_get_buffer(frame_context, rgb_frame, 0) < 0) {
+        fprintf(stderr, "Error: av_hwframe_get_buffer failed\n");
+        return 1;
+    }
+
+
+    // Get context
+    res = cuCtxPopCurrent(&old_ctx);
+    res = cuCtxPushCurrent(*cuda_context);
+
+    // Get texture
+    res = cuGraphicsResourceSetMapFlags(cuda_graphics_resource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
+    res = cuGraphicsMapResources(1, &cuda_graphics_resource, 0);
+
+    // Map texture to cuda array
+    CUarray mapped_array;
+    res = cuGraphicsSubResourceGetMappedArray(&mapped_array, cuda_graphics_resource, 0, 0);
+
+    // Release texture
+    res = cuGraphicsUnmapResources(1, &cuda_graphics_resource, 0);
+
+    CUDA_MEMCPY2D memcpy_struct;
+    memcpy_struct.srcXInBytes = 0;
+    memcpy_struct.srcY = 0;
+    memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
+
+    memcpy_struct.dstXInBytes = 0;
+    memcpy_struct.dstY = 0;
+    memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
+
+    memcpy_struct.srcArray = mapped_array;
+    memcpy_struct.dstDevice = (CUdeviceptr)rgb_frame->data[0];
+    memcpy_struct.dstPitch = rgb_frame->linesize[0];
+    memcpy_struct.WidthInBytes = rgb_frame->width * 4;
+    memcpy_struct.Height = rgb_frame->height;
+    cuMemcpy2D(&memcpy_struct);
+
+    // Release context
+    res = cuCtxPopCurrent(&old_ctx);
+
+    if(avformat_write_header(av_format_context, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to write header\n");
+        return 1;
+    }
+
+    if(avcodec_send_frame(av_codec_context, rgb_frame) < 0) {
+        fprintf(stderr, "Error: avcodec_send_frame failed\n");
+    }
+
+    //av_frame_free(&rgb_frame);
+    //avcodec_close(av_codec_context);
+
+    XSelectInput(dpy, src_window_id, StructureNotifyMask);
+
+    int damage_event;
+    int damage_error;
+    if(!XDamageQueryExtension(dpy, &damage_event, &damage_error)) {
+        fprintf(stderr, "Error: XDamage is not supported by your X11 server\n");
+        return 1;
+    }
+
+    Damage xdamage = XDamageCreate(dpy, src_window_id, XDamageReportNonEmpty);
+
+    XEvent e;
+    while (1) {
+        XNextEvent(dpy, &e);
+        if (e.type == ConfigureNotify) {
+            // Window resize
+            printf("Resize window!\n");
+            recreate_window_pixmap(dpy, src_window_id, window_pixmap);
+        } else if (e.type == damage_event + XDamageNotify) {
+            XDamageNotifyEvent *de = (XDamageNotifyEvent*)&e;
+            // de->drawable is the window ID of the damaged window
+            XserverRegion region = XFixesCreateRegion(dpy, nullptr, 0);
+            // Subtract all the damage, repairing the window
+            XDamageSubtract(dpy, de->damage, None, region);
+            XFixesDestroyRegion(dpy, region);
+
+            glCopyImageSubData(window_pixmap.texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, window_pixmap.dst_texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, window_pixmap.texture_width, window_pixmap.texture_height, 0);
+            res = cuCtxPopCurrent(&old_ctx);
+            res = cuCtxPushCurrent(*cuda_context);
+            CUDA_MEMCPY2D memcpy_struct;
+            memcpy_struct.srcXInBytes = 0;
+            memcpy_struct.srcY = 0;
+            memcpy_struct.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
+
+            memcpy_struct.dstXInBytes = 0;
+            memcpy_struct.dstY = 0;
+            memcpy_struct.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
+
+            memcpy_struct.srcArray = mapped_array;
+            memcpy_struct.dstDevice = (CUdeviceptr)rgb_frame->data[0];
+            memcpy_struct.dstPitch = rgb_frame->linesize[0];
+            memcpy_struct.WidthInBytes = rgb_frame->width * 4;
+            memcpy_struct.Height = rgb_frame->height;
+            cuMemcpy2D(&memcpy_struct);
+            res = cuCtxPopCurrent(&old_ctx);
+            if(avcodec_send_frame(av_codec_context, rgb_frame) < 0) {
+                fprintf(stderr, "Error: avcodec_send_frame failed\n");
+            }
+            receive_frames(av_codec_context, stream, av_format_context);
+        }
+    }
+
+    if(av_write_trailer(av_format_context) != 0) {
+        fprintf(stderr, "Failed to write trailer\n");
+    }
+
+    if(av_format_context->pb)
+        avio_close(av_format_context->pb);
+    avformat_free_context(av_format_context);
+
+    XDamageDestroy(dpy, xdamage);
+    av_buffer_unref(&device_ctx);
+    avcodec_free_context(&av_codec_context);
+    cleanup_window_pixmap(dpy, window_pixmap);
+    for(int i = 0; i < screen_count; ++i) {
+        XCompositeUnredirectSubwindows(dpy, RootWindow(dpy, i), CompositeRedirectAutomatic);
+    }
+    XCloseDisplay(dpy);
+}