From 919890b7b2eb16fc57439e6c9b8b28183febc467 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 20 Sep 2022 03:39:15 +0200 Subject: Fix replay video/audio desync, fix dummy audio when dropping audio input, give each audio stream a new name so it can be replaced with pipewire graphs --- TODO | 2 +- build.sh | 6 +- install_ubuntu.sh | 6 +- project.conf | 6 +- src/main.cpp | 129 ++++++++++-------------- src/sound.cpp | 292 ++++++++++++++++++++++++++++++++++++------------------ 6 files changed, 255 insertions(+), 186 deletions(-) diff --git a/TODO b/TODO index a5739fe..4472588 100644 --- a/TODO +++ b/TODO @@ -15,4 +15,4 @@ Use mov+faststart. Allow recording all monitors/selected monitor without nvfbc by recording the compositor proxy window and only recording the part that matches the monitor(s). Allow recording a region by recording the compositor proxy window / nvfbc window and copying part of it. Resizing the target window to be smaller than the initial size is buggy. The window texture ends up duplicated in the video. -Handle frames (especially for applications with rounded client-side decorations, such as gnome applications. They are huge). +Handle frames (especially for applications with rounded client-side decorations, such as gnome applications. They are huge). \ No newline at end of file diff --git a/build.sh b/build.sh index 9bdaf16..3d57932 100755 --- a/build.sh +++ b/build.sh @@ -1,8 +1,8 @@ #!/bin/sh -e -dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample" +dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample" includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include" libs="$(pkg-config --libs $dependencies) /usr/lib64/libcuda.so -ldl -pthread -lm" -g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1 -g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1 +g++ -c src/sound.cpp -O2 $includes +g++ -c src/main.cpp -O2 $includes g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs diff --git a/install_ubuntu.sh b/install_ubuntu.sh index e443c4c..251da8c 100755 --- a/install_ubuntu.sh +++ b/install_ubuntu.sh @@ -15,11 +15,11 @@ apt-get -y install build-essential nvidia-cuda-dev\ libglew-dev libglfw3-dev\ libpulse-dev -dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse-simple libswresample" +dependencies="glew libavcodec libavformat libavutil x11 xcomposite glfw3 libpulse libswresample" includes="$(pkg-config --cflags $dependencies) -I/opt/cuda/targets/x86_64-linux/include" libs="$(pkg-config --libs $dependencies) /usr/lib/x86_64-linux-gnu/stubs/libcuda.so -ldl -pthread -lm" -g++ -c src/sound.cpp -O2 $includes -DPULSEAUDIO=1 -g++ -c src/main.cpp -O2 $includes -DPULSEAUDIO=1 +g++ -c src/sound.cpp -O2 $includes +g++ -c src/main.cpp -O2 $includes g++ -o gpu-screen-recorder -O2 sound.o main.o -s $libs install -Dm755 "gpu-screen-recorder" "/usr/local/bin/gpu-screen-recorder" diff --git a/project.conf b/project.conf index 4532649..dafca97 100644 --- a/project.conf +++ b/project.conf @@ -8,9 +8,6 @@ platforms = ["posix"] include_dirs = ["/opt/cuda/targets/x86_64-linux/include"] libs = ["/usr/lib64/libcuda.so"] -[define] -PULSEAUDIO = "1" - [dependencies] glew = ">=2" libavcodec = ">=58" @@ -23,7 +20,6 @@ xcomposite = ">=0.2" # TODO: Remove this dependency, this is needed right now for glfwMakeContextCurrent glfw3 = "3" -#alsa = "1" -libpulse-simple = ">=13" +libpulse = ">=13" libswresample = ">=3" diff --git a/src/main.cpp b/src/main.cpp index 0ab002f..9db23e4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -62,7 +62,6 @@ extern "C" { #include #include -#include //#include @@ -434,7 +433,7 @@ static void receive_frames(AVCodecContext *av_codec_context, int stream_index, A } else { av_packet_rescale_ts(&av_packet, av_codec_context->time_base, stream->time_base); av_packet.stream_index = stream->index; - int ret = av_interleaved_write_frame(av_format_context, &av_packet); + int ret = av_write_frame(av_format_context, &av_packet); if(ret < 0) { fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", av_packet.stream_index, av_error_to_string(ret), ret); } @@ -810,33 +809,51 @@ static std::future save_replay_thread; static std::vector save_replay_packets; static std::string save_replay_output_filepath; -static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector &audio_tracks, const std::deque &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format) { +static void save_replay_async(AVCodecContext *video_codec_context, int video_stream_index, std::vector &audio_tracks, const std::deque &frame_data_queue, bool frames_erased, std::string output_dir, std::string container_format, std::mutex &write_output_mutex) { if(save_replay_thread.valid()) return; size_t start_index = (size_t)-1; - for(size_t i = 0; i < frame_data_queue.size(); ++i) { - const AVPacket &av_packet = frame_data_queue[i]; - if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) { - start_index = i; - break; + int64_t video_pts_offset = 0; + int64_t audio_pts_offset = 0; + + { + std::lock_guard lock(write_output_mutex); + start_index = (size_t)-1; + for(size_t i = 0; i < frame_data_queue.size(); ++i) { + const AVPacket &av_packet = frame_data_queue[i]; + if((av_packet.flags & AV_PKT_FLAG_KEY) && av_packet.stream_index == video_stream_index) { + start_index = i; + break; + } } - } - if(start_index == (size_t)-1) - return; + if(start_index == (size_t)-1) + return; - int64_t pts_offset = 0; - if(frames_erased) - pts_offset = frame_data_queue[start_index].pts; + if(frames_erased) { + video_pts_offset = frame_data_queue[start_index].pts; + + // Find the next audio packet to use as audio pts offset + for(size_t i = start_index; i < frame_data_queue.size(); ++i) { + const AVPacket &av_packet = frame_data_queue[i]; + if(av_packet.stream_index != video_stream_index) { + audio_pts_offset = av_packet.pts; + break; + } + } + } else { + start_index = 0; + } - save_replay_packets.resize(frame_data_queue.size()); - for(size_t i = 0; i < frame_data_queue.size(); ++i) { - av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]); + save_replay_packets.resize(frame_data_queue.size()); + for(size_t i = 0; i < frame_data_queue.size(); ++i) { + av_packet_ref(&save_replay_packets[i], &frame_data_queue[i]); + } } save_replay_output_filepath = output_dir + "/Replay_" + get_date_str() + "." + container_format; - save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, pts_offset, video_codec_context, &audio_tracks]() mutable { + save_replay_thread = std::async(std::launch::async, [video_stream_index, container_format, start_index, video_pts_offset, audio_pts_offset, video_codec_context, &audio_tracks]() mutable { AVFormatContext *av_format_context; // The output format is automatically guessed from the file extension avformat_alloc_output_context2(&av_format_context, nullptr, container_format.c_str(), nullptr); @@ -874,18 +891,22 @@ static void save_replay_async(AVCodecContext *video_codec_context, int video_str AVStream *stream = video_stream; AVCodecContext *codec_context = video_codec_context; - if(av_packet.stream_index != video_stream_index) { + if(av_packet.stream_index == video_stream_index) { + av_packet.pts -= video_pts_offset; + av_packet.dts -= video_pts_offset; + } else { AudioTrack *audio_track = stream_index_to_audio_track_map[av_packet.stream_index]; stream = audio_track->stream; codec_context = audio_track->codec_context; + + av_packet.pts -= audio_pts_offset; + av_packet.dts -= audio_pts_offset; } av_packet.stream_index = stream->index; - av_packet.pts -= pts_offset; - av_packet.dts -= pts_offset; av_packet_rescale_ts(&av_packet, codec_context->time_base, stream->time_base); - int ret = av_interleaved_write_frame(av_format_context, &av_packet); + int ret = av_write_frame(av_format_context, &av_packet); if(ret < 0) fprintf(stderr, "Error: Failed to write frame index %d to muxer, reason: %s (%d)\n", stream->index, av_error_to_string(ret), ret); } @@ -1426,52 +1447,11 @@ int main(int argc, char **argv) { av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0); swr_init(swr); - std::deque buffered_audio; - std::mutex buffered_audio_mutex; - std::condition_variable buffered_audio_cv; - bool got_first_batch = false; - - // TODO: Make the sound device read async instead of using a thread - std::thread sound_read_thread([&](){ - while(running) { - void *sound_buffer; - int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer); - if(sound_buffer_size >= 0) { - uint8_t *data = (uint8_t*)malloc(audio_track.sound_device.buffer_size); - if(data) { - memcpy(data, sound_buffer, audio_track.sound_device.buffer_size); - std::unique_lock lock(buffered_audio_mutex); - buffered_audio.push_back(data); - buffered_audio_cv.notify_one(); - } - } - } - }); - while(running) { - uint8_t *audio_buffer; - bool free_audio; - { - // TODO: Not a good solution to lack of audio as it causes dropped frames, but it's better then complete audio desync. - // The first packet is delayed for some reason... - std::unique_lock lock(buffered_audio_mutex); - if(got_first_batch) - buffered_audio_cv.wait(lock, [&]{ return !running || !buffered_audio.empty(); }); - else - buffered_audio_cv.wait_for(lock, std::chrono::milliseconds(21), [&]{ return !running || !buffered_audio.empty(); }); - if(!running) - break; - - if(buffered_audio.empty()) { - audio_buffer = empty_audio; - free_audio = false; - } else { - audio_buffer = buffered_audio.front(); - buffered_audio.pop_front(); - free_audio = true; - got_first_batch = true; - } - } + void *sound_buffer; + int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer); + if(sound_buffer_size < 0) + sound_buffer = empty_audio; int ret = av_frame_make_writable(audio_track.frame); if (ret < 0) { @@ -1480,7 +1460,7 @@ int main(int argc, char **argv) { } // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format? - swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&audio_buffer, audio_track.sound_device.frames); + swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames); audio_track.frame->pts = (clock_get_monotonic_seconds() - start_time_pts) * AV_TIME_BASE; ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame); @@ -1489,17 +1469,9 @@ int main(int argc, char **argv) { } else { fprintf(stderr, "Failed to encode audio!\n"); } - - if(free_audio) - free(audio_buffer); - } - - sound_read_thread.join(); - while(!buffered_audio.empty()) { - free(buffered_audio.front()); - buffered_audio.pop_front(); } + sound_device_close(&audio_track.sound_device); swr_free(&swr); }, av_format_context, &write_output_mutex); } @@ -1749,7 +1721,7 @@ int main(int argc, char **argv) { if(save_replay == 1 && !save_replay_thread.valid() && replay_buffer_size_secs != -1) { save_replay = 0; - save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format); + save_replay_async(video_codec_context, VIDEO_STREAM_INDEX, audio_tracks, frame_data_queue, frames_erased, filename, container_format, write_output_mutex); } // av_frame_free(&frame); @@ -1767,7 +1739,6 @@ int main(int argc, char **argv) { for(AudioTrack &audio_track : audio_tracks) { audio_track.thread.join(); - sound_device_close(&audio_track.sound_device); } if (replay_buffer_size_secs == -1 && av_write_trailer(av_format_context) != 0) { diff --git a/src/sound.cpp b/src/sound.cpp index d0b5033..9ca1381 100644 --- a/src/sound.cpp +++ b/src/sound.cpp @@ -20,11 +20,193 @@ #include #include #include +#include +#include -#ifdef PULSEAUDIO -#include +#include +#include +#include #include +#define CHECK_DEAD_GOTO(p, rerror, label) \ + do { \ + if (!(p)->context || !PA_CONTEXT_IS_GOOD(pa_context_get_state((p)->context)) || \ + !(p)->stream || !PA_STREAM_IS_GOOD(pa_stream_get_state((p)->stream))) { \ + if (((p)->context && pa_context_get_state((p)->context) == PA_CONTEXT_FAILED) || \ + ((p)->stream && pa_stream_get_state((p)->stream) == PA_STREAM_FAILED)) { \ + if (rerror) \ + *(rerror) = pa_context_errno((p)->context); \ + } else \ + if (rerror) \ + *(rerror) = PA_ERR_BADSTATE; \ + goto label; \ + } \ + } while(false); + +static double clock_get_monotonic_seconds() { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 0; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001; +} + +static int sound_device_index = 0; + +struct pa_handle { + pa_context *context; + pa_stream *stream; + pa_mainloop *mainloop; + + const void *read_data; + size_t read_index, read_length; + + int operation_success; +}; + +static void pa_sound_device_free(pa_handle *s) { + assert(s); + + if (s->stream) + pa_stream_unref(s->stream); + + if (s->context) { + pa_context_disconnect(s->context); + pa_context_unref(s->context); + } + + if (s->mainloop) + pa_mainloop_free(s->mainloop); + + pa_xfree(s); +} + +static pa_handle* pa_sound_device_new(const char *server, + const char *name, + const char *dev, + const char *stream_name, + const pa_sample_spec *ss, + const pa_buffer_attr *attr, + int *rerror) { + pa_handle *p; + int error = PA_ERR_INTERNAL, r; + + p = pa_xnew0(pa_handle, 1); + + if (!(p->mainloop = pa_mainloop_new())) + goto fail; + + if (!(p->context = pa_context_new(pa_mainloop_get_api(p->mainloop), name))) + goto fail; + + if (pa_context_connect(p->context, server, PA_CONTEXT_NOFLAGS, NULL) < 0) { + error = pa_context_errno(p->context); + goto fail; + } + + for (;;) { + pa_context_state_t state = pa_context_get_state(p->context); + + if (state == PA_CONTEXT_READY) + break; + + if (!PA_CONTEXT_IS_GOOD(state)) { + error = pa_context_errno(p->context); + goto fail; + } + + pa_mainloop_iterate(p->mainloop, 1, NULL); + } + + if (!(p->stream = pa_stream_new(p->context, stream_name, ss, NULL))) { + error = pa_context_errno(p->context); + goto fail; + } + + r = pa_stream_connect_record(p->stream, dev, attr, + (pa_stream_flags_t)(PA_STREAM_INTERPOLATE_TIMING|PA_STREAM_ADJUST_LATENCY|PA_STREAM_AUTO_TIMING_UPDATE)); + + if (r < 0) { + error = pa_context_errno(p->context); + goto fail; + } + + for (;;) { + pa_stream_state_t state = pa_stream_get_state(p->stream); + + if (state == PA_STREAM_READY) + break; + + if (!PA_STREAM_IS_GOOD(state)) { + error = pa_context_errno(p->context); + goto fail; + } + + pa_mainloop_iterate(p->mainloop, 1, NULL); + } + + return p; + +fail: + if (rerror) + *rerror = error; + pa_sound_device_free(p); + return NULL; +} + +// Returns a negative value on failure. Always blocks a time specified matching the sampling rate of the audio. +static int pa_sound_device_read(pa_handle *p, void *data, size_t length) { + assert(p); + + int r = 0; + int *rerror = &r; + bool retry = true; + + pa_mainloop_iterate(p->mainloop, 0, NULL); + const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0); + + CHECK_DEAD_GOTO(p, rerror, fail); + + while(true) { + if(pa_stream_readable_size(p->stream) < length) { + if(!retry) + break; + + retry = false; + + const double start_time = clock_get_monotonic_seconds(); + while((clock_get_monotonic_seconds() - start_time) * 1000.0 < timeout_ms) { + pa_mainloop_prepare(p->mainloop, 1 * 1000); + pa_mainloop_poll(p->mainloop); + pa_mainloop_dispatch(p->mainloop); + } + + continue; + } + + r = pa_stream_peek(p->stream, &p->read_data, &p->read_length); + if(r != 0) { + if(retry) + usleep(timeout_ms * 1000); + return -1; + } + + if(p->read_length < length || !p->read_data) { + pa_stream_drop(p->stream); + if(retry) + usleep(timeout_ms * 1000); + return -1; + } + + memcpy(data, p->read_data, length); + pa_stream_drop(p->stream); + return 0; + } + + fail: + return -1; +} + int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) { pa_sample_spec ss; ss.format = PA_SAMPLE_S16LE; @@ -39,8 +221,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int buffer_attr.maxlength = period_frame_size * 2 * num_channels; // 2 bytes/sample, @num_channels channels buffer_attr.fragsize = buffer_attr.maxlength; - pa_simple *pa_handle = pa_simple_new(nullptr, "gpu-screen-recorder", PA_STREAM_RECORD, name, "record", &ss, nullptr, &buffer_attr, &error); - if(!pa_handle) { + // We want a unique stream name for every device which allows each input to be a different box in pipewire graph software + char stream_name[64]; + snprintf(stream_name, sizeof(stream_name), "record-%d", sound_device_index); + ++sound_device_index; + + pa_handle *handle = pa_sound_device_new(nullptr, "gpu-screen-recorder", name, stream_name, &ss, &buffer_attr, &error); + if(!handle) { fprintf(stderr, "pa_simple_new() failed: %s. Audio input device %s might not be valid\n", pa_strerror(error), name); return -1; } @@ -49,13 +236,13 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int void *buffer = malloc(buffer_size); if(!buffer) { fprintf(stderr, "failed to allocate buffer for audio\n"); - pa_simple_free(pa_handle); + pa_sound_device_free(handle); return -1; } fprintf(stderr, "Using pulseaudio\n"); - device->handle = pa_handle; + device->handle = handle; device->buffer = buffer; device->buffer_size = buffer_size; device->frames = period_frame_size; @@ -63,100 +250,15 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int } void sound_device_close(SoundDevice *device) { - pa_simple_free((pa_simple*)device->handle); + pa_sound_device_free((pa_handle*)device->handle); free(device->buffer); } int sound_device_read_next_chunk(SoundDevice *device, void **buffer) { - int error = 0; - if(pa_simple_read((pa_simple*)device->handle, device->buffer, device->buffer_size, &error) < 0) { - fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error)); + if(pa_sound_device_read((pa_handle*)device->handle, device->buffer, device->buffer_size) < 0) { + //fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error)); return -1; } *buffer = device->buffer; return device->frames; -} -#else -#define ALSA_PCM_NEW_HW_PARAMS_API -#include - -int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) { - int rc; - snd_pcm_t *handle; - - rc = snd_pcm_open(&handle, name, SND_PCM_STREAM_CAPTURE, 0); - if(rc < 0) { - fprintf(stderr, "unable to open pcm device 'default', reason: %s\n", snd_strerror(rc)); - return rc; - } - - snd_pcm_hw_params_t *params; - snd_pcm_hw_params_alloca(¶ms); - // Fill the params with default values - snd_pcm_hw_params_any(handle, params); - // Interleaved mode - snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); - // Signed 16--bit little-endian format - snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE); - snd_pcm_hw_params_set_channels(handle, params, num_channels); - - // 48000 bits/second samling rate (DVD quality) - unsigned int val = 48000; - int dir; - snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir); - - snd_pcm_uframes_t frames = period_frame_size; - snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir); - - // Write the parmeters to the driver - rc = snd_pcm_hw_params(handle, params); - if(rc < 0) { - fprintf(stderr, "unable to set hw parameters, reason: %s\n", snd_strerror(rc)); - snd_pcm_close(handle); - return rc; - } - - // Use a buffer large enough to hold one period - snd_pcm_hw_params_get_period_size(params, &frames, &dir); - int buffer_size = frames * 2 * num_channels; // 2 bytes/sample, @num_channels channels - void *buffer = malloc(buffer_size); - if(!buffer) { - fprintf(stderr, "failed to allocate buffer for audio\n"); - snd_pcm_close(handle); - return -1; - } - - fprintf(stderr, "Using alsa\n"); - - device->handle = handle; - device->buffer = buffer; - device->buffer_size = buffer_size; - device->frames = frames; - return 0; -} - -void sound_device_close(SoundDevice *device) { - /* TODO: Is this also needed in @sound_device_get_by_name on failure? */ - // TODO: This has been commented out since it causes the thread to block forever. Why? - //snd_pcm_drain((snd_pcm_t*)device->handle); - snd_pcm_close((snd_pcm_t*)device->handle); - free(device->buffer); -} - -int sound_device_read_next_chunk(SoundDevice *device, void **buffer) { - int rc = snd_pcm_readi((snd_pcm_t*)device->handle, device->buffer, device->frames); - if (rc == -EPIPE) { - /* overrun */ - fprintf(stderr, "overrun occured\n"); - snd_pcm_prepare((snd_pcm_t*)device->handle); - return rc; - } else if(rc < 0) { - fprintf(stderr, "failed to read from sound device, reason: %s\n", snd_strerror(rc)); - return rc; - } else if (rc != (int)device->frames) { - fprintf(stderr, "short read, read %d frames\n", rc); - } - *buffer = device->buffer; - return rc; -} -#endif +} \ No newline at end of file -- cgit v1.2.3