From f8101612035eee8a0772cadffb38b04138a71c28 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Tue, 20 Sep 2022 20:11:56 +0200 Subject: Is this the final solution to the audio crackling problem? increase pts by number of samples and add dummy audio frames between --- src/main.cpp | 50 ++++++++++++++++++++++++++++-------- src/sound.cpp | 81 +++++++++++------------------------------------------------ 2 files changed, 53 insertions(+), 78 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 1c02984..1b88cd9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -483,7 +483,7 @@ static AVCodecContext* create_audio_codec_context(AVFormatContext *av_format_con #endif codec_context->time_base.num = 1; - codec_context->time_base.den = AV_TIME_BASE; + codec_context->time_base.den = codec_context->sample_rate; codec_context->framerate.num = fps; codec_context->framerate.den = 1; @@ -1462,11 +1462,12 @@ int main(int argc, char **argv) { av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0); swr_init(swr); + int64_t pts = 0; + const double target_audio_hz = 1.0 / (double)audio_track.codec_context->sample_rate; + while(running) { void *sound_buffer; int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer); - if(sound_buffer_size < 0) - sound_buffer = empty_audio; int ret = av_frame_make_writable(audio_track.frame); if (ret < 0) { @@ -1474,15 +1475,42 @@ int main(int argc, char **argv) { break; } - // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format? - swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames); - audio_track.frame->pts = (clock_get_monotonic_seconds() - start_time_pts) * AV_TIME_BASE; + const double this_audio_frame_time = clock_get_monotonic_seconds(); + const int64_t expected_frames = std::round((this_audio_frame_time - start_time_pts) / target_audio_hz); + const int64_t num_missing_frames = std::max(0L, (expected_frames - pts) / audio_track.frame->nb_samples); + // Jesus is there a better way to do this? I JUST WANT TO KEEP VIDEO AND AUDIO SYNCED HOLY FUCK I WANT TO KILL MYSELF NOW. + // THIS PIECE OF SHIT WANTS EMPTY FRAMES OTHERWISE VIDEO PLAYS TOO FAST TO KEEP UP WITH AUDIO OR THE AUDIO PLAYS TOO EARLY. + // BUT WE CANT USE DELAYS TO GIVE DUMMY DATA BECAUSE PULSEAUDIO MIGHT GIVE AUDIO A BIG DELAYED!!! + if(num_missing_frames >= 5) { + // TODO: + //audio_track.frame->data[0] = empty_audio; + swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&empty_audio, audio_track.sound_device.frames); + // TODO: Check if duplicate frame can be saved just by writing it with a different pts instead of sending it again + for(int i = 0; i < num_missing_frames; ++i) { + audio_track.frame->pts = pts; + pts += audio_track.frame->nb_samples; + ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame); + if(ret >= 0){ + receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_track.frame, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, *write_output_mutex); + } else { + fprintf(stderr, "Failed to encode audio!\n"); + } + } + } + + if(sound_buffer_size >= 0) { + // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format? + swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames); - ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame); - if(ret >= 0){ - receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_track.frame, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, *write_output_mutex); - } else { - fprintf(stderr, "Failed to encode audio!\n"); + audio_track.frame->pts = pts; + pts += audio_track.frame->nb_samples; + + ret = avcodec_send_frame(audio_track.codec_context, audio_track.frame); + if(ret >= 0){ + receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_track.frame, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, *write_output_mutex); + } else { + fprintf(stderr, "Failed to encode audio!\n"); + } } } diff --git a/src/sound.cpp b/src/sound.cpp index ab0450e..928ee4a 100644 --- a/src/sound.cpp +++ b/src/sound.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -28,29 +27,6 @@ #include #include -#define CHECK_DEAD_GOTO(p, rerror, label) \ - do { \ - if (!(p)->context || !PA_CONTEXT_IS_GOOD(pa_context_get_state((p)->context)) || \ - !(p)->stream || !PA_STREAM_IS_GOOD(pa_stream_get_state((p)->stream))) { \ - if (((p)->context && pa_context_get_state((p)->context) == PA_CONTEXT_FAILED) || \ - ((p)->stream && pa_stream_get_state((p)->stream) == PA_STREAM_FAILED)) { \ - if (rerror) \ - *(rerror) = pa_context_errno((p)->context); \ - } else \ - if (rerror) \ - *(rerror) = PA_ERR_BADSTATE; \ - goto label; \ - } \ - } while(false); - -static double clock_get_monotonic_seconds() { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 0; - clock_gettime(CLOCK_MONOTONIC, &ts); - return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001; -} - static int sound_device_index = 0; struct pa_handle { @@ -154,59 +130,30 @@ fail: return NULL; } -static void pa_sound_device_mainloop_timed(pa_handle *p, int64_t timeout_ms) { - const double start_time = clock_get_monotonic_seconds(); - while((clock_get_monotonic_seconds() - start_time) * 1000.0 < timeout_ms) { - pa_mainloop_prepare(p->mainloop, 1 * 1000); - pa_mainloop_poll(p->mainloop); - pa_mainloop_dispatch(p->mainloop); - } -} - -// Returns a negative value on failure. Always blocks a time specified matching the sampling rate of the audio. +// Returns a negative value on failure or if no data is available at the moment static int pa_sound_device_read(pa_handle *p, void *data, size_t length) { assert(p); - int r = 0; - int *rerror = &r; - bool retry = true; - - pa_mainloop_iterate(p->mainloop, 0, NULL); const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0); + pa_mainloop_prepare(p->mainloop, timeout_ms * 1000); + pa_mainloop_poll(p->mainloop); + pa_mainloop_dispatch(p->mainloop); - CHECK_DEAD_GOTO(p, rerror, fail); - - while(true) { - if(pa_stream_readable_size(p->stream) < length) { - if(!retry) - break; - - retry = false; - pa_sound_device_mainloop_timed(p, timeout_ms); - continue; - } - - r = pa_stream_peek(p->stream, &p->read_data, &p->read_length); - if(r != 0) { - if(retry) - pa_sound_device_mainloop_timed(p, timeout_ms); - return -1; - } + if(pa_stream_readable_size(p->stream) < length) + return -1; - if(p->read_length < length || !p->read_data) { - pa_stream_drop(p->stream); - if(retry) - pa_sound_device_mainloop_timed(p, timeout_ms); - return -1; - } + int r = pa_stream_peek(p->stream, &p->read_data, &p->read_length); + if(r != 0) + return -1; - memcpy(data, p->read_data, length); + if(p->read_length < length || !p->read_data) { pa_stream_drop(p->stream); - return 0; + return -1; } - fail: - return -1; + memcpy(data, p->read_data, length); + pa_stream_drop(p->stream); + return 0; } int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) { -- cgit v1.2.3