diff options
-rw-r--r-- | include/sound.hpp | 5 | ||||
-rw-r--r-- | src/main.cpp | 131 | ||||
-rw-r--r-- | src/sound.cpp | 88 |
3 files changed, 73 insertions, 151 deletions
diff --git a/include/sound.hpp b/include/sound.hpp index 6873e90..32821bc 100644 --- a/include/sound.hpp +++ b/include/sound.hpp @@ -24,6 +24,7 @@ typedef struct { void *handle; unsigned int frames; + double latency_seconds; } SoundDevice; struct AudioInput { @@ -53,9 +54,9 @@ void sound_device_close(SoundDevice *device); /* Returns the next chunk of audio into @buffer. - Returns the number of frames read, or a negative value on failure. + Returns the number of bytes read, or a negative value on failure. */ -int sound_device_read_next_chunk(SoundDevice *device, void **buffer); +int sound_device_read_next_chunk(SoundDevice *device, void **buffer, double timeout_sec); std::vector<AudioInput> get_pulseaudio_inputs(); diff --git a/src/main.cpp b/src/main.cpp index faab93b..b3b206c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -315,7 +315,7 @@ static AVCodecContext* create_audio_codec_context(int fps, AudioCodec audio_code #endif codec_context->time_base.num = 1; - codec_context->time_base.den = codec_context->sample_rate; + codec_context->time_base.den = AV_TIME_BASE; codec_context->framerate.num = fps; codec_context->framerate.den = 1; codec_context->thread_count = 1; @@ -1699,10 +1699,10 @@ int main(int argc, char **argv) { usage(); } - AudioCodec audio_codec = AudioCodec::OPUS; + AudioCodec audio_codec = AudioCodec::AAC; const char *audio_codec_to_use = args["-ac"].value(); if(!audio_codec_to_use) - audio_codec_to_use = "opus"; + audio_codec_to_use = "aac"; if(strcmp(audio_codec_to_use, "aac") == 0) { audio_codec = AudioCodec::AAC; @@ -1715,10 +1715,10 @@ int main(int argc, char **argv) { usage(); } - if(audio_codec == AudioCodec::FLAC) { - fprintf(stderr, "Warning: flac audio codec has been temporary disabled, using opus audio codec instead\n"); - audio_codec_to_use = "opus"; - audio_codec = AudioCodec::OPUS; + if(audio_codec == AudioCodec::OPUS || audio_codec == AudioCodec::FLAC) { + fprintf(stderr, "Warning: opus and flac audio codecs has been temporary disabled, using aac audio codec instead\n"); + audio_codec_to_use = "aac"; + audio_codec = AudioCodec::AAC; } bool overclock = false; @@ -2397,58 +2397,21 @@ int main(int argc, char **argv) { swr_init(swr); } - const double target_audio_hz = 1.0 / (double)audio_track.codec_context->sample_rate; - double received_audio_time = clock_get_monotonic_seconds(); - const int64_t timeout_ms = std::round((1000.0 / (double)audio_track.codec_context->sample_rate) * 1000.0); - - // Remove this for now, it doesn't work well for everybody. The timing is different depending on system - #if 0 - // Move audio forward by around 252 ms (for opus/aac), or 42ms for flac. This is just a shitty way to handle audio latency but pulseaudio latency calculation - // returns much lower value which isn't helpful. - if(needs_audio_conversion) - swr_convert(swr, &audio_device.frame->data[0], audio_track.codec_context->frame_size, (const uint8_t**)&empty_audio, audio_track.codec_context->frame_size); - else - audio_device.frame->data[0] = empty_audio; - - int num_frames_to_delay = 12; - if(audio_codec == AudioCodec::FLAC) - num_frames_to_delay = 2; - - for(int i = 0; i < num_frames_to_delay; ++i) { - if(audio_track.graph) { - std::lock_guard<std::mutex> lock(audio_filter_mutex); - // TODO: av_buffersrc_add_frame - if(av_buffersrc_write_frame(audio_device.src_filter_ctx, audio_device.frame) < 0) { - fprintf(stderr, "Error: failed to add audio frame to filter\n"); - } - } else { - int ret = avcodec_send_frame(audio_track.codec_context, audio_device.frame); - if(ret >= 0) { - // TODO: Move to separate thread because this could write to network (for example when livestreaming) - receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_device.frame->pts, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, write_output_mutex, paused_time_offset); - } else { - fprintf(stderr, "Failed to encode audio!\n"); - } - } - audio_device.frame->pts += audio_track.codec_context->frame_size; - } - #endif + const int64_t no_input_sleep_ms = 500; while(running) { void *sound_buffer; int sound_buffer_size = -1; if(audio_device.sound_device.handle) - sound_buffer_size = sound_device_read_next_chunk(&audio_device.sound_device, &sound_buffer); + sound_buffer_size = sound_device_read_next_chunk(&audio_device.sound_device, &sound_buffer, 0.5); + const bool got_audio_data = sound_buffer_size >= 0; const double this_audio_frame_time = clock_get_monotonic_seconds() - paused_time_offset; if(paused) { - if(got_audio_data) - received_audio_time = this_audio_frame_time; - if(!audio_device.sound_device.handle) - usleep(timeout_ms * 1000); + usleep(no_input_sleep_ms * 1000); continue; } @@ -2459,56 +2422,39 @@ int main(int argc, char **argv) { break; } - // TODO: Is this |received_audio_time| really correct? - int64_t num_missing_frames = std::round((this_audio_frame_time - received_audio_time) / target_audio_hz / (int64_t)audio_track.codec_context->frame_size); - if(got_audio_data) - num_missing_frames = std::max((int64_t)0, num_missing_frames - 1); - - if(!audio_device.sound_device.handle) - num_missing_frames = std::max((int64_t)1, num_missing_frames); - - if(got_audio_data) - received_audio_time = this_audio_frame_time; - - // Fucking hell is there a better way to do this? I JUST WANT TO KEEP VIDEO AND AUDIO SYNCED HOLY FUCK I WANT TO KILL MYSELF NOW. - // THIS PIECE OF SHIT WANTS EMPTY FRAMES OTHERWISE VIDEO PLAYS TOO FAST TO KEEP UP WITH AUDIO OR THE AUDIO PLAYS TOO EARLY. - // BUT WE CANT USE DELAYS TO GIVE DUMMY DATA BECAUSE PULSEAUDIO MIGHT GIVE AUDIO A BIG DELAYED!!! - // This garbage is needed because we want to produce constant frame rate videos instead of variable frame rate - // videos because bad software such as video editing software and VLC do not support variable frame rate software, - // despite nvidia shadowplay and xbox game bar producing variable frame rate videos. - // So we have to make sure we produce frames at the same relative rate as the video. - if(num_missing_frames >= 5 || !audio_device.sound_device.handle) { + if(!got_audio_data) { // TODO: //audio_track.frame->data[0] = empty_audio; - received_audio_time = this_audio_frame_time; if(needs_audio_conversion) swr_convert(swr, &audio_device.frame->data[0], audio_track.codec_context->frame_size, (const uint8_t**)&empty_audio, audio_track.codec_context->frame_size); else audio_device.frame->data[0] = empty_audio; - // TODO: Check if duplicate frame can be saved just by writing it with a different pts instead of sending it again - std::lock_guard<std::mutex> lock(audio_filter_mutex); - for(int i = 0; i < num_missing_frames; ++i) { - if(audio_track.graph) { - // TODO: av_buffersrc_add_frame - if(av_buffersrc_write_frame(audio_device.src_filter_ctx, audio_device.frame) < 0) { - fprintf(stderr, "Error: failed to add audio frame to filter\n"); - } + const int64_t new_pts = (this_audio_frame_time - record_start_time) * AV_TIME_BASE; + if(new_pts == audio_device.frame->pts) + continue; + audio_device.frame->pts = new_pts; + //audio_device.frame->linesize[0] = sound_buffer_size / 2; + + if(audio_track.graph) { + std::lock_guard<std::mutex> lock(audio_filter_mutex); + // TODO: av_buffersrc_add_frame + if(av_buffersrc_write_frame(audio_device.src_filter_ctx, audio_device.frame) < 0) { + fprintf(stderr, "Error: failed to add audio frame to filter\n"); + } + } else { + ret = avcodec_send_frame(audio_track.codec_context, audio_device.frame); + if(ret >= 0) { + // TODO: Move to separate thread because this could write to network (for example when livestreaming) + receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_device.frame->pts, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, write_output_mutex, paused_time_offset); } else { - ret = avcodec_send_frame(audio_track.codec_context, audio_device.frame); - if(ret >= 0) { - // TODO: Move to separate thread because this could write to network (for example when livestreaming) - receive_frames(audio_track.codec_context, audio_track.stream_index, audio_track.stream, audio_device.frame->pts, av_format_context, record_start_time, frame_data_queue, replay_buffer_size_secs, frames_erased, write_output_mutex, paused_time_offset); - } else { - fprintf(stderr, "Failed to encode audio!\n"); - } + fprintf(stderr, "Failed to encode audio!\n"); } - audio_device.frame->pts += audio_track.codec_context->frame_size; } } if(!audio_device.sound_device.handle) - usleep(timeout_ms * 1000); + usleep(no_input_sleep_ms * 1000); if(got_audio_data) { // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format? @@ -2517,6 +2463,12 @@ int main(int argc, char **argv) { else audio_device.frame->data[0] = (uint8_t*)sound_buffer; + const int64_t new_pts = (this_audio_frame_time - record_start_time) * AV_TIME_BASE; + if(new_pts == audio_device.frame->pts) + continue; + audio_device.frame->pts = new_pts; + //audio_device.frame->linesize[0] = sound_buffer_size / 2; + if(audio_track.graph) { std::lock_guard<std::mutex> lock(audio_filter_mutex); // TODO: av_buffersrc_add_frame @@ -2532,8 +2484,6 @@ int main(int argc, char **argv) { fprintf(stderr, "Failed to encode audio!\n"); } } - - audio_device.frame->pts += audio_track.codec_context->frame_size; } } @@ -2571,7 +2521,11 @@ int main(int argc, char **argv) { int err = 0; while ((err = av_buffersink_get_frame(audio_track.sink, aframe)) >= 0) { - aframe->pts = audio_track.pts; + const int64_t new_pts = ((clock_get_monotonic_seconds() - paused_time_offset) - record_start_time) * AV_TIME_BASE; + if(new_pts == aframe->pts) + continue; + aframe->pts = new_pts; + //aframe->linesize[0] = sound_buffer_size / 2; err = avcodec_send_frame(audio_track.codec_context, aframe); if(err >= 0){ // TODO: Move to separate thread because this could write to network (for example when livestreaming) @@ -2580,7 +2534,6 @@ int main(int argc, char **argv) { fprintf(stderr, "Failed to encode audio!\n"); } av_frame_unref(aframe); - audio_track.pts += audio_track.codec_context->frame_size; } } } diff --git a/src/sound.cpp b/src/sound.cpp index c3aa4d4..99342f2 100644 --- a/src/sound.cpp +++ b/src/sound.cpp @@ -41,6 +41,7 @@ struct pa_handle { size_t output_index, output_length; int operation_success; + double latency_seconds; }; static void pa_sound_device_free(pa_handle *s) { @@ -79,6 +80,7 @@ static pa_handle* pa_sound_device_new(const char *server, p->read_data = NULL; p->read_length = 0; p->read_index = 0; + p->latency_seconds = 0; const int buffer_size = attr->maxlength; void *buffer = malloc(buffer_size); @@ -153,78 +155,41 @@ fail: return NULL; } -// Returns a negative value on failure or if |p->output_length| data is not available within the time frame specified by the sample rate -static int pa_sound_device_read(pa_handle *p) { +static int pa_sound_device_read(pa_handle *p, double timeout_seconds) { assert(p); - const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0); const double start_time = clock_get_monotonic_seconds(); - bool success = false; int r = 0; + //pa_usec_t latency = 0; + //int negative = 0; int *rerror = &r; CHECK_DEAD_GOTO(p, rerror, fail); - while (p->output_index < p->output_length) { - if((clock_get_monotonic_seconds() - start_time) * 1000 >= timeout_ms) - return -1; + while(clock_get_monotonic_seconds() - start_time < timeout_seconds) { + pa_mainloop_prepare(p->mainloop, 1 * 1000); + pa_mainloop_poll(p->mainloop); + pa_mainloop_dispatch(p->mainloop); - if(!p->read_data) { - pa_mainloop_prepare(p->mainloop, 1 * 1000); // 1 ms - pa_mainloop_poll(p->mainloop); - pa_mainloop_dispatch(p->mainloop); - - if(pa_stream_peek(p->stream, &p->read_data, &p->read_length) < 0) - goto fail; - - if(!p->read_data && p->read_length == 0) - continue; + if(pa_stream_peek(p->stream, &p->read_data, &p->read_length) < 0) + goto fail; - if(!p->read_data && p->read_length > 0) { - // There is a hole in the stream :( drop it. Maybe we should generate silence instead? TODO - if(pa_stream_drop(p->stream) != 0) - goto fail; - continue; - } + if(!p->read_data && p->read_length == 0) + continue; - if(p->read_length <= 0) { - p->read_data = NULL; - if(pa_stream_drop(p->stream) != 0) - goto fail; + // pa_operation_unref(pa_stream_update_timing_info(p->stream, NULL, NULL)); + // if (pa_stream_get_latency(p->stream, &latency, &negative) >= 0) { + // fprintf(stderr, "latency: %lu ms, negative: %d, extra delay: %f ms\n", latency / 1000, negative, (clock_get_monotonic_seconds() - start_time) * 1000.0); + // } - CHECK_DEAD_GOTO(p, rerror, fail); - continue; - } - } - - const size_t space_free_in_output_buffer = p->output_length - p->output_index; - if(space_free_in_output_buffer < p->read_length) { - memcpy(p->output_data + p->output_index, (const uint8_t*)p->read_data + p->read_index, space_free_in_output_buffer); - p->output_index = 0; - p->read_index += space_free_in_output_buffer; - p->read_length -= space_free_in_output_buffer; - break; - } else { - memcpy(p->output_data + p->output_index, (const uint8_t*)p->read_data + p->read_index, p->read_length); - p->output_index += p->read_length; - p->read_data = NULL; - p->read_length = 0; - p->read_index = 0; - - if(pa_stream_drop(p->stream) != 0) - goto fail; - - if(p->output_index == p->output_length) { - p->output_index = 0; - break; - } - } + memcpy(p->output_data, p->read_data, p->read_length); + pa_stream_drop(p->stream); + p->latency_seconds = clock_get_monotonic_seconds() - start_time; + return p->read_length; } - success = true; - fail: - return success ? 0 : -1; + return -1; } static pa_sample_format_t audio_format_to_pulse_audio_format(AudioFormat audio_format) { @@ -269,6 +234,7 @@ int sound_device_get_by_name(SoundDevice *device, const char *device_name, const device->handle = handle; device->frames = period_frame_size; + device->latency_seconds = 0.0; return 0; } @@ -278,14 +244,16 @@ void sound_device_close(SoundDevice *device) { device->handle = NULL; } -int sound_device_read_next_chunk(SoundDevice *device, void **buffer) { +int sound_device_read_next_chunk(SoundDevice *device, void **buffer, double timeout_sec) { pa_handle *pa = (pa_handle*)device->handle; - if(pa_sound_device_read(pa) < 0) { + int size = pa_sound_device_read(pa, timeout_sec); + if(size < 0) { //fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error)); return -1; } *buffer = pa->output_data; - return device->frames; + device->latency_seconds = pa->latency_seconds; + return size; } static void pa_state_cb(pa_context *c, void *userdata) { |