From 0059724fdc4a8eba89fdfa4d65ad72e3f8c75ff4 Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Thu, 22 Sep 2022 01:44:45 +0200
Subject: Read audio data from pulseaudio as it's available and buffer it.
 Fixes audio recording on pulseaudio (and some pipewire configs)

---
 include/sound.hpp |   2 -
 src/main.cpp      |  15 ++++--
 src/sound.cpp     | 153 +++++++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 132 insertions(+), 38 deletions(-)
diff --git a/include/sound.hpp b/include/sound.hpp
index 666d009..39d9248 100644
--- a/include/sound.hpp
+++ b/include/sound.hpp
@@ -20,8 +20,6 @@
 
 typedef struct {
     void *handle;
-    void *buffer;
-    int buffer_size;
     unsigned int frames;
 } SoundDevice;
 
diff --git a/src/main.cpp b/src/main.cpp
index 67552f4..7a17c84 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1470,10 +1470,16 @@ int main(int argc, char **argv) {
 
             int64_t pts = 0;
             const double target_audio_hz = 1.0 / (double)audio_track.codec_context->sample_rate;
+            double received_audio_time = clock_get_monotonic_seconds();
 
             while(running) {
                 void *sound_buffer;
                 int sound_buffer_size = sound_device_read_next_chunk(&audio_track.sound_device, &sound_buffer);
+                const bool got_audio_data = sound_buffer_size >= 0;
+
+                const double this_audio_frame_time = clock_get_monotonic_seconds();
+                if(got_audio_data)
+                    received_audio_time = this_audio_frame_time;
 
                 int ret = av_frame_make_writable(audio_track.frame);
                 if (ret < 0) {
@@ -1481,15 +1487,14 @@ int main(int argc, char **argv) {
                     break;
                 }
 
-                const double this_audio_frame_time = clock_get_monotonic_seconds();
-                const int64_t expected_frames = std::round((this_audio_frame_time - start_time_pts) / target_audio_hz);
-                const int64_t num_missing_frames = std::max(0L, (expected_frames - pts) / audio_track.frame->nb_samples);
+                const int64_t num_missing_frames = std::round((this_audio_frame_time - received_audio_time) / target_audio_hz / (int64_t)audio_track.frame->nb_samples);
                 // Jesus is there a better way to do this? I JUST WANT TO KEEP VIDEO AND AUDIO SYNCED HOLY FUCK I WANT TO KILL MYSELF NOW.
                 // THIS PIECE OF SHIT WANTS EMPTY FRAMES OTHERWISE VIDEO PLAYS TOO FAST TO KEEP UP WITH AUDIO OR THE AUDIO PLAYS TOO EARLY.
                 // BUT WE CANT USE DELAYS TO GIVE DUMMY DATA BECAUSE PULSEAUDIO MIGHT GIVE AUDIO A BIG DELAYED!!!
-                if(num_missing_frames >= 5) {
+                if(num_missing_frames >= 5 || (num_missing_frames > 0 && got_audio_data)) {
                     // TODO:
                     //audio_track.frame->data[0] = empty_audio;
+                    received_audio_time = this_audio_frame_time;
                     swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&empty_audio, audio_track.sound_device.frames);
                     // TODO: Check if duplicate frame can be saved just by writing it with a different pts instead of sending it again
                     for(int i = 0; i < num_missing_frames; ++i) {
@@ -1504,7 +1509,7 @@ int main(int argc, char **argv) {
                     }
                 }
 
-                if(sound_buffer_size >= 0) {
+                if(got_audio_data) {
                     // TODO: Instead of converting audio, get float audio from alsa. Or does alsa do conversion internally to get this format?
                     swr_convert(swr, &audio_track.frame->data[0], audio_track.frame->nb_samples, (const uint8_t**)&sound_buffer, audio_track.sound_device.frames);
 
diff --git a/src/sound.cpp b/src/sound.cpp
index 928ee4a..9978e6f 100644
--- a/src/sound.cpp
+++ b/src/sound.cpp
@@ -21,14 +21,38 @@
 #include <stdio.h>
 #include <string.h>
 #include <cmath>
+#include <time.h>
 
 #include <pulse/pulseaudio.h>
 #include <pulse/mainloop.h>
 #include <pulse/xmalloc.h>
 #include <pulse/error.h>
 
+#define CHECK_DEAD_GOTO(p, rerror, label)                               \
+    do {                                                                \
+        if (!(p)->context || !PA_CONTEXT_IS_GOOD(pa_context_get_state((p)->context)) || \
+            !(p)->stream || !PA_STREAM_IS_GOOD(pa_stream_get_state((p)->stream))) { \
+            if (((p)->context && pa_context_get_state((p)->context) == PA_CONTEXT_FAILED) || \
+                ((p)->stream && pa_stream_get_state((p)->stream) == PA_STREAM_FAILED)) { \
+                if (rerror)                                             \
+                    *(rerror) = pa_context_errno((p)->context);         \
+            } else                                                      \
+                if (rerror)                                             \
+                    *(rerror) = PA_ERR_BADSTATE;                        \
+            goto label;                                                 \
+        }                                                               \
+    } while(false);
+
 static int sound_device_index = 0;
 
+static double clock_get_monotonic_seconds() {
+    struct timespec ts;
+    ts.tv_sec = 0;
+    ts.tv_nsec = 0;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec * 0.000000001;
+}
+
 struct pa_handle {
     pa_context *context;
     pa_stream *stream;
@@ -37,6 +61,9 @@ struct pa_handle {
     const void *read_data;
     size_t read_index, read_length;
 
+    uint8_t *output_data;
+    size_t output_index, output_length;
+
     int operation_success;
 };
 
@@ -54,6 +81,11 @@ static void pa_sound_device_free(pa_handle *s) {
     if (s->mainloop)
         pa_mainloop_free(s->mainloop);
 
+    if (s->output_data) {
+        free(s->output_data);
+        s->output_data = NULL;
+    }
+
     pa_xfree(s);
 }
 
@@ -68,6 +100,21 @@ static pa_handle* pa_sound_device_new(const char *server,
     int error = PA_ERR_INTERNAL, r;
 
     p = pa_xnew0(pa_handle, 1);
+    p->read_data = NULL;
+    p->read_length = 0;
+    p->read_index = 0;
+
+    const int buffer_size = attr->maxlength;
+    void *buffer = malloc(buffer_size);
+    if(!buffer) {
+        fprintf(stderr, "failed to allocate buffer for audio\n");
+        *rerror = -1;
+        return NULL;
+    }
+
+    p->output_data = (uint8_t*)buffer;
+    p->output_length = buffer_size;
+    p->output_index = 0;
 
     if (!(p->mainloop = pa_mainloop_new()))
         goto fail;
@@ -130,30 +177,86 @@ fail:
     return NULL;
 }
 
-// Returns a negative value on failure or if no data is available at the moment
-static int pa_sound_device_read(pa_handle *p, void *data, size_t length) {
+// Returns a negative value on failure or if |p->output_length| data is not available within the time frame specified by the sample rate
+static int pa_sound_device_read(pa_handle *p) {
     assert(p);
 
     const int64_t timeout_ms = std::round((1000.0 / (double)pa_stream_get_sample_spec(p->stream)->rate) * 1000.0);
-    pa_mainloop_prepare(p->mainloop, timeout_ms * 1000);
-    pa_mainloop_poll(p->mainloop);
-    pa_mainloop_dispatch(p->mainloop);
+    const double start_time = clock_get_monotonic_seconds();
+
+    bool success = false;
+    int r = 0;
+    int *rerror = &r;
+    CHECK_DEAD_GOTO(p, rerror, fail);
+
+    while (p->output_index < p->output_length) {
+        if((clock_get_monotonic_seconds() - start_time) * 1000 >= timeout_ms)
+            return -1;
+
+        if(p->read_data) {
+            assert(p->output_index == 0);
+            memcpy(p->output_data, (const uint8_t*)p->read_data + p->read_index, p->read_length);
+            p->output_index += p->read_length;
+            p->read_data = NULL;
+            p->read_length = 0;
+            p->read_index = 0;
+
+            if(pa_stream_drop(p->stream) != 0)
+                goto fail;
+        }
 
-    if(pa_stream_readable_size(p->stream) < length)
-        return -1;
+        pa_mainloop_prepare(p->mainloop, 1 * 1000); // 1 ms
+        pa_mainloop_poll(p->mainloop);
+        pa_mainloop_dispatch(p->mainloop);
 
-    int r = pa_stream_peek(p->stream, &p->read_data, &p->read_length);
-    if(r != 0)
-        return -1;
+        if(pa_stream_peek(p->stream, &p->read_data, &p->read_length) < 0)
+            goto fail;
 
-    if(p->read_length < length || !p->read_data) {
-        pa_stream_drop(p->stream);
-        return -1;
+        if(!p->read_data && p->read_length == 0)
+            continue;
+
+        if(!p->read_data && p->read_length > 0) {
+            // There is a hole in the stream :( drop it. Maybe we should generate silence instead? TODO
+            if(pa_stream_drop(p->stream) != 0)
+                goto fail;
+            continue;
+        }
+
+        if(p->read_length <= 0) {
+            CHECK_DEAD_GOTO(p, rerror, fail);
+            continue;
+        }
+
+        const size_t space_free_in_output_buffer = p->output_length - p->output_index;
+        if(space_free_in_output_buffer < p->read_length) {
+            assert(p->read_index == 0);
+            memcpy(p->output_data + p->output_index, p->read_data, space_free_in_output_buffer);
+            p->output_index = 0;
+            p->read_index += space_free_in_output_buffer;
+            p->read_length -= space_free_in_output_buffer;
+            break;
+        } else {
+            assert(p->read_index == 0);
+            memcpy(p->output_data + p->output_index, p->read_data, p->read_length);
+            p->output_index += p->read_length;
+            p->read_data = NULL;
+            p->read_length = 0;
+            p->read_index = 0;
+            
+            if(pa_stream_drop(p->stream) != 0)
+                goto fail;
+
+            if(p->output_index == p->output_length) {
+                p->output_index = 0;
+                break;
+            }
+        }
     }
 
-    memcpy(data, p->read_data, length);
-    pa_stream_drop(p->stream);
-    return 0;
+    success = true;
+
+    fail:
+    return success ? 0 : -1;
 }
 
 int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int num_channels, unsigned int period_frame_size) {
@@ -181,33 +284,21 @@ int sound_device_get_by_name(SoundDevice *device, const char *name, unsigned int
         return -1;
     }
 
-    int buffer_size = buffer_attr.maxlength;
-    void *buffer = malloc(buffer_size);
-    if(!buffer) {
-        fprintf(stderr, "failed to allocate buffer for audio\n");
-        pa_sound_device_free(handle);
-        return -1;
-    }
-
-    fprintf(stderr, "Using pulseaudio\n");
-
     device->handle = handle;
-    device->buffer = buffer;
-    device->buffer_size = buffer_size;
     device->frames = period_frame_size;
     return 0;
 }
 
 void sound_device_close(SoundDevice *device) {
     pa_sound_device_free((pa_handle*)device->handle);
-    free(device->buffer);
 }
 
 int sound_device_read_next_chunk(SoundDevice *device, void **buffer) {
-    if(pa_sound_device_read((pa_handle*)device->handle, device->buffer, device->buffer_size) < 0) {
+    pa_handle *pa = (pa_handle*)device->handle;
+    if(pa_sound_device_read(pa) < 0) {
         //fprintf(stderr, "pa_simple_read() failed: %s\n", pa_strerror(error));
         return -1;
     }
-    *buffer = device->buffer;
+    *buffer = pa->output_data;
     return device->frames;
 }
\ No newline at end of file
-- 
cgit v1.2.3-70-g09d2