feat: Add synchronization to start of audio recordings (#1984)

OmLanke · pre-commit-ci[bot] · VincentRPS · web-flow · commit 7a6a42c5d17f · 2023-05-01T17:27:16.000Z
Signed-off-by: Om <92863779+Om1609@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: VincentRPS <vincentbusiness55@gmail.com>
Co-authored-by: plun1331 <49261529+plun1331@users.noreply.github.com>
Co-authored-by: Lala Sabathil <lala@pycord.dev>
Co-authored-by: JustaSqu1d <overenchanted.gaming@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -29,6 +29,9 @@ These changes are available on the `master` branch, but have not yet been releas
   ([#1983](https://github.com/Pycord-Development/pycord/pull/1983))
 - Added new `application_auto_moderation_rule_create_badge` to `ApplicationFlags`.
   ([#1992](https://github.com/Pycord-Development/pycord/pull/1992))
+- Added `sync_start` argument to `VoiceClient.start_recording()`. When `True`, silence
+  is prepended to later users' recordings so that all recordings start in sync.
+  ([#1984](https://github.com/Pycord-Development/pycord/pull/1984))
 - Added `custom_message` to AutoModActionMetadata.
   ([#2029](https://github.com/Pycord-Development/pycord/pull/2029))
 - Added support for
diff --git a/discord/sinks/core.py b/discord/sinks/core.py
@@ -115,6 +115,7 @@ def __init__(self, data, client):
         self.decoded_data = None
 
         self.user_id = None
+        self.receive_time = time.perf_counter()
 
 
 class AudioData:
diff --git a/discord/voice_client.py b/discord/voice_client.py
@@ -700,7 +700,7 @@ def unpack_audio(self, data):
 
         self.decoder.decode(data)
 
-    def start_recording(self, sink, callback, *args):
+    def start_recording(self, sink, callback, *args, sync_start: bool = False):
         """The bot will begin recording audio from the current voice channel it is in.
         This function uses a thread so the current code line will not be stopped.
         Must be in a voice channel to use.
@@ -716,6 +716,9 @@ def start_recording(self, sink, callback, *args):
             A function which is called after the bot has stopped recording.
         *args:
             Args which will be passed to the callback function.
+        sync_start: :class:`bool`
+            If ``True``, users first heard after someone else get leading silence in
+            their recording, so all recordings line up as heard. Defaults to ``False``.
 
         Raises
         ------
@@ -738,6 +741,7 @@ def start_recording(self, sink, callback, *args):
         self.decoder = opus.DecodeManager(self)
         self.decoder.start()
         self.recording = True
+        self.sync_start = sync_start
         self.sink = sink
         sink.init(self)
 
@@ -796,8 +800,9 @@ def recv_audio(self, sink, callback, *args):
         # it by user, handles pcm files and
         # silence that should be added.
 
-        self.user_timestamps = {}
+        self.user_timestamps: dict[int, tuple[int, float]] = {}
         self.starting_time = time.perf_counter()
+        self.first_packet_timestamp: float
         while self.recording:
             ready, _, err = select.select([self.socket], [], [self.socket], 0.01)
             if not ready:
@@ -815,27 +820,46 @@ def recv_audio(self, sink, callback, *args):
 
         self.stopping_time = time.perf_counter()
         self.sink.cleanup()
-        callback = asyncio.run_coroutine_threadsafe(
-            callback(self.sink, *args), self.loop
-        )
+        callback = asyncio.run_coroutine_threadsafe(callback(sink, *args), self.loop)
         result = callback.result()
 
         if result is not None:
             print(result)
 
-    def recv_decoded_audio(self, data):
-        if data.ssrc not in self.user_timestamps:
-            self.user_timestamps.update({data.ssrc: data.timestamp})
-            # Add silence when they were not being recorded.
-            silence = 0
-        else:
-            silence = data.timestamp - self.user_timestamps[data.ssrc] - 960
-            self.user_timestamps[data.ssrc] = data.timestamp
+    def recv_decoded_audio(self, data: RawData):
+        # Add silence when they were not being recorded.
+        if data.ssrc not in self.user_timestamps:  # First packet from user
+            if (
+                not self.user_timestamps or not self.sync_start
+            ):  # First packet from anyone
+                self.first_packet_timestamp = data.receive_time
+                silence = 0
+
+            else:  # Previously received a packet from someone else
+                silence = (
+                    (data.receive_time - self.first_packet_timestamp) * 48000
+                ) - 960
+
+        else:  # Previously received a packet from user
+            dRT = (
+                data.receive_time - self.user_timestamps[data.ssrc][1]
+            ) * 48000  # delta receive time
+            dT = data.timestamp - self.user_timestamps[data.ssrc][0]  # delta timestamp
+            diff = abs(100 - dT * 100 / dRT)
+            if (
+                diff > 60 and dT != 960
+            ):  # If the difference in change is more than 60% threshold
+                silence = dRT - 960
+            else:
+                silence = dT - 960
+
+        self.user_timestamps.update({data.ssrc: (data.timestamp, data.receive_time)})
 
         data.decoded_data = (
-            struct.pack("<h", 0) * silence * opus._OpusStruct.CHANNELS
+            struct.pack("<h", 0) * max(0, int(silence)) * opus._OpusStruct.CHANNELS
             + data.decoded_data
         )
+
         while data.ssrc not in self.ws.ssrc_map:
             time.sleep(0.05)
         self.sink.write(data.decoded_data, self.ws.ssrc_map[data.ssrc]["user_id"])
diff --git a/examples/audio_recording_merged.py b/examples/audio_recording_merged.py
@@ -0,0 +1,115 @@
+import io
+
+import pydub  # pip install pydub==0.25.1
+
+import discord
+from discord.sinks import MP3Sink
+
+bot = discord.Bot()
+connections: dict[int, discord.VoiceClient] = {}
+
+
+@bot.event
+async def on_ready():
+    print(f"Logged in as {bot.user}")
+
+
+async def finished_callback(sink: MP3Sink, channel: discord.TextChannel):
+    mention_strs = []
+    audio_segs: list[pydub.AudioSegment] = []
+    files: list[discord.File] = []
+
+    longest = pydub.AudioSegment.empty()
+
+    for user_id, audio in sink.audio_data.items():
+        mention_strs.append(f"<@{user_id}>")
+
+        seg = pydub.AudioSegment.from_file(audio.file, format="mp3")
+
+        # Determine the longest audio segment
+        if len(seg) > len(longest):
+            audio_segs.append(longest)
+            longest = seg
+        else:
+            audio_segs.append(seg)
+
+        audio.file.seek(0)
+        files.append(discord.File(audio.file, filename=f"{user_id}.mp3"))
+
+    for seg in audio_segs:
+        longest = longest.overlay(seg)
+
+    with io.BytesIO() as f:
+        longest.export(f, format="mp3")
+        await channel.send(
+            f"Finished! Recorded audio for {', '.join(mention_strs)}.",
+            files=files + [discord.File(f, filename="recording.mp3")],
+        )
+
+
+@bot.command()
+async def join(ctx: discord.ApplicationContext):
+    """Join the voice channel!"""
+    voice = ctx.author.voice
+
+    if not voice:
+        return await ctx.respond("You're not in a vc right now")
+
+    vc = await voice.channel.connect()
+    connections.update({ctx.guild.id: vc})
+
+    await ctx.respond("Joined!")
+
+
+@bot.command()
+async def start(ctx: discord.ApplicationContext):
+    """Record the voice channel!"""
+    voice = ctx.author.voice
+
+    if not voice:
+        return await ctx.respond("You're not in a vc right now")
+
+    vc = connections.get(ctx.guild.id)
+
+    if not vc:
+        return await ctx.respond(
+            "I'm not in a vc right now. Use `/join` to make me join!"
+        )
+
+    vc.start_recording(
+        MP3Sink(),
+        finished_callback,
+        ctx.channel,
+        sync_start=True,
+    )
+
+    await ctx.respond("The recording has started!")
+
+
+@bot.command()
+async def stop(ctx: discord.ApplicationContext):
+    """Stop the recording"""
+    vc = connections.get(ctx.guild.id)
+
+    if not vc:
+        return await ctx.respond("There's no recording going on right now")
+
+    vc.stop_recording()
+
+    await ctx.respond("The recording has stopped!")
+
+
+@bot.command()
+async def leave(ctx: discord.ApplicationContext):
+    """Leave the voice channel!"""
+    vc = connections.get(ctx.guild.id)
+
+    if not vc:
+        return await ctx.respond("I'm not in a vc right now")
+
+    await vc.disconnect()
+
+    await ctx.respond("Left!")
+
+
+bot.run("TOKEN")