Skip to content

Commit 3099183

Browse files
committed
Native CoreAudio renderer with spatial audio
* Inspired by an example app from Apple. [1] * Needs an M1 or newer Mac. Eventually should work on iOS/tvOS, although you might need iOS 18. * Operates in a standard passthrough mode for stereo or when you have enough real channels (HDMI). * When headphones or built-in MacBook speakers are detected, this enables spatial audio via the amazing AUSpatialMixer, which is capable of rendering any number of channels up to 7.1.4 Atmos in very high quality binaural stereo. * Supports personalized HRTF if you've scanned your ears with your iPhone. * Added a new section in the upper-right of the stats overlay with audio stats. Planned features: * Head-tracking is possible but disabled until there is a config option. Also, the system sound menu doesn't indicate spatial audio is active, giving you no way to change any settings. [1] https://developer.apple.com/documentation/audiotoolbox/generating_spatial_audio_from_a_multichannel_audio_stream
1 parent 25132a1 commit 3099183

30 files changed

+2069
-8
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33

44
**/.vs/
5+
.vscode/
56
build/
67
config.tests/*/.qmake.stash
78
config.tests/*/Makefile

app/app.pro

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,18 @@ macx {
160160
CONFIG += discord-rpc
161161
}
162162

163-
LIBS += -lobjc -framework VideoToolbox -framework AVFoundation -framework CoreVideo -framework CoreGraphics -framework CoreMedia -framework AppKit -framework Metal -framework QuartzCore
164-
165-
# For libsoundio
166-
LIBS += -framework CoreAudio -framework AudioUnit
163+
LIBS += -lobjc \
164+
-framework AppKit \
165+
-framework AudioToolbox \
166+
-framework AudioUnit \
167+
-framework AVFoundation \
168+
-framework CoreAudio \
169+
-framework CoreVideo \
170+
-framework CoreGraphics \
171+
-framework CoreMedia \
172+
-framework Metal \
173+
-framework QuartzCore \
174+
-framework VideoToolbox
167175

168176
CONFIG += ffmpeg soundio
169177
}
@@ -391,14 +399,23 @@ win32:!winrt {
391399
streaming/video/ffmpeg-renderers/pacer/dxvsyncsource.h
392400
}
393401
macx {
394-
message(VideoToolbox renderer selected)
402+
message(CoreAudio + VideoToolbox renderers selected)
403+
404+
DEFINES += HAVE_COREAUDIO
395405

396406
SOURCES += \
407+
streaming/audio/renderers/coreaudio/au_spatial_renderer.mm \
408+
streaming/audio/renderers/coreaudio/coreaudio.cpp \
409+
streaming/audio/renderers/coreaudio/TPCircularBuffer.c \
397410
streaming/video/ffmpeg-renderers/vt_base.mm \
398411
streaming/video/ffmpeg-renderers/vt_avsamplelayer.mm \
399412
streaming/video/ffmpeg-renderers/vt_metal.mm
400413

401414
HEADERS += \
415+
streaming/audio/renderers/coreaudio/au_spatial_renderer.h \
416+
streaming/audio/renderers/coreaudio/coreaudio.h \
417+
streaming/audio/renderers/coreaudio/coreaudio_helpers.h \
418+
streaming/audio/renderers/coreaudio/TPCircularBuffer.h \
402419
streaming/video/ffmpeg-renderers/vt.h
403420
}
404421
soundio {

app/spatial-audio.entitlements

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3+
<plist version="1.0">
4+
<dict>
5+
<key>com.apple.security.app-sandbox</key>
6+
<true/>
7+
<key>com.apple.developer.spatial-audio.profile-access</key>
8+
<true/>
9+
<key>com.apple.developer.coremotion.head-pose</key>
10+
<true/>
11+
</dict>
12+
</plist>

app/streaming/audio/audio.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include "renderers/slaud.h"
1010
#endif
1111

12+
#ifdef HAVE_COREAUDIO
13+
#include "renderers/coreaudio/coreaudio.h"
14+
#endif
15+
1216
#include "renderers/sdl.h"
1317

1418
#include <Limelight.h>
@@ -29,6 +33,12 @@ IAudioRenderer* Session::createAudioRenderer(const POPUS_MULTISTREAM_CONFIGURATI
2933
TRY_INIT_RENDERER(SdlAudioRenderer, opusConfig)
3034
return nullptr;
3135
}
36+
#ifdef HAVE_COREAUDIO
37+
else if (mlAudio == "coreaudio") {
38+
TRY_INIT_RENDERER(CoreAudioRenderer, opusConfig)
39+
return nullptr;
40+
}
41+
#endif
3242
#ifdef HAVE_SOUNDIO
3343
else if (mlAudio == "libsoundio") {
3444
TRY_INIT_RENDERER(SoundIoAudioRenderer, opusConfig)
@@ -55,6 +65,11 @@ IAudioRenderer* Session::createAudioRenderer(const POPUS_MULTISTREAM_CONFIGURATI
5565
TRY_INIT_RENDERER(SLAudioRenderer, opusConfig)
5666
#endif
5767

68+
#ifdef HAVE_COREAUDIO
69+
// Native renderer for macOS/iOS/tvOS, supports spatial audio
70+
TRY_INIT_RENDERER(CoreAudioRenderer, opusConfig)
71+
#endif
72+
5873
// Default to SDL and use libsoundio as a fallback
5974
TRY_INIT_RENDERER(SdlAudioRenderer, opusConfig)
6075
#ifdef HAVE_SOUNDIO
@@ -157,6 +172,8 @@ int Session::arInit(int /* audioConfiguration */,
157172

158173
void Session::arCleanup()
159174
{
175+
s_ActiveSession->m_AudioRenderer->logGlobalAudioStats();
176+
160177
delete s_ActiveSession->m_AudioRenderer;
161178
s_ActiveSession->m_AudioRenderer = nullptr;
162179

@@ -239,6 +256,22 @@ void Session::arDecodeAndPlaySample(char* sampleData, int sampleLength)
239256
desiredBufferSize = 0;
240257
}
241258

259+
// used to display the raw audio bitrate
260+
s_ActiveSession->m_AudioRenderer->statsAddOpusBytesReceived(sampleLength);
261+
262+
// Flip stats windows roughly every second
263+
if (SDL_TICKS_PASSED(SDL_GetTicks(), s_ActiveSession->m_AudioRenderer->getActiveWndAudioStats().measurementStartTimestamp + 1000)) {
264+
if (s_ActiveSession->getOverlayManager().isOverlayEnabled(Overlay::OverlayDebugAudio)) {
265+
AUDIO_STATS lastTwoWndAudioStats = {};
266+
s_ActiveSession->m_AudioRenderer->snapshotAudioStats(lastTwoWndAudioStats);
267+
268+
s_ActiveSession->m_AudioRenderer->stringifyAudioStats(lastTwoWndAudioStats,
269+
s_ActiveSession->getOverlayManager().getOverlayText(Overlay::OverlayDebugAudio),
270+
s_ActiveSession->getOverlayManager().getOverlayMaxTextLength());
271+
s_ActiveSession->getOverlayManager().setOverlayTextUpdated(Overlay::OverlayDebugAudio);
272+
}
273+
}
274+
242275
if (!s_ActiveSession->m_AudioRenderer->submitAudio(desiredBufferSize)) {
243276
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
244277
"Reinitializing audio renderer after failure");
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
Copyright © 2024 Apple Inc.
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
5+
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
6+
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
7+
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8+
9+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10+
11+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
12+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
13+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
14+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15+
*/
16+
#pragma once
17+
18+
#include <AudioToolbox/AudioToolbox.h>
19+
20+
class AllocatedAudioBufferList
21+
{
22+
public:
23+
AllocatedAudioBufferList(UInt32 channelCount, uint16_t bufferSize)
24+
{
25+
26+
mBufferList = static_cast<AudioBufferList *>(malloc(sizeof(AudioBufferList) + (sizeof(AudioBuffer) * channelCount)));
27+
mBufferList->mNumberBuffers = channelCount;
28+
for (UInt32 c = 0; c < channelCount; ++c) {
29+
mBufferList->mBuffers[c].mNumberChannels = 1;
30+
mBufferList->mBuffers[c].mDataByteSize = bufferSize * sizeof(float);
31+
mBufferList->mBuffers[c].mData = malloc(sizeof(float) * bufferSize);
32+
}
33+
}
34+
35+
AllocatedAudioBufferList(const AllocatedAudioBufferList&) = delete;
36+
37+
AllocatedAudioBufferList& operator=(const AllocatedAudioBufferList&) = delete;
38+
39+
~AllocatedAudioBufferList()
40+
{
41+
if (mBufferList == nullptr) { return; }
42+
43+
for (UInt32 i = 0; i < mBufferList->mNumberBuffers; ++i) {
44+
free(mBufferList->mBuffers[i].mData);
45+
}
46+
free(mBufferList);
47+
mBufferList = nullptr;
48+
}
49+
50+
AudioBufferList * _Nonnull get()
51+
{
52+
return mBufferList;
53+
}
54+
55+
private:
56+
AudioBufferList * _Nonnull mBufferList = { nullptr };
57+
};
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
Moonlight CoreAudio supports 2 modes:
2+
3+
1. A normal passthrough mode where decoded PCM from the Opus stream is passed directly to the output Audio Unit. This mode
4+
is used when the incoming stream is stereo or when the local output device is already multichannel, e.g. when outputting over HDMI.
5+
6+
2. Spatial Mixer mode. This mode is used for 5.1 and 7.1 channel streams, when the output device supports spatial audio. This usually means
7+
the system knows that headphones are in use, or the built-in MacBook speakers are in use. Apple uses a specially tuned profile to enable
8+
a spatial effect from their laptop speakers.
9+
10+
There are a lot of knobs available in the mixer to describe how you want the rendering to be done, but I have hardcoded what seem
11+
to be Apple's recommended defaults. For example, I can find zero documentation about what the different SpatializationAlgorithm types do,
12+
and UseOutputType is the right choice, apparently picking the best algorithm for the target device.
13+
14+
kSpatializationAlgorithm_EqualPowerPanning
15+
kSpatializationAlgorithm_HRTF
16+
kSpatializationAlgorithm_SoundField
17+
kSpatializationAlgorithm_SphericalHead
18+
kSpatializationAlgorithm_StereoPassThrough
19+
kSpatializationAlgorithm_VectorBasedPanning
20+
kSpatializationAlgorithm_HRTFHQ
21+
kSpatializationAlgorithm_UseOutputType
22+
23+
The CoreAudio renderer was inspired by an example app in Apple's Audio Toolbox documentation:
24+
25+
https://developer.apple.com/documentation/audiotoolbox/generating_spatial_audio_from_a_multichannel_audio_stream
26+
27+
In theory, any number of channels with any layout can be processed by SpatialMixer, with 7.1.4 Atmos as Apple's example,
28+
in the form of a 12-channel WAV file. Interestingly, raw multichannel WAV files get automatically spatialized when played
29+
with QuickTime on macOS.
30+
31+
The design and program flow of the example app is overly complex, even though it only uses 2 AudioUnits: one in stereo for final output
32+
and one that is a SpatialMixer. Perhaps they really wanted to show off mixing Swift UI with advanced Obj-C++ using closures/lambdas.
33+
34+
I've left in some sections of the code that are platform-specific (iOS needs to use different audio APIs). This will
35+
hopefully make it easier to port this to moonlight-ios.
36+
37+
Apple example:
38+
39+
AudioFileReader->pullAudioBlock() <- N channel local WAV file (the example has a few 7.1.4 samples)
40+
rendering->mInputBlock()
41+
AudioUnitRender(mAUSM)
42+
mAUSM->process()
43+
Kernel->process()
44+
2-channel binaural out <- OutputAU
45+
46+
CoreAudioRenderer:
47+
48+
A thread-safe ring buffer is used, on one end is the Opus decoder which decodes 5ms Opus packets into PCM, 32 bits-per-channel.
49+
The reader is one of two AURenderCallback functions that are called by CoreAudio in a pull model.
50+
51+
renderCallbackDirect is the simple case: simply copy the PCM into the buffers being given to us by CoreAudio.
52+
This mode is able to pass the interleaved PCM unchanged to the OS.
53+
54+
In Spatial mode, renderCallbackSpatial uses an intermediate SpatialMixer, which it asks for 2-channel binaural PCM
55+
using m_SpatialAU.process(). m_SpatialAU is our AUSpatialRenderer class that contains a lot of setup and one callback.
56+
The process() method calls AudioUnitRender() which will have CoreAudio call inputCallback asking for 8 channels of PCM
57+
data for example. This is copied out of the ring buffer, where it is stored interleaved (each channel's data is together
58+
and makes up one frame) and needs to be transformed to non-interleaved format into 8 separate buffers. After this, the
59+
mixer does whatever it does, and process() returns. We're still in renderCallbackSpatial and it can deliver the final
60+
2-channel version to the final output.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
CoreAudioRenderer TODO
2+
----------------------
3+
Test the lower-quality surround modes that have coupled streams, is that broken on headphones or when spatialized?
4+
From https://people.xiph.org/~xiphmont/demo/opus/demo3.shtml
5+
6+
Surround masking takes advantage of cross-channel masking between free-field loudspeakers. Obviously, we can't do that for stereo, as stereo is often listened to on headphones or nearfield monitors, but for surround encodings played on typical surround placements with listeners placed well within the soundfield, there's considerable savings to be had by assuming freefield masking. We only need to make slight modifications to ensure that the encode still sounds good when downmixed to stereo for playback on non-surround systems.
7+
8+
Refactor into more logical/cleaner C++ classes.
9+
Refactor audio stats code and implement for other backends.

0 commit comments

Comments
 (0)