@@ -3,51 +3,21 @@ import Foundation
 
 public enum MicrophoneError: Error {
     case conversionFailed(details: String)
+    case setupFailed(details: String)
 }
+
 public class Microphone {
     public static let sampleRate: Double = 44100
     public static let isLinear16PCM: Bool = true
-    // Linear16 PCM is a standard format well-supported by EVI (although you must send
-    // a `session_settings` message to inform EVI of the sample rate). Because there is
-    // a wide variance of the native format/sample rate from input devices, we use the
-    // AVAudioConverter API to convert the audio to this standard format in order to
-    // remove all guesswork.
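+    // A sketch of the `session_settings` message that tells EVI to expect this
+    // format (field names follow Hume's EVI docs; verify against the current schema):
+    //   { "type": "session_settings",
+    //     "audio": { "encoding": "linear16", "sample_rate": 44100, "channels": 1 } }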
     private static let desiredInputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: sampleRate, channels: 1, interleaved: false)!
 
-    public var audioEngine: AVAudioEngine
-    private var inputNode: AVAudioInputNode
+    private var audioEngine: AVAudioEngine?
+    private var inputNode: AVAudioInputNode?
     private var isMuted: Bool = false
     private var onError: ((MicrophoneError) -> Void)?
 
     public init() {
         self.isMuted = false
-        self.audioEngine = AVAudioEngine()
-        self.inputNode = audioEngine.inputNode
-
-        do {
-            let outputNode: AVAudioOutputNode = audioEngine.outputNode
-            let mainMixerNode: AVAudioMixerNode = audioEngine.mainMixerNode
-            audioEngine.connect(mainMixerNode, to: outputNode, format: nil)
-
-            // Voice processing is a feature that can help reduce echo and background noise.
-            // It is very important for audio chat applications like EVI, because without
-            // echo cancellation, EVI will hear its own output and attempt to respond to it.
-
-            // `setVoiceProcessingEnabled` should be enabled on *both* the input and output nodes,
-            // because it works by observing signals that are sent to the output node (the
-            // speaker) and then "cancels" the echoes of those signals from what comes
-            // back into the input node (the microphone).
-            try self.inputNode.setVoiceProcessingEnabled(true)
-            try outputNode.setVoiceProcessingEnabled(true)
-
-            if #available(iOS 17.0, *) {
-                let duckingConfig = AVAudioVoiceProcessingOtherAudioDuckingConfiguration(enableAdvancedDucking: false, duckingLevel: .max)
-                inputNode.voiceProcessingOtherAudioDuckingConfiguration = duckingConfig
-            }
-        } catch {
-            print("Error setting voice processing: \(error)")
-            return
-        }
     }
 
     public func onError(_ onError: @escaping (MicrophoneError) -> Void) {
@@ -62,23 +32,53 @@ public class Microphone {
         self.isMuted = false
     }
 
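+    // Builds the engine lazily on first use (see startRecording below), so setup
+    // failures are thrown to the caller rather than swallowed inside init().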
+    private func setupAudioEngine() throws {
+        self.audioEngine = AVAudioEngine()
+        guard let audioEngine = self.audioEngine else {
+            throw MicrophoneError.setupFailed(details: "Failed to create audio engine")
+        }
+
+        self.inputNode = audioEngine.inputNode
+        guard let inputNode = self.inputNode else {
+            throw MicrophoneError.setupFailed(details: "Failed to get input node")
+        }
+
+        let outputNode: AVAudioOutputNode = audioEngine.outputNode
+        let mainMixerNode: AVAudioMixerNode = audioEngine.mainMixerNode
+        audioEngine.connect(mainMixerNode, to: outputNode, format: nil)
+
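+        // Enable voice processing on *both* the input and output nodes: it observes the
+        // signal sent to the speaker and cancels its echo from the microphone input, so
+        // EVI does not hear its own output and try to respond to it.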
+        try inputNode.setVoiceProcessingEnabled(true)
+        try outputNode.setVoiceProcessingEnabled(true)
+
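+        // On iOS 17 and later, duck other app audio as far as possible while the mic is live.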
+        if #available(iOS 17.0, *) {
+            let duckingConfig = AVAudioVoiceProcessingOtherAudioDuckingConfiguration(enableAdvancedDucking: false, duckingLevel: .max)
+            inputNode.voiceProcessingOtherAudioDuckingConfiguration = duckingConfig
+        }
+    }
+
     public func startRecording(onBase64EncodedAudio: @escaping (String) -> Void) throws {
-        let nativeInputFormat = self.inputNode.inputFormat(forBus: 0)
-        // The sample rate is "samples per second", so multiplying by 0.1 should get us chunks of about 100 ms.
+        if audioEngine == nil {
+            try setupAudioEngine()
+        }
+
+        guard let audioEngine = self.audioEngine, let inputNode = self.inputNode else {
+            throw MicrophoneError.setupFailed(details: "Audio engine not properly initialized")
+        }
+
+        let nativeInputFormat = inputNode.inputFormat(forBus: 0)
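+        // The sample rate is samples per second, so multiplying by 0.1 yields
+        // chunks of roughly 100 ms.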
         let inputBufferSize = UInt32(nativeInputFormat.sampleRate * 0.1)
-        self.inputNode.installTap(onBus: 0, bufferSize: inputBufferSize, format: nativeInputFormat) { (buffer, time) in
+
+        inputNode.installTap(onBus: 0, bufferSize: inputBufferSize, format: nativeInputFormat) { (buffer, time) in
             let convertedBuffer = AVAudioPCMBuffer(pcmFormat: Microphone.desiredInputFormat, frameCapacity: 1024)!
 
             var error: NSError? = nil
 
             if self.isMuted {
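+                // Standard muting behavior: keep sending frames of zeroed samples rather
+                // than sending nothing, so receivers can tell a muted-but-active stream
+                // apart from a disconnected one.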
-                // The standard behavior for muting is to send audio frames filled with empty data
-                // (versus not sending anything during mute). This helps audio systems distinguish
-                // between muted-but-still-active streams and streams that have become disconnected.
                 let silence = Data(repeating: 0, count: Int(convertedBuffer.frameCapacity) * Int(convertedBuffer.format.streamDescription.pointee.mBytesPerFrame))
                 onBase64EncodedAudio(silence.base64EncodedString())
                 return
             }
+
             let inputAudioConverter = AVAudioConverter(from: nativeInputFormat, to: Microphone.desiredInputFormat)!
             let status = inputAudioConverter.convert(to: convertedBuffer, error: &error, withInputFrom: { inNumPackets, outStatus in
                 outStatus.pointee = .haveData
@@ -106,7 +106,7 @@ public class Microphone {
     }
 
     public func stopRecording() {
-        audioEngine.stop()
-        self.inputNode.removeTap(onBus: 0)
+        audioEngine?.stop()
+        inputNode?.removeTap(onBus: 0)
     }
-}
+}