@@ -3,51 +3,21 @@ import Foundation
 
 public enum MicrophoneError: Error {
     case conversionFailed(details: String)
+    case setupFailed(details: String)
 }
+
 public class Microphone {
     public static let sampleRate: Double = 44100
     public static let isLinear16PCM: Bool = true
-    // Linear16 PCM is a standard format well-supported by EVI (although you must send
-    // a `session_settings` message to inform EVI of the sample rate). Because there is
-    // a wide variance of the native format/sample rate from input devices, we use the
-    // AVAudioConverter API to convert the audio to this standard format in order to
-    // remove all guesswork.
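+    // A sketch of the `session_settings` message that tells EVI to expect this
+    // format (field names follow Hume's EVI docs; verify against the current schema):
+    //   { "type": "session_settings",
+    //     "audio": { "encoding": "linear16", "sample_rate": 44100, "channels": 1 } }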
     private static let desiredInputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: sampleRate, channels: 1, interleaved: false)!
 
-    public var audioEngine: AVAudioEngine
-    private var inputNode: AVAudioInputNode
+    private var audioEngine: AVAudioEngine?
+    private var inputNode: AVAudioInputNode?
     private var isMuted: Bool = false
     private var onError: ((MicrophoneError) -> Void)?
 
     public init() {
         self.isMuted = false
-        self.audioEngine = AVAudioEngine()
-        self.inputNode = audioEngine.inputNode
-
-        do {
-            let outputNode: AVAudioOutputNode = audioEngine.outputNode
-            let mainMixerNode: AVAudioMixerNode = audioEngine.mainMixerNode
-            audioEngine.connect(mainMixerNode, to: outputNode, format: nil)
-
-            // Voice processing is a feature that can help reduce echo and background noise.
-            // It is very important for audio chat applications like EVI, because without
-            // echo cancellation, EVI will hear its own output and attempt to respond to it.
-
-            // `setVoiceProcessingEnabled` should be enabled on *both* the input and output nodes,
-            // because it works by observing signals that are sent to the output node (the
-            // speaker) and then "cancels" the echoes of those signals from what comes
-            // back into the input node (the microphone).
-            try self.inputNode.setVoiceProcessingEnabled(true)
-            try outputNode.setVoiceProcessingEnabled(true)
-
-            if #available(iOS 17.0, *) {
-                let duckingConfig = AVAudioVoiceProcessingOtherAudioDuckingConfiguration(enableAdvancedDucking: false, duckingLevel: .max)
-                inputNode.voiceProcessingOtherAudioDuckingConfiguration = duckingConfig
-            }
-        } catch {
-            print("Error setting voice processing: \(error)")
-            return
-        }
     }
 
     public func onError(_ onError: @escaping (MicrophoneError) -> Void) {
@@ -62,23 +32,53 @@ public class Microphone {
         self.isMuted = false
     }
 
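+    // Builds the engine lazily on first use (see startRecording below), so setup
+    // failures are thrown to the caller rather than swallowed inside init().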
+    private func setupAudioEngine() throws {
+        self.audioEngine = AVAudioEngine()
+        guard let audioEngine = self.audioEngine else {
+            throw MicrophoneError.setupFailed(details: "Failed to create audio engine")
+        }
+
+        self.inputNode = audioEngine.inputNode
+        guard let inputNode = self.inputNode else {
+            throw MicrophoneError.setupFailed(details: "Failed to get input node")
+        }
+
+        let outputNode: AVAudioOutputNode = audioEngine.outputNode
+        let mainMixerNode: AVAudioMixerNode = audioEngine.mainMixerNode
+        audioEngine.connect(mainMixerNode, to: outputNode, format: nil)
+
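+        // Enable voice processing on *both* the input and output nodes: it observes the
+        // signal sent to the speaker and cancels its echo from the microphone input, so
+        // EVI does not hear its own output and try to respond to it.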
+        try inputNode.setVoiceProcessingEnabled(true)
+        try outputNode.setVoiceProcessingEnabled(true)
+
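+        // On iOS 17 and later, duck other app audio as far as possible while the mic is live.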
+        if #available(iOS 17.0, *) {
+            let duckingConfig = AVAudioVoiceProcessingOtherAudioDuckingConfiguration(enableAdvancedDucking: false, duckingLevel: .max)
+            inputNode.voiceProcessingOtherAudioDuckingConfiguration = duckingConfig
+        }
+    }
+
     public func startRecording(onBase64EncodedAudio: @escaping (String) -> Void) throws {
-        let nativeInputFormat = self.inputNode.inputFormat(forBus: 0)
-        // The sample rate is "samples per second", so multiplying by 0.1 should get us chunks of about 100 ms.
+        if audioEngine == nil {
+            try setupAudioEngine()
+        }
+
+        guard let audioEngine = self.audioEngine, let inputNode = self.inputNode else {
+            throw MicrophoneError.setupFailed(details: "Audio engine not properly initialized")
+        }
+
+        let nativeInputFormat = inputNode.inputFormat(forBus: 0)
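+        // The sample rate is samples per second, so multiplying by 0.1 yields
+        // chunks of roughly 100 ms.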
         let inputBufferSize = UInt32(nativeInputFormat.sampleRate * 0.1)
-        self.inputNode.installTap(onBus: 0, bufferSize: inputBufferSize, format: nativeInputFormat) { (buffer, time) in
+
+        inputNode.installTap(onBus: 0, bufferSize: inputBufferSize, format: nativeInputFormat) { (buffer, time) in
             let convertedBuffer = AVAudioPCMBuffer(pcmFormat: Microphone.desiredInputFormat, frameCapacity: 1024)!
 
             var error: NSError? = nil
 
             if self.isMuted {
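+                // Standard muting behavior: keep sending frames of zeroed samples rather
+                // than sending nothing, so receivers can tell a muted-but-active stream
+                // apart from a disconnected one.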
-                // The standard behavior for muting is to send audio frames filled with empty data
-                // (versus not sending anything during mute). This helps audio systems distinguish
-                // between muted-but-still-active streams and streams that have become disconnected.
                 let silence = Data(repeating: 0, count: Int(convertedBuffer.frameCapacity) * Int(convertedBuffer.format.streamDescription.pointee.mBytesPerFrame))
                 onBase64EncodedAudio(silence.base64EncodedString())
                 return
             }
+
             let inputAudioConverter = AVAudioConverter(from: nativeInputFormat, to: Microphone.desiredInputFormat)!
             let status = inputAudioConverter.convert(to: convertedBuffer, error: &error, withInputFrom: { inNumPackets, outStatus in
                 outStatus.pointee = .haveData
@@ -106,7 +106,7 @@ public class Microphone {
     }
 
     public func stopRecording() {
-        audioEngine.stop()
-        self.inputNode.removeTap(onBus: 0)
+        audioEngine?.stop()
+        inputNode?.removeTap(onBus: 0)
     }
-}
+}