Skip to content

Commit 3ac8582

Browse files
author
Bruno Berisso
committed
Add AVAudioConverter to try a new way of reading data from the AVAudioEngine in live decoding. The idea is to read the data in a format more appealing to iOS (float 32, 16000 Hz) and convert it (with AVAudioConverter) to the Sphinx format (int 16, 16000 Hz). AVAudioConverter is only available from iOS 9.0, so the deployment target needs to change.
1 parent 638599d commit 3ac8582

File tree

2 files changed

+30
-13
lines changed

2 files changed

+30
-13
lines changed

TLSphinx.xcodeproj/project.pbxproj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@
308308
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
309309
GCC_WARN_UNUSED_FUNCTION = YES;
310310
GCC_WARN_UNUSED_VARIABLE = YES;
311-
IPHONEOS_DEPLOYMENT_TARGET = 8.3;
311+
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
312312
MTL_ENABLE_DEBUG_INFO = YES;
313313
ONLY_ACTIVE_ARCH = YES;
314314
SDKROOT = iphoneos;
@@ -352,7 +352,7 @@
352352
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
353353
GCC_WARN_UNUSED_FUNCTION = YES;
354354
GCC_WARN_UNUSED_VARIABLE = YES;
355-
IPHONEOS_DEPLOYMENT_TARGET = 8.3;
355+
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
356356
MTL_ENABLE_DEBUG_INFO = NO;
357357
SDKROOT = iphoneos;
358358
SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule";
@@ -382,7 +382,7 @@
382382
);
383383
INFOPLIST_FILE = TLSphinx/Info.plist;
384384
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
385-
IPHONEOS_DEPLOYMENT_TARGET = 8.2;
385+
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
386386
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
387387
LIBRARY_SEARCH_PATHS = (
388388
"$(PROJECT_DIR)/Sphinx/lib/pocketsphinx",
@@ -416,7 +416,7 @@
416416
);
417417
INFOPLIST_FILE = TLSphinx/Info.plist;
418418
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
419-
IPHONEOS_DEPLOYMENT_TARGET = 8.2;
419+
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
420420
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
421421
LIBRARY_SEARCH_PATHS = (
422422
"$(PROJECT_DIR)/Sphinx/lib/pocketsphinx",

TLSphinx/Decoder.swift

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,36 @@ public final class Decoder {
181181
throw DecodeErrors.NoAudioInputAvailable
182182
}
183183

184-
let formatIn = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 44100, channels: 1, interleaved: false)
185-
engine.connect(input, to: engine.outputNode, format: formatIn)
184+
let mixer = AVAudioMixerNode()
185+
engine.attach(mixer)
186+
engine.connect(input, to: mixer, format: input.outputFormat(forBus: 0))
186187

187-
input.installTap(onBus: 0, bufferSize: 4096, format: formatIn, block: { (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
188+
let formatIn = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 1, interleaved: false)
189+
let formatOut = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: false)
190+
let bufferMapper = AVAudioConverter(from: formatIn, to: formatOut)
188191

189-
let audioData = buffer.toDate()
192+
mixer.installTap(onBus: 0, bufferSize: 2048, format: formatIn, block: {
193+
[unowned self] (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) in
194+
195+
let sphinxBuffer = AVAudioPCMBuffer(pcmFormat: formatOut, frameCapacity: buffer.frameCapacity)
196+
197+
//This is needed because the default 'frameLength' value is 0 (since iOS 10), which causes the 'convert' call
198+
//to fail with an error (Error Domain=NSOSStatusErrorDomain Code=-50 "(null)")
199+
//More here: http://stackoverflow.com/questions/39714244/avaudioconverter-is-broken-in-ios-10
200+
sphinxBuffer.frameLength = sphinxBuffer.frameCapacity
201+
202+
do {
203+
try bufferMapper.convert(to: sphinxBuffer, from: buffer)
204+
} catch(let error as NSError) {
205+
print(error)
206+
return
207+
}
208+
209+
let audioData = sphinxBuffer.toData()
190210
self.process_raw(audioData)
191211

212+
print("Process: \(buffer.frameLength) frames - \(audioData.count) bytes - sample time: \(time.sampleTime)")
213+
192214
if self.speechState == .utterance {
193215

194216
self.end_utt()
@@ -202,9 +224,6 @@ public final class Decoder {
202224
}
203225
})
204226

205-
engine.mainMixerNode.outputVolume = 0.0
206-
engine.prepare()
207-
208227
start_utt()
209228

210229
do {
@@ -218,8 +237,6 @@ public final class Decoder {
218237

219238
public func stopDecodingSpeech () {
220239
engine.stop()
221-
engine.mainMixerNode.removeTap(onBus: 0)
222-
engine.reset()
223240
engine = nil
224241
}
225242
}

0 commit comments

Comments
 (0)