Skip to content

Commit 86af70a

Browse files
author
Bruno Berisso
committed
Add a new API to add words to the recognition dictionary at runtime. Be aware that new words can't be added while a recognition is in progress, so you should add new words before starting a recognition process.
The API expects an array of String tuples of the form: (word: 'HELLO', phones: 'HH EH L OW'). The first component is the word in plain English. The second is the pronunciation phones as they appear in the cmudict (more here: http://www.speech.cs.cmu.edu/tools/lextool.html). In the future the second component should be calculated.
1 parent 3ac8582 commit 86af70a

File tree

4 files changed

+101
-37
lines changed

4 files changed

+101
-37
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
FORWARD F AO R W ER D
2+
GO G OW
3+
TEN T EH N
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
Language model created by QuickLM on Mon Jan 23 09:11:27 EST 2017
2+
Copyright (c) 1996-2010 Carnegie Mellon University and Alexander I. Rudnicky
3+
4+
The model is in standard ARPA format, designed by Doug Paul while he was at MITRE.
5+
6+
The code that was used to produce this language model is available in Open Source.
7+
Please visit http://www.speech.cs.cmu.edu/tools/ for more information
8+
9+
The (fixed) discount mass is 0.5. The backoffs are computed using the ratio method.
10+
This model based on a corpus of 1 sentences and 5 words
11+
12+
\data\
13+
ngram 1=5
14+
ngram 2=4
15+
ngram 3=3
16+
17+
\1-grams:
18+
-1.0000 </s> -0.3010
19+
-1.0000 <s> -0.2553
20+
-1.0000 FORWARD -0.2553
21+
-1.0000 GO -0.2553
22+
-1.0000 TEN -0.2553
23+
24+
\2-grams:
25+
-0.3010 <s> GO 0.0000
26+
-0.3010 FORWARD TEN 0.0000
27+
-0.3010 GO FORWARD 0.0000
28+
-0.3010 TEN </s> -0.3010
29+
30+
\3-grams:
31+
-0.3010 <s> GO FORWARD
32+
-0.3010 FORWARD TEN </s>
33+
-0.3010 GO FORWARD TEN
34+
35+
\end\

TLSphinx/Decoder.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,16 @@ public final class Decoder {
239239
engine.stop()
240240
engine = nil
241241
}
242+
243+
/// Adds words to the recognition dictionary at runtime.
///
/// Words can't be added while the audio engine is running; call this before
/// starting a recognition.
///
/// - Parameter words: Entries of the form `(word: "HELLO", phones: "HH EH L OW")`.
///   `word` is the plain-text word and `phones` its pronunciation phones.
/// - Throws: `DecodeErrors.CantAddWordsWhileDecodeingSpeech` when the engine is running.
public func add(words: [(word: String, phones: String)]) throws {

    guard engine == nil || !engine.isRunning else {
        throw DecodeErrors.CantAddWordsWhileDecodeingSpeech
    }

    // Set the update flag on the final entry only, identified by position.
    // Comparing the word text instead would also flag any earlier entry that
    // happens to share the last entry's text.
    let lastIndex = words.count - 1

    for (index, entry) in words.enumerated() {
        let update = index == lastIndex ? STrue32 : SFalse32
        // NOTE(review): the result of ps_add_word is discarded, as before; presumably
        // it signals failure with a negative value — confirm and surface it if so.
        ps_add_word(psDecoder, entry.word, entry.phones, update)
    }
}
242254
}

TLSphinxTests/Basic.swift

Lines changed: 51 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -90,47 +90,61 @@ class BasicTests: XCTestCase {
9090
expectation.fulfill()
9191
}
9292

93-
if let modelPath = getModelPath() {
94-
95-
let hmm = (modelPath as NSString).appendingPathComponent("en-us")
96-
let lm = (modelPath as NSString).appendingPathComponent("en-us.lm.dmp")
97-
let dict = (modelPath as NSString).appendingPathComponent("cmudict-en-us.dict")
98-
99-
if let config = Config(args: ("-hmm", hmm), ("-lm", lm), ("-dict", dict)) {
100-
if let decoder = Decoder(config:config) {
101-
102-
let audioFile = (modelPath as NSString).appendingPathComponent("goforward.raw")
103-
let expectation = self.expectation(description: "Decode finish")
104-
105-
decoder.decodeSpeechAtPath(audioFile) {
106-
107-
if let hyp = $0 {
108-
109-
print("Text: \(hyp.text) - Score: \(hyp.score)")
110-
XCTAssert(hyp.text == "go forward ten meters", "Pass")
111-
112-
} else {
113-
XCTFail("Fail to decode audio")
114-
}
115-
116-
expectation.fulfill()
93+
waitForExpectations(timeout: NSTimeIntervalSince1970)
94+
}
95+
96+
func testAddWordToLenguageModel() {
97+
98+
guard let modelPath = getModelPath() else {
99+
XCTFail("Can't access pocketsphinx model. Bundle root: \(Bundle.main)")
100+
return
101+
}
102+
103+
let basicModelPath = (modelPath.appendingPathComponent("basic-lm") as NSString)
104+
let hmm = modelPath.appendingPathComponent("en-us")
105+
let lm = basicModelPath.appendingPathComponent("6844.lm")
106+
let dict = basicModelPath.appendingPathComponent("6844.dic")
107+
108+
guard let config = Config(args: ("-hmm", hmm), ("-lm", lm), ("-dict", dict)) else {
109+
XCTFail("Can't run test without a valid config")
110+
return
111+
}
112+
113+
guard let decoder = Decoder(config:config) else {
114+
XCTFail("Can't run test without a decoder")
115+
return
116+
}
117+
118+
let audioFile = modelPath.appendingPathComponent("goforward.raw")
119+
let expectation = self.expectation(description: "Decode finish")
120+
121+
try! decoder.decodeSpeech(atPath: audioFile) { [unowned decoder] in
122+
123+
if let hyp = $0 {
124+
125+
print("Text: \(hyp.text) - Score: \(hyp.score)")
126+
XCTAssert(hyp.text == "GO FORWARD TEN", "Pass")
127+
128+
try! decoder.add(words:[("METERS","M IY T ER Z")])
129+
130+
try! decoder.decodeSpeech(atPath: audioFile) {
131+
if let hyp = $0 {
132+
133+
print("Text: \(hyp.text) - Score: \(hyp.score)")
134+
XCTAssert(hyp.text == "GO FORWARD TEN METERS", "Pass")
135+
} else {
136+
XCTFail("Fail to decode audio")
117137
}
118-
119-
waitForExpectations(timeout: NSTimeIntervalSince1970, handler: { (_) -> Void in
120-
121-
})
122-
123-
} else {
124-
XCTFail("Can't run test without a decoder")
138+
139+
expectation.fulfill()
125140
}
126-
141+
127142
} else {
128-
XCTFail("Can't run test without a valid config")
143+
XCTFail("Fail to decode audio")
144+
expectation.fulfill()
129145
}
130-
131-
} else {
132-
XCTFail("Can't access pocketsphinx model. Bundle root: \(Bundle.main)")
133146
}
134-
147+
148+
waitForExpectations(timeout: NSTimeIntervalSince1970)
135149
}
136150
}

0 commit comments

Comments
 (0)