Skip to content

Commit bf5e365

Browse files
Update Speech to text integration tests
1 parent 769ec64 commit bf5e365

File tree

6 files changed

+1389
-9
lines changed

6 files changed

+1389
-9
lines changed

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ public class SpeechToText extends WatsonService {
111111
private static final Type TYPE_LIST_MODELS = new TypeToken<List<SpeechModel>>() { }.getType();
112112
private static final Type TYPE_LIST_RECOGNITIONS = new TypeToken<List<RecognitionJob>>() { }.getType();
113113
private static final Type TYPE_SESSION_STATUS = new TypeToken<SpeechSessionStatus>() { }.getType();
114-
private static final Type TYPE_WORDS = new TypeToken<List<Word>>() { }.getType();
114+
private static final Type TYPE_WORDS = new TypeToken<List<WordData>>() { }.getType();
115115

116116
private static final String URL = "https://stream.watsonplatform.net/speech-to-text/api";
117117
private static final Gson GSON = GsonSingleton.getGsonWithoutPrettyPrinting();

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/Word.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
*/
1313
package com.ibm.watson.developer_cloud.speech_to_text.v1.model;
1414

15+
import java.util.Arrays;
1516
import java.util.List;
1617

1718
import com.google.gson.annotations.SerializedName;
@@ -23,8 +24,14 @@
2324
*/
2425
public class Word extends GenericModel {
2526

27+
/**
28+
* The Enum Type.
29+
*/
2630
public enum Type {
27-
ALL, CORPORA, USER
31+
/** The all. */
32+
ALL, /** The corpora. */
33+
CORPORA, /** The user. */
34+
USER
2835
}
2936

3037
@SerializedName("display_as")
@@ -33,6 +40,25 @@ public enum Type {
3340
private List<String> soundsLike;
3441
private String word;
3542

43+
/**
44+
* Instantiates a new word.
45+
*/
46+
public Word() { }
47+
48+
/**
49+
* Instantiates a new word.
50+
*
51+
* @param word the spelling of the word that is used to train the model.
52+
* @param displayAs the spelling of the custom word that the service uses to display the word in a transcript.
53+
* @param soundsLike An array of pronunciations for the custom word.
54+
*/
55+
public Word(String word, String displayAs, String... soundsLike) {
56+
this();
57+
this.word = word;
58+
this.displayAs = displayAs;
59+
this.soundsLike = Arrays.asList(soundsLike);
60+
}
61+
3662
/**
3763
* Gets the spelling of the custom word that the service uses to display the word in a transcript.
3864
*
@@ -52,7 +78,7 @@ public List<String> getSoundsLike() {
5278
}
5379

5480
/**
55-
* Gets the word.
81+
* Gets the spelling of the word that is used to train the model.
5682
*
5783
* @return The word
5884
*/

tests/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word;
4848
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word.Type;
4949
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.WordData;
50+
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Corpus.Status;
5051
import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback;
5152

5253
/**
@@ -55,6 +56,9 @@
5556
public class SpeechToTextIT extends WatsonServiceTest {
5657

5758
private static final String EN_BROADBAND16K = "en-US_BroadbandModel";
59+
private static final String SPEECH_RESOURCE = "src/test/resources/speech_to_text/%s";
60+
private static final String SAMPLE_WAV = String.format(SPEECH_RESOURCE, "sample1.wav");
61+
5862
private CountDownLatch lock = new CountDownLatch(1);
5963
private SpeechToText service;
6064
private SpeechResults asyncResults;
@@ -173,7 +177,7 @@ public void testGetRecognizeStatus() {
173177
*/
174178
@Test
175179
public void testRecognizeFileString() {
176-
File audio = new File("src/test/resources/speech_to_text/sample1.wav");
180+
File audio = new File(SAMPLE_WAV);
177181
SpeechResults results = service.recognize(audio).execute();
178182
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript());
179183
}
@@ -183,7 +187,7 @@ public void testRecognizeFileString() {
183187
*/
184188
@Test
185189
public void testRecognizeFileStringRecognizeOptions() {
186-
File audio = new File("src/test/resources/speech_to_text/sample1.wav");
190+
File audio = new File(SAMPLE_WAV);
187191
String contentType = HttpMediaType.AUDIO_WAV;
188192
RecognizeOptions options = new RecognizeOptions.Builder().continuous(true).timestamps(true).wordConfidence(true)
189193
.model(EN_BROADBAND16K).contentType(contentType).profanityFilter(false).build();
@@ -205,7 +209,7 @@ public void testRecognizeKeywords() {
205209
new RecognizeOptions.Builder().contentType("audio/wav").model(SpeechModel.EN_US_BROADBANDMODEL.getName())
206210
.continuous(true).inactivityTimeout(500).keywords(keyword1, keyword2).keywordsThreshold(0.7).build();
207211

208-
final File audio = new File("src/test/resources/speech_to_text/sample1.wav");
212+
final File audio = new File(SAMPLE_WAV);
209213
final SpeechResults results = service.recognize(audio, options).execute();
210214
final Transcript transcript = results.getResults().get(0);
211215

@@ -240,7 +244,7 @@ public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedEx
240244
RecognizeOptions options = new RecognizeOptions.Builder().continuous(true).interimResults(true)
241245
.inactivityTimeout(40).timestamps(true).maxAlternatives(2).wordAlternativesThreshold(0.5).model(EN_BROADBAND16K)
242246
.contentType(HttpMediaType.AUDIO_WAV).build();
243-
FileInputStream audio = new FileInputStream("src/test/resources/speech_to_text/sample1.wav");
247+
FileInputStream audio = new FileInputStream(SAMPLE_WAV);
244248

245249
service.recognizeUsingWebSocket(audio, options, new BaseRecognizeCallback() {
246250

@@ -287,7 +291,7 @@ public void onTranscription(SpeechResults speechResults) {
287291
*/
288292
@Test
289293
public void testCreateRecognitionJob() throws InterruptedException, FileNotFoundException {
290-
File audio = new File("src/test/resources/speech_to_text/sample1.wav");
294+
File audio = new File(SAMPLE_WAV);
291295
RecognitionJob job = service.createRecognitionJob(audio, null, null).execute();
292296
try {
293297
assertNotNull(job.getId());
@@ -418,4 +422,64 @@ public void testAddWords() throws FileNotFoundException {
418422
service.trainCustomization(customizationId, null);
419423
}
420424

425+
/**
426+
* Test customization.
427+
*
428+
* @throws InterruptedException the interrupted exception
429+
*/
430+
@Test
431+
@Ignore
432+
public void testCustomization() throws InterruptedException {
433+
// 1 create customization
434+
Customization myCustomization =
435+
service.createCustomization("IEEE-test", SpeechModel.EN_US_BROADBANDMODEL, null).execute();
436+
String id = myCustomization.getId();
437+
438+
// 2 Add a corpus file to the model:
439+
service
440+
.addTextToCustomizationCorpus(id, "corpus-1", false, new File("src/test/resources/speech_to_text/corpus1.txt"))
441+
.execute();
442+
443+
// 3 Get corpora
444+
List<Corpus> corpora = service.getCorpora(id).execute();
445+
446+
// There is only one corpus so far so choose it
447+
Corpus corpus = corpora.get(0);
448+
449+
for (int x = 0; x < 30 && corpus.getStatus() != Status.ANALYZED; x++) {
450+
corpus = service.getCorpora(id).execute().get(0);
451+
Thread.sleep(5000);
452+
}
453+
454+
// Now add some user words to the custom model
455+
service.addWord(id, new Word("IEEE", "IEEE", "I. triple E.")).execute();
456+
service.addWord(id, new Word("hhonors", "IEEE", "H. honors", "Hilton honors")).execute();
457+
458+
// Display all words in the words resource (coming from OOVs from the corpus add and the new words just added)
459+
List<WordData> result = service.getWords(id, Word.Type.ALL).execute();
460+
for (WordData word : result) {
461+
System.out.println(word);
462+
}
463+
464+
// Now start training of the model
465+
service.trainCustomization(id, Customization.WordTypeToAdd.ALL).execute();
466+
467+
for (int x = 0; x < 30 && myCustomization.getStatus() != Customization.Status.AVAILABLE; x++) {
468+
myCustomization = service.getCustomization(id).execute();
469+
Thread.sleep(10000);
470+
}
471+
472+
File audio = new File(SAMPLE_WAV);
473+
RecognizeOptions options = new RecognizeOptions.Builder().continuous(true)
474+
.model(SpeechModel.EN_US_BROADBANDMODEL.getName()).customizationId(id).build();
475+
476+
// First decode WITHOUT the custom model
477+
SpeechResults transcript = service.recognize(audio).execute();
478+
System.out.println(transcript);
479+
480+
// Now decode with the custom model
481+
transcript = service.recognize(audio, options).execute();
482+
483+
System.out.println(transcript);
484+
}
421485
}

tests/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Customization.WordTypeToAdd;
4545
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognitionJob;
4646
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions;
47-
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions.Builder;
4847
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechAlternative;
4948
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechModel;
5049
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
182 KB
Binary file not shown.

0 commit comments

Comments
 (0)