Skip to content

Commit dd6b5a1

Browse files
committed
feat(Text to Speech): Initial commit of code for synthesizeUsingWebSocket
1 parent e6d6537 commit dd6b5a1

File tree

12 files changed

+582
-0
lines changed

12 files changed

+582
-0
lines changed

text-to-speech/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeech.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,16 @@
3939
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.VoiceModels;
4040
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Voices;
4141
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Words;
42+
import com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback;
43+
import com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.TextToSpeechWebSocketListener;
4244
import com.ibm.watson.developer_cloud.util.GsonSingleton;
4345
import com.ibm.watson.developer_cloud.util.ResponseConverterUtils;
4446
import com.ibm.watson.developer_cloud.util.Validator;
47+
import okhttp3.HttpUrl;
48+
import okhttp3.OkHttpClient;
49+
import okhttp3.Request;
50+
import okhttp3.WebSocket;
51+
4552
import java.io.InputStream;
4653

4754
/**
@@ -199,6 +206,29 @@ public ServiceCall<InputStream> synthesize(SynthesizeOptions synthesizeOptions)
199206
return createServiceCall(builder.build(), ResponseConverterUtils.getInputStream());
200207
}
201208

209+
public WebSocket synthesizeUsingWebSocket(SynthesizeOptions synthesizeOptions, SynthesizeCallback callback) {
210+
Validator.notNull(synthesizeOptions, "synthesizeOptions cannot be null");
211+
Validator.notNull(callback, "callback cannot be null");
212+
213+
HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/synthesize").newBuilder();
214+
215+
if (synthesizeOptions.voice() != null) {
216+
urlBuilder.addQueryParameter("voice", synthesizeOptions.voice());
217+
}
218+
if (synthesizeOptions.customizationId() != null) {
219+
urlBuilder.addQueryParameter("customization_id", synthesizeOptions.customizationId());
220+
}
221+
222+
String url = urlBuilder.toString().replace("https://", "wss://");
223+
Request.Builder builder = new Request.Builder().url(url);
224+
225+
setAuthentication(builder);
226+
setDefaultHeaders(builder);
227+
228+
OkHttpClient client = configureHttpClient();
229+
return client.newWebSocket(builder.build(), new TextToSpeechWebSocketListener(synthesizeOptions, callback));
230+
}
231+
202232
/**
203233
* Get pronunciation.
204234
*
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.model;
2+
3+
import com.google.gson.annotations.JsonAdapter;
4+
import com.ibm.watson.developer_cloud.service.model.GenericModel;
5+
import com.ibm.watson.developer_cloud.text_to_speech.v1.util.MarkTimingTypeAdapter;
6+
7+
@JsonAdapter(MarkTimingTypeAdapter.class)
8+
public class MarkTiming extends GenericModel{
9+
private String mark;
10+
private Double time;
11+
12+
public String getMark() {
13+
return mark;
14+
}
15+
16+
public Double getTime() {
17+
return time;
18+
}
19+
20+
public void setMark(String mark) {
21+
this.mark = mark;
22+
}
23+
24+
public void setTime(Double time) {
25+
this.time = time;
26+
}
27+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.model;
2+
3+
import com.ibm.watson.developer_cloud.service.model.GenericModel;
4+
5+
import java.util.List;
6+
7+
public class Marks extends GenericModel {
8+
private List<MarkTiming> marks;
9+
10+
public List<MarkTiming> getMarks() {
11+
return marks;
12+
}
13+
}

text-to-speech/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/model/SynthesizeOptions.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import com.ibm.watson.developer_cloud.service.model.GenericModel;
1616
import com.ibm.watson.developer_cloud.util.Validator;
1717

18+
import java.util.List;
19+
1820
/**
1921
* The synthesize options.
2022
*/
@@ -94,6 +96,7 @@ public interface Voice {
9496
private String accept;
9597
private String voice;
9698
private String customizationId;
99+
private List<String> timings;
97100

98101
/**
99102
* Builder.
@@ -103,12 +106,14 @@ public static class Builder {
103106
private String accept;
104107
private String voice;
105108
private String customizationId;
109+
private List<String> timings;
106110

107111
private Builder(SynthesizeOptions synthesizeOptions) {
108112
text = synthesizeOptions.text;
109113
accept = synthesizeOptions.accept;
110114
voice = synthesizeOptions.voice;
111115
customizationId = synthesizeOptions.customizationId;
116+
timings = synthesizeOptions.timings;
112117
}
113118

114119
/**
@@ -178,6 +183,17 @@ public Builder customizationId(String customizationId) {
178183
this.customizationId = customizationId;
179184
return this;
180185
}
186+
187+
/**
188+
* Set the timings.
189+
*
190+
* @param timings the timings
191+
* @return the SynthesizeOptions builder
192+
*/
193+
public Builder timings(List<String> timings) {
194+
this.timings = timings;
195+
return this;
196+
}
181197
}
182198

183199
private SynthesizeOptions(Builder builder) {
@@ -186,6 +202,7 @@ private SynthesizeOptions(Builder builder) {
186202
accept = builder.accept;
187203
voice = builder.voice;
188204
customizationId = builder.customizationId;
205+
timings = builder.timings;
189206
}
190207

191208
/**
@@ -247,4 +264,20 @@ public String voice() {
247264
public String customizationId() {
248265
return customizationId;
249266
}
267+
268+
/**
269+
* Gets the timings.
270+
*
271+
* An array that specifies whether the service is to return word timing information for all strings of the input
272+
* text. Specify `words` as the element of the array to request word timing information. The service returns the
273+
* start and end time of each word of the input. Specify an empty array or omit the parameter to receive no word
274+
* timing information. Not supported for Japanese input text.
275+
*
276+
* NOTE: This parameter only works for the `synthesizeUsingWebSocket` method.
277+
*
278+
* @return the timings
279+
*/
280+
public List<String> timings() {
281+
return timings;
282+
}
250283
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.model;
2+
3+
import com.ibm.watson.developer_cloud.service.model.GenericModel;
4+
5+
import java.util.List;
6+
7+
public class Timings extends GenericModel {
8+
private List<WordTiming> words;
9+
10+
public List<WordTiming> getWords() {
11+
return words;
12+
}
13+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.model;
2+
3+
import com.google.gson.annotations.JsonAdapter;
4+
import com.ibm.watson.developer_cloud.service.model.GenericModel;
5+
import com.ibm.watson.developer_cloud.text_to_speech.v1.util.WordTimingTypeAdapter;
6+
7+
@JsonAdapter(WordTimingTypeAdapter.class)
8+
public class WordTiming extends GenericModel {
9+
private String word;
10+
private Double startTime;
11+
private Double endTime;
12+
13+
public String getWord() {
14+
return word;
15+
}
16+
17+
public Double getStartTime() {
18+
return startTime;
19+
}
20+
21+
public Double getEndTime() {
22+
return endTime;
23+
}
24+
25+
public void setWord(String word) {
26+
this.word = word;
27+
}
28+
29+
public void setStartTime(Double startTime) {
30+
this.startTime = startTime;
31+
}
32+
33+
public void setEndTime(Double endTime) {
34+
this.endTime = endTime;
35+
}
36+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.util;
2+
3+
import com.google.gson.TypeAdapter;
4+
import com.google.gson.stream.JsonReader;
5+
import com.google.gson.stream.JsonToken;
6+
import com.google.gson.stream.JsonWriter;
7+
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.MarkTiming;
8+
9+
import java.io.IOException;
10+
11+
public class MarkTimingTypeAdapter extends TypeAdapter<MarkTiming> {
12+
/*
13+
* (non-Javadoc)
14+
* @see com.google.gson.TypeAdapter#read(com.google.gson.stream.JsonReader)
15+
*/
16+
@Override
17+
public MarkTiming read(JsonReader in) throws IOException {
18+
if (in.peek() == JsonToken.NULL) {
19+
in.nextNull();
20+
return null;
21+
}
22+
23+
final MarkTiming markTiming = new MarkTiming();
24+
in.beginArray();
25+
26+
if (in.peek() == JsonToken.STRING) {
27+
markTiming.setMark(in.nextString());
28+
}
29+
if (in.peek() == JsonToken.NUMBER) {
30+
markTiming.setTime(in.nextDouble());
31+
}
32+
33+
in.endArray();
34+
return markTiming;
35+
}
36+
37+
/*
38+
* (non-Javadoc)
39+
* @see com.google.gson.TypeAdapter#write(com.google.gson.stream.JsonWriter, java.lang.Object)
40+
*/
41+
@Override
42+
public void write(JsonWriter out, MarkTiming markTiming) throws IOException {
43+
out.beginArray();
44+
45+
out.value(markTiming.getMark());
46+
out.value(markTiming.getTime());
47+
48+
out.endArray();
49+
out.flush();
50+
}
51+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.util;
2+
3+
import com.google.gson.TypeAdapter;
4+
import com.google.gson.stream.JsonReader;
5+
import com.google.gson.stream.JsonToken;
6+
import com.google.gson.stream.JsonWriter;
7+
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.WordTiming;
8+
9+
import java.io.IOException;
10+
11+
public class WordTimingTypeAdapter extends TypeAdapter<WordTiming> {
12+
/*
13+
* (non-Javadoc)
14+
* @see com.google.gson.TypeAdapter#read(com.google.gson.stream.JsonReader)
15+
*/
16+
@Override
17+
public WordTiming read(JsonReader in) throws IOException {
18+
if (in.peek() == JsonToken.NULL) {
19+
in.nextNull();
20+
return null;
21+
}
22+
23+
final WordTiming wordTiming = new WordTiming();
24+
in.beginArray();
25+
26+
if (in.peek() == JsonToken.STRING) {
27+
wordTiming.setWord(in.nextString());
28+
}
29+
if (in.peek() == JsonToken.NUMBER) {
30+
wordTiming.setStartTime(in.nextDouble());
31+
}
32+
if (in.peek() == JsonToken.NUMBER) {
33+
wordTiming.setEndTime(in.nextDouble());
34+
}
35+
36+
in.endArray();
37+
return wordTiming;
38+
}
39+
40+
/*
41+
* (non-Javadoc)
42+
* @see com.google.gson.TypeAdapter#write(com.google.gson.stream.JsonWriter, java.lang.Object)
43+
*/
44+
@Override
45+
public void write(JsonWriter out, WordTiming wordTiming) throws IOException {
46+
out.beginArray();
47+
48+
out.value(wordTiming.getWord());
49+
out.value(wordTiming.getStartTime());
50+
out.value(wordTiming.getEndTime());
51+
52+
out.endArray();
53+
out.flush();
54+
}
55+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package com.ibm.watson.developer_cloud.text_to_speech.v1.websocket;
2+
3+
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Marks;
4+
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Timings;
5+
6+
import java.util.logging.Level;
7+
import java.util.logging.Logger;
8+
9+
public class BaseSynthesizeCallback implements SynthesizeCallback {
10+
private static final Logger LOG = Logger.getLogger(BaseSynthesizeCallback.class.getName());
11+
12+
/*
13+
* (non-Javadoc)
14+
* @see com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onConnected()
15+
*/
16+
public void onConnected() {}
17+
18+
/*
19+
* (non-Javadoc)
20+
* @see
21+
* com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onError(java.lang
22+
* .Exception)
23+
*/
24+
public void onError(Exception e) {
25+
LOG.log(Level.SEVERE, e.getMessage(), e);
26+
}
27+
28+
/*
29+
* (non-Javadoc)
30+
* @see
31+
* com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onWarning(java.lang
32+
* .Exception)
33+
*/
34+
public void onWarning(Exception e) {
35+
LOG.log(Level.WARNING, e.getMessage(), e);
36+
}
37+
38+
/*
39+
* (non-Javadoc)
40+
* @see
41+
* com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onDisconnected()
42+
*/
43+
public void onDisconnected() {}
44+
45+
/*
46+
* (non-Javadoc)
47+
* @see com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onContentType()
48+
*/
49+
@Override
50+
public void onContentType(String contentType) {}
51+
52+
/*
53+
* (non-Javadoc)
54+
* @see com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onTimings()
55+
*/
56+
@Override
57+
public void onTimings(Timings timings) {}
58+
59+
/*
60+
* (non-Javadoc)
61+
* @see com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onMarks()
62+
*/
63+
@Override
64+
public void onMarks(Marks marks) {}
65+
66+
/*
67+
* (non-Javadoc)
68+
* @see com.ibm.watson.developer_cloud.text_to_speech.v1.websocket.SynthesizeCallback#onAudioStream()
69+
*/
70+
@Override
71+
public void onAudioStream(byte[] bytes) {}
72+
}

0 commit comments

Comments
 (0)