Skip to content

Commit 94ba896

Browse files
committed
feat(text-to-speech): add support for using synthesize over a websocket connection
1 parent c76446d commit 94ba896

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed

lib/synthesize-stream.ts

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
/**
2+
* Copyright 2014 IBM Corp. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License
15+
*/
16+
17+
import extend = require('extend');
18+
import pick = require('object.pick');
19+
import { Readable } from 'stream';
20+
import websocket = require ('websocket');
21+
import qs = require('./querystring');
22+
23+
const w3cWebSocket = websocket.w3cwebsocket;
24+
25+
const PAYLOAD_PARAMS_ALLOWED = [
26+
'text',
27+
'accept',
28+
'timings'
29+
];
30+
31+
const QUERY_PARAMS_ALLOWED = [
32+
'watson-token',
33+
'voice',
34+
'customization_id',
35+
'x-watson-learning-opt-out',
36+
'x-watson-metadata'
37+
];
38+
39+
interface SynthesizeStream extends Readable {
40+
_readableState;
41+
}
42+
43+
class SynthesizeStream extends Readable {
44+
45+
static WEBSOCKET_CONNECTION_ERROR: string = 'WebSocket connection error';
46+
47+
private options;
48+
private socket;
49+
private initialized: boolean;
50+
private authenticated: boolean;
51+
52+
constructor(options) {
53+
super(options);
54+
this.options = options;
55+
this.initialized = false;
56+
this.authenticated = options.token_manager ? false : true;
57+
}
58+
59+
initialize() {
60+
const options = this.options;
61+
62+
const queryParams = pick(options, QUERY_PARAMS_ALLOWED);
63+
const queryString = qs.stringify(queryParams);
64+
65+
const url =
66+
(options.url || 'wss://stream.watsonplatform.net/text-to-speech/api')
67+
.replace(/^http/, 'ws') +
68+
'/v1/synthesize' +
69+
queryString;
70+
71+
const socket = (this.socket = new w3cWebSocket(
72+
url,
73+
null,
74+
null,
75+
options.headers,
76+
null,
77+
{ tlsOptions: { rejectUnauthorized: options.rejectUnauthorized }}
78+
));
79+
80+
// use class context within arrow functions
81+
const self = this;
82+
83+
socket.onopen = () => {
84+
const payload = pick(options, PAYLOAD_PARAMS_ALLOWED);
85+
socket.send(JSON.stringify(payload));
86+
/**
87+
* emitted once the WebSocket connection has been established
88+
* @event SynthesizeStream#open
89+
*/
90+
self.emit('open');
91+
};
92+
93+
socket.onmessage = message => {
94+
const chunk = message.data;
95+
// some messages are strings - emit those unencoded, but push them to
96+
// the stream as binary
97+
const data = typeof chunk === 'string' ? chunk : Buffer.from(chunk);
98+
/**
99+
* Emit any messages received over the wire, mainly used for debugging.
100+
*
101+
* @event SynthesizeStream#message
102+
* @param {Object} message - frame object received from service
103+
* @param {Object} data - a data attribute of the frame that's either a string or a Buffer/TypedArray
104+
*/
105+
self.emit('message', message, data);
106+
self.push(Buffer.from(chunk));
107+
};
108+
109+
socket.onerror = event => {
110+
const err = new Error('WebSocket connection error');
111+
err.name = SynthesizeStream.WEBSOCKET_CONNECTION_ERROR;
112+
err['event'] = event;
113+
self.emit('error', err);
114+
self.push(null);
115+
};
116+
117+
socket.onclose = event => {
118+
self.push(null);
119+
/**
120+
* @event SynthesizeStream#close
121+
* @param {Number} reasonCode
122+
* @param {String} description
123+
*/
124+
self.emit('close', event.code, event.reason);
125+
};
126+
127+
this.initialized = true;
128+
}
129+
130+
_read() {
131+
// even though we aren't controlling the read from websocket,
132+
// we can take advantage of the fact that _read is async and hack
133+
// this funtion to retrieve a token if the service is using IAM auth
134+
this.setAuthorizationHeaderToken(err => {
135+
if (err) {
136+
this.emit('error', err);
137+
this.push(null);
138+
return;
139+
}
140+
141+
if (!this.initialized) {
142+
this.initialize();
143+
}
144+
});
145+
}
146+
147+
setAuthorizationHeaderToken(callback) {
148+
if (!this.authenticated) {
149+
this.options.token_manager.getToken((err, token) => {
150+
if (err) {
151+
callback(err);
152+
}
153+
const authHeader = { authorization: 'Bearer ' + token };
154+
this.options.headers = extend(authHeader, this.options.headers);
155+
this.authenticated = true;
156+
callback(null);
157+
});
158+
} else {
159+
callback(null);
160+
}
161+
}
162+
}
163+
164+
export = SynthesizeStream;

text-to-speech/v1.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import extend = require('extend');
2+
import SynthesizeStream = require('../lib/synthesize-stream');
13
import GeneratedTextToSpeechV1 = require('./v1-generated');
4+
// tslint:disable-next-line:no-var-requires
5+
const pkg = require('../package.json');
26

37
class TextToSpeechV1 extends GeneratedTextToSpeechV1 {
48
constructor(options) {
@@ -105,6 +109,43 @@ class TextToSpeechV1 extends GeneratedTextToSpeechV1 {
105109

106110
return wavFileData;
107111
};
112+
113+
/**
114+
* Use the synthesize function with a readable stream over websockets
115+
*
116+
* @param {Object} params The parameters
117+
* @return {SynthesizeStream}
118+
*/
119+
synthesizeUsingWebSocket(params) {
120+
params = params || {};
121+
params.url = this._options.url;
122+
123+
// if using iam, headers will not be a property on _options
124+
// and the line `authorization: this._options.headers.Authorization`
125+
// will crash the code
126+
if (!this._options.headers) {
127+
this._options.headers = {};
128+
}
129+
130+
// if using iam, pass the token manager to the SynthesizeStream object
131+
if (this.tokenManager) {
132+
params.token_manager = this.tokenManager;
133+
}
134+
135+
params.headers = extend(
136+
{
137+
'user-agent': pkg.name + '-nodejs-' + pkg.version,
138+
authorization: this._options.headers.Authorization
139+
},
140+
params.headers
141+
);
142+
143+
// allow user to disable ssl verification when using websockets
144+
params.rejectUnauthorized = this._options.rejectUnauthorized;
145+
146+
// SynthesizeStream.main(params);
147+
return new SynthesizeStream(params);
148+
}
108149
}
109150

110151
export = TextToSpeechV1;

0 commit comments

Comments
 (0)