Skip to content

Commit daa630b

Browse files
committed
added speakerlessInterim option to speaker stream
1 parent ce18c7e commit daa630b

File tree

2 files changed

+119
-17
lines changed

2 files changed

+119
-17
lines changed

speech-to-text/speaker-stream.js

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,12 @@ var noTimestamps = require('./no-timestamps');
3636
*
3737
* @constructor
3838
* @param {Object} options
39+
* @param {boolean} [options.speakerlessInterim=false] - emit interim results before the initial speaker has been identified (allows the UI to update more quickly)
3940
*/
4041
function SpeakerStream(options) {
4142
options = options || {};
4243
options.objectMode = true;
44+
this.options = options;
4345
Transform.call(this, options);
4446
/**
4547
* timestamps is a 2-d array.
@@ -80,7 +82,6 @@ function SpeakerStream(options) {
8082
* @private
8183
*/
8284
this.speaker_labels = [];
83-
8485
}
8586
util.inherits(SpeakerStream, Transform);
8687

@@ -96,8 +97,11 @@ var TO = 2;
9697

9798
SpeakerStream.ERROR_MISMATCH = 'MISMATCH';
9899

99-
100-
SpeakerStream.prototype.process = function() {
100+
/**
101+
* Builds a results object with everything we've got so far
102+
* @returns {*}
103+
*/
104+
SpeakerStream.prototype.buildMessage = function() {
101105
var final = this.isFinal();
102106
var errored = false;
103107

@@ -153,18 +157,8 @@ SpeakerStream.prototype.process = function() {
153157
return arr;
154158
}, []);
155159

156-
if (results.length) {
157-
/**
158-
* Emit an object similar to the normal results object, only with multiple entries in the results Array (a new one
159-
* each time the speaker changes), and with a speaker field on the results.
160-
*
161-
* result_index is always 0 because the results always includes the entire conversation so far.
162-
*
163-
* @event SpeakerStream#data
164-
* @param {Object} results-format message with multiple results and an extra speaker field on each result
165-
*/
166-
this.push({results: results, result_index: 0});
167-
}
160+
// result_index is always 0 because the results always include the entire conversation so far.
161+
return {results: results, result_index: 0};
168162
};
169163

170164
/**
@@ -219,12 +213,29 @@ SpeakerStream.prototype.handleSpeakerLabels = function(data) {
219213
};
220214

221215
SpeakerStream.prototype._transform = function(data, encoding, next) {
216+
var message;
222217
if (Array.isArray(data.results)) {
223218
this.handleResults(data);
219+
if (this.options.speakerlessInterim && data.results.length && data.results[0].final === false) {
220+
message = this.buildMessage();
221+
message.results = message.results.concat(data.results);
222+
}
224223
}
225224
if (Array.isArray(data.speaker_labels)) {
226225
this.handleSpeakerLabels(data);
227-
this.process();
226+
message = this.buildMessage();
227+
}
228+
if (message) {
229+
/**
230+
* Emit an object similar to the normal results object, only with multiple entries in the results Array (a new one
231+
* each time the speaker changes), and with a speaker field on the results.
232+
*
233+
* result_index is always 0 because the results always include the entire conversation so far.
234+
*
235+
* @event SpeakerStream#data
236+
* @param {Object} results-format message with multiple results and an extra speaker field on each result
237+
*/
238+
this.push(message);
228239
}
229240
next();
230241
};
@@ -241,7 +252,8 @@ SpeakerStream.prototype._flush = function(done) {
241252
if (this.timestamps.length && !this.speaker_labels.length) {
242253
msg = 'No speaker_labels found. SpeakerStream requires speaker_labels to be enabled.';
243254
} else {
244-
msg = 'Mismatch between number of word timestamps (' + this.timestamps.length + ') and number of speaker_labels (' + this.speaker_labels.length + ') - some data may be lost.';
255+
msg = 'Mismatch between number of word timestamps (' + this.timestamps.length + ') and number of speaker_labels (' +
256+
this.speaker_labels.length + ') - some data may be lost.';
245257
}
246258
var err = new Error(msg);
247259
err.name = SpeakerStream.ERROR_MISMATCH;

test/speaker-stream-spec.js

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,96 @@ describe('SpeakerStream', function() {
291291
});
292292

293293

294+
it('should provide early results when options.speakerlessInterim=true', function(done) {
295+
var stream = new SpeakerStream({speakerlessInterim: true});
296+
stream.on('error', done);
297+
var actual = [];
298+
stream.on('data', function(data) {
299+
actual.push(data);
300+
});
301+
302+
var expected = [{
303+
results: [{
304+
alternatives: [{
305+
timestamps: [
306+
['hi', 0.06, 0.28],
307+
],
308+
transcript: 'hi '
309+
}],
310+
final: false
311+
}],
312+
result_index: 0
313+
}, {
314+
results: [{
315+
speaker: 0,
316+
alternatives: [{
317+
timestamps: [
318+
['hi', 0.06, 0.28],
319+
],
320+
transcript: 'hi '
321+
}],
322+
final: true
323+
},
324+
{
325+
speaker: 1,
326+
alternatives: [{
327+
timestamps: [
328+
['hello', 0.28, 0.37],
329+
],
330+
transcript: 'hello '
331+
}],
332+
final: true
333+
}],
334+
result_index: 0
335+
}];
336+
337+
stream.on('end', function() {
338+
assert.deepEqual(actual, expected);
339+
done();
340+
});
341+
342+
stream.write({
343+
results: [{
344+
alternatives: [{
345+
timestamps: [
346+
['hi', 0.06, 0.28],
347+
],
348+
transcript: 'hi '
349+
}],
350+
final: false
351+
}],
352+
result_index: 0
353+
});
354+
stream.write({
355+
results: [{
356+
alternatives: [{
357+
timestamps: [
358+
['hi', 0.06, 0.28],
359+
['hello', 0.28, 0.37],
360+
],
361+
transcript: 'hi hello '
362+
}],
363+
final: true
364+
}],
365+
result_index: 0
366+
});
367+
stream.end({
368+
speaker_labels: [{
369+
from: 0.06,
370+
to: 0.28,
371+
speaker: 0,
372+
confidence: 0.512,
373+
final: false
374+
}, {
375+
from: 0.28,
376+
to: 0.37,
377+
speaker: 1,
378+
confidence: 0.512,
379+
final: true
380+
}]
381+
});
382+
});
383+
294384
describe('speakerLabelsSorter', function() {
295385
it('should correctly sort speaker labels by start time and then by end time', function() {
296386
var input = [

0 commit comments

Comments
 (0)