Skip to content

Commit 8b819db

Browse files
authored
AISDK-202: Add Speaker Names option (#55)
1 parent ebdddc8 commit 8b819db

File tree

6 files changed

+163
-0
lines changed

6 files changed

+163
-0
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ segment.setStartTimestamp(2.0);
8888
segment.setEndTimestamp(100.5);
8989
options.setSegmentsToTranscribe(List.of(segment));
9090
91+
// optional speaker names
92+
SpeakerName speaker = new SpeakerName();
93+
speaker.setDisplayName("Alan Mathison Turing");
94+
options.setSpeakerNames(List.of(speaker));
95+
9196
RevAiJob revAiJob = apiClient.submitJobUrl(urlLinkToFile, options);
9297
```
9398

src/main/java/ai/rev/speechtotext/models/asynchronous/Monologue.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ public class Monologue {
1212
@SerializedName("speaker")
1313
private Integer speaker;
1414

15+
@SerializedName("speaker_info")
16+
private SpeakerInfo speakerInfo;
17+
1518
@SerializedName("elements")
1619
private List<Element> elements;
1720

@@ -33,6 +36,24 @@ public void setSpeaker(Integer speaker) {
3336
this.speaker = speaker;
3437
}
3538

39+
/**
40+
* Returns the speaker info for this monologue.
41+
*
42+
* @return The speaker info for this monologue.
43+
*/
44+
public SpeakerInfo getSpeakerInfo() {
45+
return speakerInfo;
46+
}
47+
48+
/**
49+
* Sets the speaker info for this monologue.
50+
*
51+
* @param speakerInfo Info about the speaker for this monologue.
52+
*/
53+
public void setSpeakerInfo(SpeakerInfo speakerInfo) {
54+
this.speakerInfo = speakerInfo;
55+
}
56+
3657
/**
3758
* Returns a list of {@link Element} objects.
3859
*

src/main/java/ai/rev/speechtotext/models/asynchronous/RevAiJobOptions.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ public class RevAiJobOptions {
125125
@SerializedName("segments_to_transcribe")
126126
private List<SegmentToTranscribe> segmentsToTranscribe;
127127

128+
/**
129+
* Optional and only available with transcriber "human".
130+
* Specifies a list of names for the speakers in an audio file.
131+
*/
132+
@SerializedName("speaker_names")
133+
private List<SpeakerName> speakerNames;
134+
128135
/**
129136
* Returns the media url.
130137
*
@@ -514,4 +521,23 @@ public List<SegmentToTranscribe> getSegmentsToTranscribe() {
514521
public void setSegmentsToTranscribe(List<SegmentToTranscribe> segmentsToTranscribe) {
515522
this.segmentsToTranscribe = segmentsToTranscribe;
516523
}
524+
525+
/**
526+
* Returns the list of speaker names
527+
*
528+
* @return List of speaker names
529+
*/
530+
public List<SpeakerName> getSpeakerNames() {
531+
return speakerNames;
532+
}
533+
534+
/**
535+
* Specifies the list of speaker names in an audio file
536+
* This property is optional but can only be used with "human" transcriber.
537+
*
538+
* @param speakerNames List of speaker names
539+
*/
540+
public void setSpeakerNames(List<SpeakerName> speakerNames) {
541+
this.speakerNames = speakerNames;
542+
}
517543
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package ai.rev.speechtotext.models.asynchronous;
2+
3+
import com.google.gson.annotations.SerializedName;
4+
5+
/**
6+
* A SpeakerInfo object represents the information related to the speaker in a transcript
7+
* Used for speech-to-text jobs submitted to be transcribed by a human.
8+
* */
9+
public class SpeakerInfo {
10+
@SerializedName("id")
11+
private String id;
12+
13+
@SerializedName("display_name")
14+
private String displayName;
15+
16+
/**
17+
* Returns the id of the speaker
18+
*
19+
* @return the id of the speaker
20+
*/
21+
public String getIdName() {
22+
return id;
23+
}
24+
25+
/**
26+
* Sets the id of the speaker
27+
*
28+
* @param displayName the displayed name of the speaker
29+
*/
30+
public void setId(String id) {
31+
this.id = id;
32+
}
33+
34+
/**
35+
* Returns the displayed name of the speaker
36+
*
37+
* @return the displayed name of the speaker
38+
*/
39+
public String getDisplayName() {
40+
return displayName;
41+
}
42+
43+
/**
44+
* Sets the displayed name of the speaker
45+
*
46+
* @param displayName the displayed name of the speaker
47+
*/
48+
public void setDisplayName(String displayName) {
49+
this.displayName = displayName;
50+
}
51+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package ai.rev.speechtotext.models.asynchronous;
2+
3+
import com.google.gson.annotations.SerializedName;
4+
5+
/**
6+
* A object representing information provided about a speaker
7+
* Used for speech-to-text jobs submitted to be transcribed by a human.
8+
* */
9+
public class SpeakerName {
10+
@SerializedName("display_name")
11+
private String displayName;
12+
13+
/**
14+
* Returns the displayed name of the speaker
15+
*
16+
* @return the displayed name of the speaker
17+
*/
18+
public String getDisplayName() {
19+
return displayName;
20+
}
21+
22+
/**
23+
* Sets the displayed name of the speaker
24+
*
25+
* @param displayName the displayed name of the speaker
26+
*/
27+
public void setDisplayName(String displayName) {
28+
this.displayName = displayName;
29+
}
30+
}

src/test/java/ai/rev/speechtotext/unit/RevAiJobTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import ai.rev.speechtotext.models.asynchronous.RevAiJobOptions;
88
import ai.rev.speechtotext.models.asynchronous.RevAiJobStatus;
99
import ai.rev.speechtotext.models.asynchronous.RevAiJobType;
10+
import ai.rev.speechtotext.models.asynchronous.SpeakerName;
11+
import ai.rev.speechtotext.models.asynchronous.SegmentToTranscribe;
1012
import ai.rev.testutils.AssertHelper;
1113
import com.google.gson.Gson;
1214
import okhttp3.MediaType;
@@ -163,6 +165,20 @@ public void SubmitJobUrl_UrlAndOptionsAreSpecified_ReturnsARevAiJob() throws IOE
163165
options.setDeleteAfterSeconds(0);
164166
options.setLanguage("en");
165167
options.setTranscriber("machine_v2");
168+
options.setVerbatim(true);
169+
options.setRush(true);
170+
options.setTestMode(true);
171+
List<SegmentToTranscribe> segmentToTranscribeList = new ArrayList<>();
172+
SegmentToTranscribe segment = new SegmentToTranscribe();
173+
segment.setStartTimestamp(2.0);
174+
segment.setEndTimestamp(100.5);
175+
segmentToTranscribeList.add(segment);
176+
options.setSegmentsToTranscribe(segmentToTranscribeList);
177+
List<SpeakerName> speakerNamesList = new ArrayList<>();
178+
SpeakerName speaker = new SpeakerName();
179+
speaker.setDisplayName("Steve");
180+
speakerNamesList.add(speaker);
181+
options.setSpeakerNames(speakerNamesList);
166182

167183
RevAiJob revAiJob = sut.submitJobUrl(options);
168184

@@ -186,6 +202,20 @@ public void SubmitJobUrl_UrlAndOptionsAreSpecified_WithAuthHeaders_ReturnsARevAi
186202
options.setDeleteAfterSeconds(0);
187203
options.setLanguage("en");
188204
options.setTranscriber("machine_v2");
205+
options.setVerbatim(true);
206+
options.setRush(true);
207+
options.setTestMode(true);
208+
List<SegmentToTranscribe> segmentToTranscribeList = new ArrayList<>();
209+
SegmentToTranscribe segment = new SegmentToTranscribe();
210+
segment.setStartTimestamp(2.0);
211+
segment.setEndTimestamp(100.5);
212+
segmentToTranscribeList.add(segment);
213+
options.setSegmentsToTranscribe(segmentToTranscribeList);
214+
List<SpeakerName> speakerNamesList = new ArrayList<>();
215+
SpeakerName speaker = new SpeakerName();
216+
speaker.setDisplayName("Steve");
217+
speakerNamesList.add(speaker);
218+
options.setSpeakerNames(speakerNamesList);
189219

190220
RevAiJob revAiJob = sut.submitJobUrl(options);
191221

0 commit comments

Comments
 (0)