Skip to content

Commit 4ef97e0

Browse files
update sample, readme, tests
1 parent 821669f commit 4ef97e0

File tree

12 files changed

+261
-274
lines changed

12 files changed

+261
-274
lines changed

sdk/cognitiveservices/azure-ai-speech-transcription/README.md

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,11 @@ try {
141141
// Create transcription options
142142
TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
143143

144-
// Create transcribe request content
145-
TranscriptionContent requestContent = new TranscriptionContent()
146-
.setOptions(options);
147-
148144
// Transcribe audio
149-
TranscriptionResult result = client.transcribe(requestContent);
145+
TranscriptionResult result = client.transcribe(options);
150146

151147
// Process results
152-
System.out.println("Duration: " + result.getDuration() + "ms");
148+
System.out.println("Duration: " + result.getDuration() + " ms");
153149
result.getCombinedPhrases().forEach(phrase -> {
154150
System.out.println("Channel " + phrase.getChannel() + ": " + phrase.getText());
155151
});
@@ -171,12 +167,8 @@ TranscriptionClient client = new TranscriptionClientBuilder()
171167
// Create transcription options with audio URL
172168
TranscriptionOptions options = new TranscriptionOptions("https://example.com/audio.wav");
173169

174-
// Create transcribe request content
175-
TranscriptionContent requestContent = new TranscriptionContent()
176-
.setOptions(options);
177-
178170
// Transcribe audio
179-
TranscriptionResult result = client.transcribe(requestContent);
171+
TranscriptionResult result = client.transcribe(options);
180172

181173
// Process results
182174
result.getCombinedPhrases().forEach(phrase -> {
@@ -205,12 +197,8 @@ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(au
205197
// Create transcription options with AudioFileDetails
206198
TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
207199

208-
// Create transcribe request content
209-
TranscriptionContent requestContent = new TranscriptionContent()
210-
.setOptions(options);
211-
212200
// Transcribe audio
213-
TranscriptionResult result = client.transcribe(requestContent);
201+
TranscriptionResult result = client.transcribe(options);
214202

215203
// Process results
216204
result.getCombinedPhrases().forEach(phrase -> {
@@ -237,10 +225,7 @@ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
237225
.setLocales(java.util.Arrays.asList("en-US"))
238226
.setEnhancedModeOptions(enhancedMode);
239227

240-
TranscriptionContent requestContent = new TranscriptionContent()
241-
.setOptions(options);
242-
243-
TranscriptionResult result = client.transcribe(requestContent);
228+
TranscriptionResult result = client.transcribe(options);
244229
```
245230

246231
#### Enhanced mode with custom prompts
@@ -265,10 +250,7 @@ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
265250
.setLocales(java.util.Arrays.asList("en-US"))
266251
.setEnhancedModeOptions(enhancedMode);
267252

268-
TranscriptionContent requestContent = new TranscriptionContent()
269-
.setOptions(options);
270-
271-
TranscriptionResult result = client.transcribe(requestContent);
253+
TranscriptionResult result = client.transcribe(options);
272254
```
273255

274256
#### Enhanced mode with translation
@@ -289,10 +271,7 @@ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
289271
.setLocales(java.util.Arrays.asList("es-ES")) // Source language: Spanish
290272
.setEnhancedModeOptions(enhancedMode);
291273

292-
TranscriptionContent requestContent = new TranscriptionContent()
293-
.setOptions(options);
294-
295-
TranscriptionResult result = client.transcribe(requestContent);
274+
TranscriptionResult result = client.transcribe(options);
296275
```
297276

298277
### Service API versions

sdk/cognitiveservices/azure-ai-speech-transcription/customization/src/main/java/SpeechTranscriptionCustomization.java

Lines changed: 100 additions & 57 deletions
Large diffs are not rendered by default.

sdk/cognitiveservices/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/TranscriptionAsyncClient.java

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.azure.core.exception.ResourceNotFoundException;
1919
import com.azure.core.http.rest.RequestOptions;
2020
import com.azure.core.http.rest.Response;
21+
import com.azure.core.http.rest.SimpleResponse;
2122
import com.azure.core.util.BinaryData;
2223
import com.azure.core.util.FluxUtil;
2324
import com.azure.core.util.logging.ClientLogger;
@@ -47,7 +48,7 @@ public final class TranscriptionAsyncClient {
4748
/**
4849
* Transcribes the provided audio stream.
4950
* <p><strong>Response Body Schema</strong></p>
50-
*
51+
*
5152
* <pre>
5253
* {@code
5354
* {
@@ -114,7 +115,7 @@ Mono<TranscriptionResult> transcribe(TranscriptionContent body) {
114115
// Generated convenience method for transcribeWithResponse
115116
RequestOptions requestOptions = new RequestOptions();
116117
return transcribeWithResponse(
117-
new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", body.getOptions())
118+
new MultipartFormDataHelper(requestOptions).serializeJsonField("options", body.getOptions())
118119
.serializeFileField("audio", body.getAudio() == null ? null : body.getAudio().getContent(),
119120
body.getAudio() == null ? null : body.getAudio().getContentType(),
120121
body.getAudio() == null ? null : body.getAudio().getFilename())
@@ -136,10 +137,13 @@ Mono<TranscriptionResult> transcribe(TranscriptionContent body) {
136137
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
137138
* @return the result of the transcribe operation on successful completion of Mono.
138139
*/
140+
// Customized method added via post-generation customization
139141
@ServiceMethod(returns = ReturnType.SINGLE)
140142
public Mono<TranscriptionResult> transcribe(TranscriptionOptions options) {
141-
TranscriptionContent requestContent = new TranscriptionContent();
142-
requestContent.setOptions(options);
143+
TranscriptionContent requestContent = new TranscriptionContent(options);
144+
if (options.getFileDetails() != null) {
145+
requestContent.setAudio(options.getFileDetails());
146+
}
143147
return transcribe(requestContent);
144148
}
145149

@@ -155,35 +159,22 @@ public Mono<TranscriptionResult> transcribe(TranscriptionOptions options) {
155159
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
156160
* @return the response containing the result of the transcribe operation on successful completion of Mono.
157161
*/
162+
// Customized method added via post-generation customization
158163
public Mono<Response<TranscriptionResult>> transcribeWithResponse(TranscriptionOptions options) {
159-
TranscriptionContent requestContent = new TranscriptionContent();
160-
requestContent.setOptions(options);
164+
TranscriptionContent requestContent = new TranscriptionContent(options);
165+
if (options.getFileDetails() != null) {
166+
requestContent.setAudio(options.getFileDetails());
167+
}
161168
RequestOptions requestOptions = new RequestOptions();
162169
return transcribeWithResponse(
163-
new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", requestContent.getOptions())
170+
new MultipartFormDataHelper(requestOptions).serializeJsonField("options", requestContent.getOptions())
164171
.serializeFileField("audio",
165172
requestContent.getAudio() == null ? null : requestContent.getAudio().getContent(),
166173
requestContent.getAudio() == null ? null : requestContent.getAudio().getContentType(),
167174
requestContent.getAudio() == null ? null : requestContent.getAudio().getFilename())
168175
.end()
169176
.getRequestBody(),
170-
requestOptions).map(response -> new Response<TranscriptionResult>() {
171-
172-
public int getStatusCode() {
173-
return response.getStatusCode();
174-
}
175-
176-
public com.azure.core.http.HttpHeaders getHeaders() {
177-
return response.getHeaders();
178-
}
179-
180-
public com.azure.core.http.HttpRequest getRequest() {
181-
return response.getRequest();
182-
}
183-
184-
public TranscriptionResult getValue() {
185-
return response.getValue().toObject(TranscriptionResult.class);
186-
}
187-
});
177+
requestOptions).map(
178+
response -> new SimpleResponse<>(response, response.getValue().toObject(TranscriptionResult.class)));
188179
}
189180
}

sdk/cognitiveservices/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/TranscriptionClient.java

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.azure.core.exception.ResourceNotFoundException;
1919
import com.azure.core.http.rest.RequestOptions;
2020
import com.azure.core.http.rest.Response;
21+
import com.azure.core.http.rest.SimpleResponse;
2122
import com.azure.core.util.BinaryData;
2223
import com.azure.core.util.logging.ClientLogger;
2324

@@ -45,7 +46,7 @@ public final class TranscriptionClient {
4546
/**
4647
* Transcribes the provided audio stream.
4748
* <p><strong>Response Body Schema</strong></p>
48-
*
49+
*
4950
* <pre>
5051
* {@code
5152
* {
@@ -111,7 +112,7 @@ TranscriptionResult transcribe(TranscriptionContent body) {
111112
// Generated convenience method for transcribeWithResponse
112113
RequestOptions requestOptions = new RequestOptions();
113114
return transcribeWithResponse(
114-
new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", body.getOptions())
115+
new MultipartFormDataHelper(requestOptions).serializeJsonField("options", body.getOptions())
115116
.serializeFileField("audio", body.getAudio() == null ? null : body.getAudio().getContent(),
116117
body.getAudio() == null ? null : body.getAudio().getContentType(),
117118
body.getAudio() == null ? null : body.getAudio().getFilename())
@@ -120,6 +121,7 @@ TranscriptionResult transcribe(TranscriptionContent body) {
120121
requestOptions).getValue().toObject(TranscriptionResult.class);
121122
}
122123

124+
// Customized method added via post-generation customization
123125
/**
124126
* Transcribes the provided audio stream with the specified options.
125127
*
@@ -132,10 +134,13 @@ TranscriptionResult transcribe(TranscriptionContent body) {
132134
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
133135
* @return the result of the transcribe operation.
134136
*/
137+
// Customized method added via post-generation customization
135138
@ServiceMethod(returns = ReturnType.SINGLE)
136139
public TranscriptionResult transcribe(TranscriptionOptions options) {
137-
TranscriptionContent requestContent = new TranscriptionContent();
138-
requestContent.setOptions(options);
140+
TranscriptionContent requestContent = new TranscriptionContent(options);
141+
if (options.getFileDetails() != null) {
142+
requestContent.setAudio(options.getFileDetails());
143+
}
139144
return transcribe(requestContent);
140145
}
141146

@@ -151,36 +156,22 @@ public TranscriptionResult transcribe(TranscriptionOptions options) {
151156
* @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent.
152157
* @return the response containing the result of the transcribe operation.
153158
*/
159+
// Customized method added via post-generation customization
154160
public Response<TranscriptionResult> transcribeWithResponse(TranscriptionOptions options) {
155-
TranscriptionContent requestContent = new TranscriptionContent();
156-
requestContent.setOptions(options);
161+
TranscriptionContent requestContent = new TranscriptionContent(options);
162+
if (options.getFileDetails() != null) {
163+
requestContent.setAudio(options.getFileDetails());
164+
}
157165
RequestOptions requestOptions = new RequestOptions();
158166
Response<BinaryData> response = transcribeWithResponse(
159-
new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", requestContent.getOptions())
167+
new MultipartFormDataHelper(requestOptions).serializeJsonField("options", requestContent.getOptions())
160168
.serializeFileField("audio",
161169
requestContent.getAudio() == null ? null : requestContent.getAudio().getContent(),
162170
requestContent.getAudio() == null ? null : requestContent.getAudio().getContentType(),
163171
requestContent.getAudio() == null ? null : requestContent.getAudio().getFilename())
164172
.end()
165173
.getRequestBody(),
166174
requestOptions);
167-
return new Response<TranscriptionResult>() {
168-
169-
public int getStatusCode() {
170-
return response.getStatusCode();
171-
}
172-
173-
public com.azure.core.http.HttpHeaders getHeaders() {
174-
return response.getHeaders();
175-
}
176-
177-
public com.azure.core.http.HttpRequest getRequest() {
178-
return response.getRequest();
179-
}
180-
181-
public TranscriptionResult getValue() {
182-
return response.getValue().toObject(TranscriptionResult.class);
183-
}
184-
};
175+
return new SimpleResponse<>(response, response.getValue().toObject(TranscriptionResult.class));
185176
}
186177
}

sdk/cognitiveservices/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionContent.java

Lines changed: 24 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,6 @@
1212
@Fluent
1313
public final class TranscriptionContent {
1414

15-
/*
16-
* Metadata for a transcription request. This field contains a JSON-serialized object of type
17-
* `TranscriptionOptions`.
18-
*/
19-
@Generated
20-
private TranscriptionOptions options;
21-
2215
/*
2316
* The content of the audio file to be transcribed. The audio file must be shorter than 2 hours in audio duration
2417
* and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
@@ -27,57 +20,54 @@ public final class TranscriptionContent {
2720
private AudioFileDetails audio;
2821

2922
/**
30-
* Creates an instance of TranscriptionContent class.
23+
* Get the audio property: The content of the audio file to be transcribed. The audio file must be shorter than 2
24+
* hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
25+
*
26+
* @return the audio value.
3127
*/
3228
@Generated
33-
public TranscriptionContent() {
29+
public AudioFileDetails getAudio() {
30+
return this.audio;
3431
}
3532

3633
/**
37-
* Get the options property: Metadata for a transcription request. This field contains a JSON-serialized object of
38-
* type `TranscriptionOptions`.
34+
* Set the audio property: The content of the audio file to be transcribed. The audio file must be shorter than 2
35+
* hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
3936
*
40-
* @return the options value.
37+
* @param audio the audio value to set.
38+
* @return the TranscriptionContent object itself.
4139
*/
4240
@Generated
43-
public TranscriptionOptions getOptions() {
44-
return this.options;
41+
public TranscriptionContent setAudio(AudioFileDetails audio) {
42+
this.audio = audio;
43+
return this;
4544
}
4645

4746
/**
48-
* Set the options property: Metadata for a transcription request. This field contains a JSON-serialized object of
49-
* type `TranscriptionOptions`.
47+
* Creates an instance of TranscriptionContent class.
5048
*
5149
* @param options the options value to set.
52-
* @return the TranscriptionContent object itself.
5350
*/
5451
@Generated
55-
public TranscriptionContent setOptions(TranscriptionOptions options) {
52+
public TranscriptionContent(TranscriptionOptions options) {
5653
this.options = options;
57-
return this;
5854
}
5955

60-
/**
61-
* Get the audio property: The content of the audio file to be transcribed. The audio file must be shorter than 2
62-
* hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
63-
*
64-
* @return the audio value.
56+
/*
57+
* Metadata for a transcription request. This field contains a JSON-serialized object of type
58+
* `TranscriptionOptions`.
6559
*/
6660
@Generated
67-
public AudioFileDetails getAudio() {
68-
return this.audio;
69-
}
61+
private final TranscriptionOptions options;
7062

7163
/**
72-
* Set the audio property: The content of the audio file to be transcribed. The audio file must be shorter than 2
73-
* hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
64+
* Get the options property: Metadata for a transcription request. This field contains a JSON-serialized object of
65+
* type `TranscriptionOptions`.
7466
*
75-
* @param audio the audio value to set.
76-
* @return the TranscriptionContent object itself.
67+
* @return the options value.
7768
*/
7869
@Generated
79-
public TranscriptionContent setAudio(AudioFileDetails audio) {
80-
this.audio = audio;
81-
return this;
70+
public TranscriptionOptions getOptions() {
71+
return this.options;
8272
}
8373
}

sdk/cognitiveservices/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionOptions.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
285285
@Generated
286286
public static TranscriptionOptions fromJson(JsonReader jsonReader) throws IOException {
287287
return jsonReader.readObject(reader -> {
288-
TranscriptionOptions deserializedTranscriptionOptions = new TranscriptionOptions();
288+
TranscriptionOptions deserializedTranscriptionOptions = new TranscriptionOptions((String) null);
289289
while (reader.nextToken() != JsonToken.END_OBJECT) {
290290
String fieldName = reader.getFieldName();
291291
reader.nextToken();
@@ -294,7 +294,7 @@ public static TranscriptionOptions fromJson(JsonReader jsonReader) throws IOExce
294294
} else if ("locales".equals(fieldName)) {
295295
List<String> locales = reader.readArray(reader1 -> reader1.getString());
296296
deserializedTranscriptionOptions.locales = locales;
297-
} else if ("models".equals(fieldName)) {
297+
} else if ("localeModelMapping".equals(fieldName)) {
298298
Map<String, String> localeModelMapping = reader.readMap(reader1 -> reader1.getString());
299299
deserializedTranscriptionOptions.localeModelMapping = localeModelMapping;
300300
} else if ("profanityFilterMode".equals(fieldName)) {
@@ -338,4 +338,13 @@ public TranscriptionOptions(String audioUrl) {
338338
public TranscriptionOptions(AudioFileDetails fileDetails) {
339339
this.audioFileDetails = fileDetails;
340340
}
341+
342+
/**
343+
* Get the audioFileDetails property: The audio file details for transcription.
344+
*
345+
* @return the audioFileDetails value.
346+
*/
347+
public AudioFileDetails getFileDetails() {
348+
return this.audioFileDetails;
349+
}
341350
}
Binary file not shown.

sdk/cognitiveservices/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/EnhancedModeSample.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ private static void demonstrateTaskSpecificEnhancedMode(String endpoint, String
276276
if (result.getPhrases() != null && !result.getPhrases().isEmpty()) {
277277
System.out.println("\nTranscription phrases:");
278278
result.getPhrases().forEach(phrase ->
279-
System.out.println(" [" + phrase.getOffset() + "ms] " + phrase.getText())
279+
System.out.println(" [" + phrase.getOffset() + " ms] " + phrase.getText())
280280
);
281281
}
282282
System.out.println();

0 commit comments

Comments (0)