
Commit 414e8af

update samples
1 parent f440c98 commit 414e8af

18 files changed: +759 -1282 lines

sdk/cognitiveservices/azure-ai-speech-transcription/README.md

Lines changed: 29 additions & 29 deletions
@@ -206,72 +206,72 @@ result.getCombinedPhrases().forEach(phrase -> {
 });
 ```
 
-### Use enhanced mode for improved transcription quality
+### Transcribe with multi-language support
 
-Enhanced mode provides advanced features to improve transcription accuracy with custom prompts, translation capabilities, and task-specific optimizations.
+The service can automatically detect and transcribe multiple languages within the same audio file.
 
-#### Basic enhanced mode
-
-```java readme-sample-enhancedModeBasic
+```java com.azure.ai.speech.transcription.transcriptionoptions.multilanguage
 byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
 
 AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData))
     .setFilename("audio.wav");
 
-// Enable enhanced mode for improved transcription quality
-EnhancedModeOptions enhancedMode = new EnhancedModeOptions();
-
-TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
-    .setLocales(java.util.Arrays.asList("en-US"))
-    .setEnhancedModeOptions(enhancedMode);
+// Configure transcription WITHOUT specifying locales
+// This allows the service to auto-detect and transcribe multiple languages
+TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
 
 TranscriptionResult result = client.transcribe(options);
+
+result.getPhrases().forEach(phrase -> {
+    System.out.println("Language: " + phrase.getLocale());
+    System.out.println("Text: " + phrase.getText());
+});
 ```
 
-#### Enhanced mode with custom prompts
+### Transcribe with enhanced mode
 
-Use prompts to guide transcription with domain-specific terminology, improving accuracy for specialized content like medical, legal, or technical discussions.
+Enhanced mode provides advanced features to improve transcription accuracy with custom prompts.
 
-```java readme-sample-enhancedModeWithPrompts
+```java com.azure.ai.speech.transcription.transcriptionoptions.enhancedmode
 byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
 
 AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData))
     .setFilename("audio.wav");
 
-// Use prompts to guide transcription with domain-specific terminology
 EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
-    .setPrompts(java.util.Arrays.asList(
-        "Medical consultation discussing hypertension and diabetes",
-        "Common medications: metformin, lisinopril, atorvastatin",
-        "Patient symptoms and treatment plan"
-    ));
+    .setTask("transcribe")
+    .setPrompts(java.util.Arrays.asList("Output must be in lexical format."));
 
 TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
-    .setLocales(java.util.Arrays.asList("en-US"))
     .setEnhancedModeOptions(enhancedMode);
 
 TranscriptionResult result = client.transcribe(options);
+
+System.out.println("Transcription: " + result.getCombinedPhrases().get(0).getText());
 ```
 
-#### Enhanced mode with translation
+### Transcribe with phrase list
 
-Transcribe audio in one language and translate to another language simultaneously.
+You can use a phrase list to improve recognition accuracy for specific terms.
 
-```java readme-sample-enhancedModeWithTranslation
+```java com.azure.ai.speech.transcription.transcriptionoptions.phraselist
 byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
 
 AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData))
     .setFilename("audio.wav");
 
-// Configure enhanced mode to transcribe Spanish audio and translate to English
-EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
-    .setTargetLanguage("en-US"); // Translate to English
+PhraseListOptions phraseListOptions = new PhraseListOptions()
+    .setPhrases(java.util.Arrays.asList("Azure", "Cognitive Services"))
+    .setBiasingWeight(5.0);
 
 TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
-    .setLocales(java.util.Arrays.asList("es-ES")) // Source language: Spanish
-    .setEnhancedModeOptions(enhancedMode);
+    .setPhraseListOptions(phraseListOptions);
 
 TranscriptionResult result = client.transcribe(options);
+
+result.getCombinedPhrases().forEach(phrase -> {
+    System.out.println(phrase.getText());
+});
 ```
 
 ### Service API versions
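
The three new samples above each set a single option on `TranscriptionOptions`. As a rough sketch (not part of this commit), the same fluent setters look combinable on one request; treat the combination, the `en-US` locale, and the pre-built `client` as illustrative assumptions rather than documented behavior.

```java
// Hypothetical combination of the options shown in the updated samples; not taken from the README.
byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));

AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData))
    .setFilename("audio.wav");

// Phrase list biasing, as in the phrase-list sample.
PhraseListOptions phraseListOptions = new PhraseListOptions()
    .setPhrases(java.util.Arrays.asList("Azure", "Cognitive Services"))
    .setBiasingWeight(5.0);

// Enhanced mode with a prompt, as in the enhanced-mode sample.
EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
    .setTask("transcribe")
    .setPrompts(java.util.Arrays.asList("Output must be in lexical format."));

// Assumption: the service accepts a locale, enhanced mode, and a phrase list in one request.
TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
    .setLocales(java.util.Arrays.asList("en-US"))
    .setEnhancedModeOptions(enhancedMode)
    .setPhraseListOptions(phraseListOptions);

TranscriptionResult result = client.transcribe(options);
result.getCombinedPhrases().forEach(phrase -> System.out.println(phrase.getText()));
```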

sdk/cognitiveservices/azure-ai-speech-transcription/customization/src/main/java/SpeechTranscriptionCustomization.java

Lines changed: 2 additions & 2 deletions
@@ -275,7 +275,7 @@ private void customizeTranscriptionClient(PackageCustomization packageCustomizat
     + "if (options.getFileDetails() != null) { requestContent.setAudio(options.getFileDetails()); } "
     + "RequestOptions requestOptions = new RequestOptions(); "
     + "Response<BinaryData> response = transcribeWithResponse("
-    + "new MultipartFormDataHelper(requestOptions).serializeJsonField(\"options\", requestContent.getOptions())"
+    + "new MultipartFormDataHelper(requestOptions).serializeJsonField(\"definition\", requestContent.getOptions())"
     + ".serializeFileField(\"audio\", requestContent.getAudio() == null ? null : requestContent.getAudio().getContent(), "
     + "requestContent.getAudio() == null ? null : requestContent.getAudio().getContentType(), "
     + "requestContent.getAudio() == null ? null : requestContent.getAudio().getFilename())"
@@ -359,7 +359,7 @@ private void customizeTranscriptionAsyncClient(PackageCustomization packageCusto
     .setBody(parseBlock("{ TranscriptionContent requestContent = new TranscriptionContent(options); "
         + "if (options.getFileDetails() != null) { requestContent.setAudio(options.getFileDetails()); } "
         + "RequestOptions requestOptions = new RequestOptions(); " + "return transcribeWithResponse("
-        + "new MultipartFormDataHelper(requestOptions).serializeJsonField(\"options\", requestContent.getOptions())"
+        + "new MultipartFormDataHelper(requestOptions).serializeJsonField(\"definition\", requestContent.getOptions())"
         + ".serializeFileField(\"audio\", requestContent.getAudio() == null ? null : requestContent.getAudio().getContent(), "
         + "requestContent.getAudio() == null ? null : requestContent.getAudio().getContentType(), "
         + "requestContent.getAudio() == null ? null : requestContent.getAudio().getFilename())"

sdk/cognitiveservices/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/TranscriptionAsyncClient.java

Lines changed: 3 additions & 3 deletions
@@ -48,7 +48,7 @@ public final class TranscriptionAsyncClient {
     /**
      * Transcribes the provided audio stream.
      * <p><strong>Response Body Schema</strong></p>
-     * 
+     *
      * <pre>
      * {@code
      * {
@@ -115,7 +115,7 @@ Mono<TranscriptionResult> transcribe(TranscriptionContent body) {
         // Generated convenience method for transcribeWithResponse
         RequestOptions requestOptions = new RequestOptions();
         return transcribeWithResponse(
-            new MultipartFormDataHelper(requestOptions).serializeJsonField("options", body.getOptions())
+            new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", body.getOptions())
                 .serializeFileField("audio", body.getAudio() == null ? null : body.getAudio().getContent(),
                     body.getAudio() == null ? null : body.getAudio().getContentType(),
                     body.getAudio() == null ? null : body.getAudio().getFilename())
@@ -165,7 +165,7 @@ public Mono<Response<TranscriptionResult>> transcribeWithResponse(TranscriptionO
         }
         RequestOptions requestOptions = new RequestOptions();
         return transcribeWithResponse(
-            new MultipartFormDataHelper(requestOptions).serializeJsonField("options", requestContent.getOptions())
+            new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", requestContent.getOptions())
                 .serializeFileField("audio",
                     requestContent.getAudio() == null ? null : requestContent.getAudio().getContent(),
                     requestContent.getAudio() == null ? null : requestContent.getAudio().getContentType(),
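
For context, a minimal async counterpart to the README samples, exercising the method this hunk touches. It assumes `transcribeWithResponse(TranscriptionOptions)` is the public entry point (the signature is truncated in the hunk header above) and that `asyncClient` is an already-built `TranscriptionAsyncClient`; neither is confirmed by this diff.

```java
// Sketch only: the async transcribeWithResponse returns Mono<Response<TranscriptionResult>>.
byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));

AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData))
    .setFilename("audio.wav");

TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);

asyncClient.transcribeWithResponse(options)
    .subscribe(response -> {
        TranscriptionResult result = response.getValue();
        result.getCombinedPhrases().forEach(phrase -> System.out.println(phrase.getText()));
    });
```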

sdk/cognitiveservices/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/TranscriptionClient.java

Lines changed: 3 additions & 3 deletions
@@ -46,7 +46,7 @@ public final class TranscriptionClient {
     /**
      * Transcribes the provided audio stream.
      * <p><strong>Response Body Schema</strong></p>
-     * 
+     *
      * <pre>
      * {@code
      * {
@@ -112,7 +112,7 @@ TranscriptionResult transcribe(TranscriptionContent body) {
         // Generated convenience method for transcribeWithResponse
         RequestOptions requestOptions = new RequestOptions();
         return transcribeWithResponse(
-            new MultipartFormDataHelper(requestOptions).serializeJsonField("options", body.getOptions())
+            new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", body.getOptions())
                 .serializeFileField("audio", body.getAudio() == null ? null : body.getAudio().getContent(),
                     body.getAudio() == null ? null : body.getAudio().getContentType(),
                     body.getAudio() == null ? null : body.getAudio().getFilename())
@@ -161,7 +161,7 @@ public Response<TranscriptionResult> transcribeWithResponse(TranscriptionOptions
         }
         RequestOptions requestOptions = new RequestOptions();
         Response<BinaryData> response = transcribeWithResponse(
-            new MultipartFormDataHelper(requestOptions).serializeJsonField("options", requestContent.getOptions())
+            new MultipartFormDataHelper(requestOptions).serializeJsonField("definition", requestContent.getOptions())
                 .serializeFileField("audio",
                     requestContent.getAudio() == null ? null : requestContent.getAudio().getContent(),
                     requestContent.getAudio() == null ? null : requestContent.getAudio().getContentType(),
