Skip to content

Commit 0a1ed18

Browse files
[azopenai] Add in whisper (audio transcription and translation) support (Azure#21599)
- Support whisper/audio APIs + example - Fix issue with prompt_annotations being renamed to prompt_filter_results (but could be either).
1 parent 8db55a4 commit 0a1ed18

35 files changed

+1993
-592
lines changed

eng/config.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"Name": "azopenai",
45-
"CoverageGoal": 0.39
45+
"CoverageGoal": 0.34
4646
},
4747
{
4848
"Name": "aztemplate",
@@ -110,4 +110,4 @@
110110
"CoverageGoal": 0.80
111111
}
112112
]
113-
}
113+
}

sdk/ai/azopenai/CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
# Release History
22

3-
## 0.2.1 (Unreleased)
3+
## 0.3.0 (Unreleased)
44

55
### Features Added
6+
- Support for Whisper audio APIs for transcription and translation using `GetAudioTranscription` and `GetAudioTranslation`.
67

78
### Breaking Changes
9+
- `ChatChoiceContentFilterResults` content filtering fields are now all typed as `ContentFilterResult`, instead of unique types for each field.
10+
- `PromptAnnotations` renamed to `PromptFilterResults` in `ChatCompletions` and `Completions`.
811

912
### Bugs Fixed
1013

sdk/ai/azopenai/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "go",
44
"TagPrefix": "go/ai/azopenai",
5-
"Tag": "go/ai/azopenai_7be6ae3c15"
5+
"Tag": "go/ai/azopenai_5ce13f37c4"
66
}

sdk/ai/azopenai/autorest.md

Lines changed: 125 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ directive:
9696
transform: $["$ref"] = "#/components/schemas/State"; delete $.allOf;
9797
- from: openapi-document
9898
where: $.components.schemas["ContentFilterResult"].properties.severity
99-
transform: $["$ref"] = "#/components/schemas/ContentFilterSeverity"; delete $.allOf;
99+
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
100100
- from: openapi-document
101101
where: $.components.schemas["ChatChoice"].properties.finish_reason
102102
transform: $["$ref"] = "#/components/schemas/CompletionsFinishReason"; delete $.oneOf;
@@ -109,6 +109,102 @@ directive:
109109
- from: openapi-document
110110
where: $.components.schemas["AzureCognitiveSearchChatExtensionConfiguration"].properties.queryType
111111
transform: $["$ref"] = "#/components/schemas/AzureCognitiveSearchQueryType"; delete $.allOf;
112+
- from: openapi-document
113+
where: $.components.schemas["ContentFilterResults"].properties.sexual
114+
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
115+
- from: openapi-document
116+
where: $.components.schemas["ContentFilterResults"].properties.hate
117+
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
118+
- from: openapi-document
119+
where: $.components.schemas["ContentFilterResults"].properties.self_harm
120+
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
121+
- from: openapi-document
122+
where: $.components.schemas["ContentFilterResults"].properties.violence
123+
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
124+
125+
#
126+
# [BEGIN] Whisper
127+
#
128+
129+
# the whisper operation names are really long since they are a conglomeration of _all_ the
130+
# possible return types.
131+
- rename-operation:
132+
from: getAudioTranscriptionAsPlainText_getAudioTranscriptionAsResponseObject
133+
to: GetAudioTranscriptionInternal
134+
- rename-operation:
135+
from: getAudioTranslationAsPlainText_getAudioTranslationAsResponseObject
136+
to: GetAudioTranslationInternal
137+
138+
# fixup the responses
139+
- from: openapi-document
140+
where: $.paths["/deployments/{deploymentId}/audio/transcriptions"]
141+
transform: |
142+
delete $.post.responses["200"].statusCode;
143+
$.post.responses["200"].content["application/json"].schema["$ref"] = "#/components/schemas/AudioTranscription"; delete $.post.responses["200"].content["application/json"].schema.anyOf;
144+
- from: openapi-document
145+
where: $.paths["/deployments/{deploymentId}/audio/translations"]
146+
transform: |
147+
delete $.post.responses["200"].statusCode;
148+
$.post.responses["200"].content["application/json"].schema["$ref"] = "#/components/schemas/AudioTranscription"; delete $.post.responses["200"].content["application/json"].schema.anyOf;
149+
150+
# hide the generated functions, in favor of our public wrappers.
151+
- from:
152+
- client.go
153+
- models.go
154+
- models_serde.go
155+
- response_types.go
156+
- options.go
157+
where: $
158+
transform: |
159+
return $
160+
.replace(/GetAudioTranscriptionInternal([^){ ]*)/g, "getAudioTranscriptionInternal$1")
161+
.replace(/GetAudioTranslationInternal([^){ ]*)/g, "getAudioTranslationInternal$1");
162+
163+
# some multipart fixing
164+
- from: client.go
165+
where: $
166+
transform: |
167+
return $
168+
.replace(/(func.*getAudio(?:Translation|Transcription)InternalCreateRequest\(.+?)options/g, "$1body")
169+
.replace(/runtime\.SetMultipartFormData\(.+?\)/sg, "setMultipartFormData(req, file, *body)")
170+
171+
# response type parsing (can be text/plain _or_ JSON)
172+
- from: client.go
173+
where: $
174+
transform: |
175+
return $
176+
.replace(/client\.getAudioTranscriptionInternalHandleResponse/g, "getAudioTranscriptionInternalHandleResponse")
177+
.replace(/client\.getAudioTranslationInternalHandleResponse/g, "getAudioTranslationInternalHandleResponse")
178+
179+
# Whisper openapi3 generation: we have two oneOf that point to the same type,
180+
# and we want to activate our multipart support in the generator.
181+
- from: openapi-document
182+
where: $.paths
183+
transform: |
184+
let makeMultipart = (item) => {
185+
if (item["application/json"] == null) { return item; }
186+
item["multipart/form-data"] = {
187+
...item["application/json"]
188+
};
189+
delete item["application/json"];
190+
}
191+
makeMultipart($["/deployments/{deploymentId}/audio/transcriptions"].post.requestBody.content);
192+
makeMultipart($["/deployments/{deploymentId}/audio/translations"].post.requestBody.content);
193+
194+
- from: openapi-document
195+
where: $.components.schemas
196+
transform: |
197+
let fix = (v) => { if (v.allOf != null) { v.$ref = v.allOf[0].$ref; delete v.allOf; } };
198+
199+
fix($.AudioTranscriptionOptions.properties.response_format);
200+
fix($.AudioTranscription.properties.task);
201+
202+
fix($.AudioTranslationOptions.properties.response_format);
203+
fix($.AudioTranslation.properties.task);
204+
#
205+
# [END] Whisper
206+
#
207+
112208
# Fix "AutoGenerated" models
113209
- from: openapi-document
114210
where: $.components.schemas["ChatCompletions"].properties.usage
@@ -155,13 +251,26 @@ directive:
155251
- models_serde.go
156252
- models.go
157253
where: $
158-
transform: return $.replace(/AzureCoreFoundations/g, "azureCoreFoundations");
159-
- from:
160-
- models_serde.go
161-
- models.go
162-
where: $
163-
transform: return $.replace(/(?:\/\/.*\s)?func \(\w \*?(?:ErrorResponse|ErrorResponseError|InnerError|InnerErrorInnererror)\).*\{\s(?:.+\s)+\}\s/g, "");
164-
254+
transform: |
255+
return $
256+
// InnerError is actually a recursive type, so there is no need for the generated AzureCoreFoundationsInnerErrorInnererror type
257+
.replace(/\/\/ AzureCoreFoundationsInnerErrorInnererror.+?\n}/s, "")
258+
// also, remove its marshalling functions
259+
.replace(/\/\/ (Unmarshal|Marshal)JSON implements[^\n]+?AzureCoreFoundationsInnerErrorInnererror.+?\n}/sg, "")
260+
261+
// Remove any references to the type and replace them with InnerError.
262+
.replace(/Innererror \*(AzureCoreFoundationsInnerErrorInnererror|AzureCoreFoundationsErrorInnererror)/g, "InnerError *InnerError")
263+
264+
// Fix the marshallers/unmarshallers to use the right case.
265+
.replace(/(a|c).Innererror/g, '$1.InnerError')
266+
267+
// We have two "inner error" types that are identical (ErrorInnerError and InnerError). Let's eliminate the one that's not actually directly referenced.
268+
.replace(/\/\/azureCoreFoundationsInnerError.+?\n}/s, "")
269+
270+
//
271+
// Strip the "AzureCoreFoundations" prefix from type names to match our naming style.
272+
//
273+
.replace(/AzureCoreFoundations/g, "")
165274
- from: constants.go
166275
where: $
167276
transform: >-
@@ -185,15 +294,6 @@ directive:
185294
return $
186295
.replace(/runtime\.JoinPaths\(client.endpoint, urlPath\)/g, "client.formatURL(urlPath, getDeployment(body))");
187296
188-
# Some ImageGenerations hackery to represent the ImageLocation/ImagePayload polymorphism.
189-
# - Remove the auto-generated ImageGenerationsDataItem.
190-
# - Replace the ImageGenerations.Data type with []ImageGenerationDataItem
191-
# - from: models.go
192-
# where: $
193-
# transform: |
194-
# return $.replace(/type ImageGenerationsDataItem struct {[^}]+}/, "// ImageGenerationsDataItem represents an image URL or payload\ntype ImageGenerationsDataItem struct{\nImageLocation\nImagePayload\n}")
195-
# $.replace(/(type ImageGenerations struct.+?)Data any/g, "$1Data []ImageGenerationsDataItem")
196-
197297
- from: models.go
198298
where: $
199299
transform: |
@@ -261,16 +361,6 @@ directive:
261361
where: $
262362
transform: return $.replace(/Logprobs/g, "LogProbs")
263363

264-
# delete ContentFilterResult in favor of our custom representation.
265-
- from:
266-
- models.go
267-
- models_serde.go
268-
where: $
269-
transform: |
270-
return $.replace(/\/\/ ContentFilterResult.+?\n}/s, "")
271-
.replace(/\/\/ MarshalJSON implements the json.Marshaller interface for type ContentFilterResult.+?\n}/s, "")
272-
.replace(/\/\/ UnmarshalJSON implements the json.Unmarshaller interface for type ContentFilterResult.+?\n}/s, "");
273-
274364
- from: constants.go
275365
where: $
276366
transform: return $.replace(/\/\/ PossibleazureOpenAIOperationStateValues returns.+?\n}/s, "");
@@ -295,14 +385,14 @@ directive:
295385
where: $
296386
transform: |
297387
return $
298-
.replace(/\/\/ The model name.*?Model \*string/sg, "// REQUIRED: Deployment specifies the name of the deployment (for Azure OpenAI) or model (for OpenAI) to use for this request.\nDeployment string");
388+
.replace(/\/\/ The model.*?Model \*string/sg, "// REQUIRED: Deployment specifies the name of the deployment (for Azure OpenAI) or model (for OpenAI) to use for this request.\nDeployment string");
299389
300390
- from: models_serde.go
301391
where: $
302392
transform: |
303393
return $
304-
.replace(/populate\(objectMap, "model", (c|e).Model\)/g, 'populate(objectMap, "model", &$1.Deployment)')
305-
.replace(/err = unpopulate\(val, "Model", &(c|e).Model\)/g, 'err = unpopulate(val, "Model", &$1.Deployment)');
394+
.replace(/populate\(objectMap, "model", (c|e|a).Model\)/g, 'populate(objectMap, "model", &$1.Deployment)')
395+
.replace(/err = unpopulate\(val, "Model", &(c|e|a).Model\)/g, 'err = unpopulate(val, "Model", &$1.Deployment)');
306396
307397
# Make the Azure extensions internal - we expose these through the GetChatCompletions*() functions
308398
# and just treat which endpoint we use as an implementation detail.
@@ -344,4 +434,9 @@ directive:
344434
return $.replace(
345435
/(AzureChatExtensionTypeAzureCognitiveSearch AzureChatExtensionType)/,
346436
"// AzureChatExtensionTypeAzureCognitiveSearch enables the use of an Azure Cognitive Search index with chat completions.\n// [AzureChatExtensionConfiguration.Parameter] should be of type [AzureCognitiveSearchChatExtensionConfiguration].\n$1");
437+
438+
# HACK: prompt_annotations was renamed to prompt_filter_results, but either key may appear, so accept both when unmarshalling.
439+
- from: models_serde.go
440+
where: $
441+
transform: return $.replace(/case "prompt_filter_results":/g, 'case "prompt_annotations":\nfallthrough\ncase "prompt_filter_results":')
347442
```

sdk/ai/azopenai/ci.yml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ stages:
3838
# Azure OpenAI
3939
AOAI_ENDPOINT: $(AOAI-ENDPOINT)
4040
AOAI_API_KEY: $(AOAI-API-KEY)
41-
AOAI_CHAT_COMPLETIONS_MODEL_DEPLOYMENT: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT)
42-
AOAI_COMPLETIONS_MODEL_DEPLOYMENT: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT)
43-
AOAI_EMBEDDINGS_MODEL_DEPLOYMENT: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT)
41+
AOAI_CHAT_COMPLETIONS_MODEL: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT)
42+
AOAI_COMPLETIONS_MODEL: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT)
43+
AOAI_EMBEDDINGS_MODEL: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT)
4444

4545
# Azure OpenAI "Canary"
46-
AOAI_COMPLETIONS_MODEL_DEPLOYMENT_CANARY: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
46+
AOAI_COMPLETIONS_MODEL_CANARY: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
4747
AOAI_API_KEY_CANARY: $(AOAI-API-KEY-CANARY)
48-
AOAI_EMBEDDINGS_MODEL_DEPLOYMENT_CANARY: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT-CANARY)
49-
AOAI_CHAT_COMPLETIONS_MODEL_DEPLOYMENT_CANARY: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
48+
AOAI_EMBEDDINGS_MODEL_CANARY: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT-CANARY)
49+
AOAI_CHAT_COMPLETIONS_MODEL_CANARY: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
5050
AOAI_ENDPOINT_CANARY: $(AOAI-ENDPOINT-CANARY)
5151

5252
# OpenAI
@@ -61,3 +61,6 @@ stages:
6161
COGNITIVE_SEARCH_API_INDEX: $(COGNITIVE-SEARCH-API-INDEX)
6262
COGNITIVE_SEARCH_API_KEY: $(COGNITIVE-SEARCH-API-KEY)
6363

64+
AOAI_ENDPOINT_WHISPER: $(AOAI-ENDPOINT-WHISPER)
65+
AOAI_API_KEY_WHISPER: $(AOAI-API-KEY-WHISPER)
66+
AOAI_MODEL_WHISPER: $(AOAI-MODEL-WHISPER)

0 commit comments

Comments
 (0)