Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions sdk/cognitiveservices/azure-ai-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ with open(audio_file_path, "rb") as audio_file:
options = TranscriptionOptions(locales=["en-US"]) # Specify the language

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = client.transcribe(request_content)
Expand Down Expand Up @@ -204,7 +204,6 @@ client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(ap

# URL to your audio file (must be publicly accessible)
audio_url = "https://example.com/path/to/audio.wav"

# Configure transcription options
options = TranscriptionOptions(locales=["en-US"])

Expand Down Expand Up @@ -266,7 +265,7 @@ with open(audio_file_path, "rb") as audio_file:
options = TranscriptionOptions(locales=["en-US"], enhanced_mode=enhanced_mode)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio with enhanced mode
result = client.transcribe(request_content)
Expand Down Expand Up @@ -312,7 +311,7 @@ async with TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(
options = TranscriptionOptions(locales=["en-US"]) # Specify the language

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = await client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def transcribe(self, body: Union[_models.TranscriptionContent, JSON], **kwargs:

_body = body.as_dict() if isinstance(body, _Model) else body
_file_fields: list[str] = ["audio"]
_data_fields: list[str] = ["options"]
_data_fields: list[str] = ["definition"]
_files, _data = prepare_multipart_form_data(_body, _file_fields, _data_fields)

_request = build_transcription_transcribe_request(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=line-too-long,useless-suppression
# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
Expand All @@ -9,11 +10,15 @@
"""
from collections.abc import MutableMapping
from typing import Any, Optional
import json
from azure.core.tracing.decorator import distributed_trace
from azure.core.exceptions import map_error, HttpResponseError, ClientAuthenticationError, ResourceNotFoundError, ResourceExistsError, ResourceNotModifiedError

from .. import models as _models
from .._utils.model_base import _deserialize, SdkJSONEncoder
from ._operations import (
_TranscriptionClientOperationsMixin as _TranscriptionClientOperationsMixinGenerated,
build_transcription_transcribe_request,
)

JSON = MutableMapping[str, Any]
Expand Down Expand Up @@ -56,11 +61,47 @@ def transcribe_from_url(
else:
options.audio_url = audio_url

# Create request content without audio file (service will fetch from URL)
body = _models.TranscriptionContent(options=options, audio=None)

# Call the underlying protocol method
return super().transcribe(body, **kwargs)
# Send as multipart request with only definition (no audio file)
error_map: MutableMapping = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
304: ResourceNotModifiedError,
}
error_map.update(kwargs.pop("error_map", {}) or {})

_headers = kwargs.pop("headers", {}) or {}
_params = kwargs.pop("params", {}) or {}

_params["api-version"] = self._config.api_version
_headers["Accept"] = "application/json"

# Serialize definition as JSON string for multipart
definition_json = json.dumps(options.as_dict(), cls=SdkJSONEncoder, exclude_readonly=True)

# Build multipart request - pass definition through files to ensure multipart encoding
# The definition needs to be in files list with explicit content-type to trigger multipart/form-data
_request = build_transcription_transcribe_request(
api_version=self._config.api_version,
files=[("definition", (None, definition_json, "application/json"))],
headers=_headers,
params=_params,
)

path_format_arguments = {
"endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
}
_request.url = self._client.format_url(_request.url, **path_format_arguments)

pipeline_response = self._client._pipeline.run(_request, stream=False, **kwargs) # pylint: disable=protected-access
response = pipeline_response.http_response

if response.status_code not in [200]:
map_error(status_code=response.status_code, response=response, error_map=error_map)
raise HttpResponseError(response=response)

deserialized = _deserialize(_models.TranscriptionResult, response.json())
return deserialized


__all__: list[str] = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def transcribe(

_body = body.as_dict() if isinstance(body, _Model) else body
_file_fields: list[str] = ["audio"]
_data_fields: list[str] = ["options"]
_data_fields: list[str] = ["definition"]
_files, _data = prepare_multipart_form_data(_body, _file_fields, _data_fields)

_request = build_transcription_transcribe_request(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=line-too-long,useless-suppression
# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
Expand All @@ -9,9 +10,13 @@
"""
from collections.abc import MutableMapping
from typing import Any, Optional
import json
from azure.core.tracing.decorator_async import distributed_trace_async
from azure.core.exceptions import map_error, HttpResponseError, ClientAuthenticationError, ResourceNotFoundError, ResourceExistsError, ResourceNotModifiedError

from ... import models as _models
from ..._utils.model_base import _deserialize, SdkJSONEncoder
from ..._operations._operations import build_transcription_transcribe_request
from ._operations import (
_TranscriptionClientOperationsMixin as _TranscriptionClientOperationsMixinGenerated,
)
Expand Down Expand Up @@ -56,11 +61,45 @@ async def transcribe_from_url(
else:
options.audio_url = audio_url

# Create request content without audio file (service will fetch from URL)
body = _models.TranscriptionContent(options=options, audio=None)

# Call the underlying protocol method
return await super().transcribe(body, **kwargs)
# Send as multipart request with only definition (no audio file)
error_map: MutableMapping = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
304: ResourceNotModifiedError,
}
error_map.update(kwargs.pop("error_map", {}) or {})

_headers = kwargs.pop("headers", {}) or {}
_params = kwargs.pop("params", {}) or {}

_headers["Accept"] = "application/json"

# Serialize definition as JSON string for multipart
definition_json = json.dumps(options.as_dict(), cls=SdkJSONEncoder, exclude_readonly=True)

# Build multipart request - pass definition through files to ensure multipart encoding
# The definition needs to be in files list with explicit content-type to trigger multipart/form-data
_request = build_transcription_transcribe_request(
api_version=self._config.api_version,
files=[("definition", (None, definition_json, "application/json"))],
headers=_headers,
)

path_format_arguments = {
"endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
}
_request.url = self._client.format_url(_request.url, **path_format_arguments)

pipeline_response = await self._client._pipeline.run(_request, stream=False, **kwargs) # pylint: disable=protected-access
response = pipeline_response.http_response

if response.status_code not in [200]:
map_error(status_code=response.status_code, response=response, error_map=error_map)
raise HttpResponseError(response=response)

deserialized = _deserialize(_models.TranscriptionResult, response.json())
return deserialized


__all__: list[str] = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,20 +247,18 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
class TranscriptionContent(_Model):
"""Request model for transcription operation.

:ivar options: Metadata for a transcription request. This field contains a JSON-serialized
object of type ``TranscriptionOptions``.
:vartype options: ~azure.ai.transcription.models.TranscriptionOptions
:ivar definition: Metadata for a transcription request. This field contains a JSON-serialized
object of type ``TranscriptionOptions``. Required.
:vartype definition: ~azure.ai.transcription.models.TranscriptionOptions
:ivar audio: The content of the audio file to be transcribed. The audio file must be shorter
than 2 hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is
provided in the definition.
:vartype audio: ~azure.ai.transcription._utils.utils.FileType
"""

options: Optional["_models.TranscriptionOptions"] = rest_field(
visibility=["read", "create", "update", "delete", "query"]
)
definition: "_models.TranscriptionOptions" = rest_field(visibility=["read", "create", "update", "delete", "query"])
"""Metadata for a transcription request. This field contains a JSON-serialized object of type
``TranscriptionOptions``."""
``TranscriptionOptions``. Required."""
audio: Optional[FileType] = rest_field(
visibility=["read", "create", "update", "delete", "query"], is_multipart_file_input=True
)
Expand All @@ -272,7 +270,7 @@ class TranscriptionContent(_Model):
def __init__(
self,
*,
options: Optional["_models.TranscriptionOptions"] = None,
definition: "_models.TranscriptionOptions",
audio: Optional[FileType] = None,
) -> None: ...

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ def test_transcribe(self, transcription_endpoint):
client = self.create_client(endpoint=transcription_endpoint)
response = client.transcribe(
body={
"audio": "filetype",
"options": {
"definition": {
"audioUrl": "str",
"channels": [0],
"diarization": {"enabled": bool, "maxSpeakers": 0},
Expand All @@ -29,6 +28,7 @@ def test_transcribe(self, transcription_endpoint):
"phraseList": {"biasingWeight": 0.0, "phrases": ["str"]},
"profanityFilterMode": "str",
},
"audio": "filetype",
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ async def test_transcribe(self, transcription_endpoint):
client = self.create_async_client(endpoint=transcription_endpoint)
response = await client.transcribe(
body={
"audio": "filetype",
"options": {
"definition": {
"audioUrl": "str",
"channels": [0],
"diarization": {"enabled": bool, "maxSpeakers": 0},
Expand All @@ -30,6 +29,7 @@ async def test_transcribe(self, transcription_endpoint):
"phraseList": {"biasingWeight": 0.0, "phrases": ["str"]},
"profanityFilterMode": "str",
},
"audio": "filetype",
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ async def sample_transcribe_audio_file_async():
options = TranscriptionOptions(locales=["en-US"]) # Specify the language

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = await client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ async def sample_transcribe_multiple_languages_async():
options = TranscriptionOptions(locales=["en-US", "es-ES", "fr-FR", "de-DE"]) # Multiple candidates

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = await client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def sample_transcribe_with_diarization_async():
options = TranscriptionOptions(locales=["en-US"], diarization_options=diarization_options)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = await client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async def sample_transcribe_with_enhanced_mode_async():
options = TranscriptionOptions(locales=["en-US"], enhanced_mode=enhanced_mode)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio with enhanced mode
result = await client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ async def sample_transcribe_with_phrase_list_async():
options = TranscriptionOptions(locales=["en-US"], phrase_list=phrase_list)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = await client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ async def sample_transcribe_with_profanity_filter_async():
with open(audio_file_path, "rb") as audio_file:
options = TranscriptionOptions(locales=["en-US"], profanity_filter_mode="Masked")

request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

result = await client.transcribe(request_content)
print(f"Transcription (with profanity masked): {result.combined_phrases[0].text}")
Expand All @@ -55,7 +55,7 @@ async def sample_transcribe_with_profanity_filter_async():
with open(audio_file_path, "rb") as audio_file:
options = TranscriptionOptions(locales=["en-US"], profanity_filter_mode="Removed")

request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

result = await client.transcribe(request_content)
print(f"\nTranscription (with profanity removed): {result.combined_phrases[0].text}")
Expand All @@ -64,7 +64,7 @@ async def sample_transcribe_with_profanity_filter_async():
with open(audio_file_path, "rb") as audio_file:
options = TranscriptionOptions(locales=["en-US"], profanity_filter_mode="Tags")

request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

result = await client.transcribe(request_content)
print(f"\nTranscription (with profanity tagged): {result.combined_phrases[0].text}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def sample_transcribe_audio_file():
options = TranscriptionOptions(locales=["en-US"]) # Specify the language

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def sample_transcribe_from_url():

# URL to your audio file (must be publicly accessible)
audio_url = "https://example.com/path/to/audio.wav"

# Configure transcription options
options = TranscriptionOptions(locales=["en-US"])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def sample_transcribe_multiple_languages():
options = TranscriptionOptions(locales=["en-US", "es-ES", "fr-FR", "de-DE"]) # Multiple language candidates

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def sample_transcribe_with_diarization():
options = TranscriptionOptions(locales=["en-US"], diarization_options=diarization_options)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def sample_transcribe_with_enhanced_mode():
options = TranscriptionOptions(locales=["en-US"], enhanced_mode=enhanced_mode)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio with enhanced mode
result = client.transcribe(request_content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def sample_transcribe_with_phrase_list():
options = TranscriptionOptions(locales=["en-US"], phrase_list=phrase_list)

# Create the request content
request_content = TranscriptionContent(options=options, audio=audio_file)
request_content = TranscriptionContent(definition=options, audio=audio_file)

# Transcribe the audio
result = client.transcribe(request_content)
Expand Down
Loading