Skip to content

Commit 70ba08b

Browse files
authored
Added audio streaming parser (#35827)
1 parent b447ad7 commit 70ba08b

File tree

3 files changed

+137
-6
lines changed

3 files changed

+137
-6
lines changed

sdk/communication/azure-communication-callautomation/azure/communication/callautomation/_models.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
# Licensed under the MIT License. See License.txt in the project root for
55
# license information.
66
# --------------------------------------------------------------------------
7+
78
from typing import List, Optional, Union, TYPE_CHECKING
89
from enum import Enum
10+
from datetime import datetime
911
from typing_extensions import Literal
1012
from ._generated.models import (
1113
CallLocator,
@@ -1062,3 +1064,68 @@ def __init__(
10621064
self.words = words
10631065
self.participant = participant
10641066
self.result_state = result_state
1067+
1068+
class AudioMetadata:
    """
    Metadata describing an audio stream.

    :keyword subscription_id: Identifier of the audio subscription.
    :paramtype subscription_id: str
    :keyword encoding: Encoding of the audio.
    :paramtype encoding: str
    :keyword sample_rate: Sample rate of the audio.
    :paramtype sample_rate: int
    :keyword channels: Channels of the audio.
    :paramtype channels: int
    :keyword length: Length of the audio.
    :paramtype length: int
    """

    subscription_id: str
    """ Identifier of the audio subscription. """
    encoding: str
    """ Encoding of the audio. """
    sample_rate: int
    """ Sample rate of the audio. """
    channels: int
    """ Channels of the audio. """
    length: int
    """ Length of the audio. """

    def __init__(
        self,
        *,
        subscription_id: str,
        encoding: str,
        sample_rate: int,
        channels: int,
        length: int
    ):
        # Values are stored exactly as supplied; nothing is validated or converted here.
        self.length = length
        self.channels = channels
        self.sample_rate = sample_rate
        self.encoding = encoding
        self.subscription_id = subscription_id
1106+
1107+
class AudioData:
    """
    Data for Audio Streaming.

    :keyword data: Audio streaming data.
    :paramtype data: str
    :keyword time_stamp: Time stamp of the audio data. A string in ISO 8601 form is
     converted to a ``datetime``; a string that cannot be parsed is stored unchanged.
    :paramtype time_stamp: str or ~datetime.datetime
    :keyword is_silent: Is silent.
    :paramtype is_silent: bool
    """

    data: str
    """ Audio streaming data. """
    time_stamp: Union[datetime, str]
    """ Time stamp; a ``datetime`` unless the supplied string could not be parsed. """
    is_silent: bool
    """ Is silent. """

    def __init__(
        self,
        *,
        data: str,
        time_stamp: Union[str, datetime],
        is_silent: bool
    ):
        self.data = data
        # The attribute is documented as a datetime, while callers may hand over the raw
        # string taken from a JSON packet, so normalise it here.
        self.time_stamp = self._to_datetime(time_stamp)
        self.is_silent = is_silent

    @staticmethod
    def _to_datetime(value):
        """
        Convert an ISO 8601 string to a ``datetime`` on a best-effort basis.

        :param value: The time stamp as supplied to the constructor.
        :type value: str or ~datetime.datetime
        :return: A ``datetime`` when ``value`` already is one or can be parsed;
         otherwise ``value`` itself, unchanged.
        :rtype: ~datetime.datetime or str
        """
        if not isinstance(value, str):
            # datetime instances (and anything else that is not text) pass through untouched.
            return value

        text = value.strip()
        # Older Python versions' datetime.fromisoformat rejects the "Z" UTC designator.
        if text[-1:] in ("Z", "z"):
            text = text[:-1] + "+00:00"

        # Older Python versions only accept 3 or 6 fractional digits, so pad/truncate
        # the fraction to exactly 6 digits (microseconds).
        head, dot, tail = text.partition(".")
        if dot:
            digit_count = len(tail) - len(tail.lstrip("0123456789"))
            if digit_count:
                fraction = tail[:digit_count][:6].ljust(6, "0")
                text = head + "." + fraction + tail[digit_count:]

        try:
            return datetime.fromisoformat(text)
        except ValueError:
            # Keep the caller's value so construction never fails where it previously succeeded.
            return value

sdk/communication/azure-communication-callautomation/azure/communication/callautomation/_streaming_data_parser.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,26 @@
77
from typing import Union
88
import json
99
from azure.communication.callautomation._shared.models import identifier_from_raw_id
10-
from azure.communication.callautomation._models import (TranscriptionMetadata,TranscriptionData,WordData)
10+
from azure.communication.callautomation._models import (
11+
TranscriptionMetadata,
12+
TranscriptionData,
13+
WordData,
14+
AudioMetadata,
15+
AudioData)
1116

1217
class StreamingDataParser:
1318
@staticmethod
14-
def parse(packet_data: Union[str, bytes]) -> Union[TranscriptionMetadata, TranscriptionData]:
19+
def parse(packet_data: Union[str, bytes]) -> Union[
20+
TranscriptionMetadata,
21+
TranscriptionData,
22+
AudioMetadata,
23+
AudioData]:
1524
"""
1625
Parse the incoming packets.
1726
:param packet_data: Transcription packet data.
1827
:type packet_data: Union[str, bytes]
19-
:return: Union[TranscriptionMetadata, TranscriptionData]
20-
:rtype: TranscriptionMetadata, TranscriptionData
28+
:return: Union[TranscriptionMetadata, TranscriptionData, AudioMetadata, AudioData]
29+
:rtype: TranscriptionMetadata, TranscriptionData, AudioMetadata, AudioData
2130
:raises: ValueError
2231
"""
2332
if isinstance(packet_data, str):
@@ -52,4 +61,20 @@ def parse(packet_data: Union[str, bytes]) -> Union[TranscriptionMetadata, Transc
5261
result_state=json_object['transcriptionData']['resultStatus']
5362
)
5463
return transcription_data
64+
if kind == 'AudioMetadata':
65+
audio_metadata = AudioMetadata(
66+
subscription_id=json_object['audioMetadata']['subscriptionId'],
67+
encoding=json_object['audioMetadata']['encoding'],
68+
sample_rate=json_object['audioMetadata']['sampleRate'],
69+
channels=json_object['audioMetadata']['channels'],
70+
length=json_object['audioMetadata']['length']
71+
)
72+
return audio_metadata
73+
if kind == 'AudioData':
74+
audio_data = AudioData(
75+
data=json_object['audioData']['data'],
76+
time_stamp=json_object['audioData']['timestamp'],
77+
is_silent=json_object['audioData']['silent']
78+
)
79+
return audio_data
5580
raise ValueError(string_json)

sdk/communication/azure-communication-callautomation/tests/test_streaming_data_parser.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,19 @@
66

77
import json
88
import unittest
9-
from azure.communication.callautomation._models import (TranscriptionMetadata,TranscriptionData,WordData,TextFormat)
9+
from azure.communication.callautomation._models import (
10+
TranscriptionMetadata,
11+
TranscriptionData,
12+
AudioMetadata,
13+
AudioData)
1014
from azure.communication.callautomation._streaming_data_parser import StreamingDataParser
1115

1216
class TestStreamDataParser(unittest.TestCase):
1317
def setUp(self):
    """Prepare one raw JSON packet per packet kind used by the parser tests."""
    # Transcription packets.
    self.transcriptionMetaDataJson = (
        '{"kind":"TranscriptionMetadata","transcriptionMetadata":{"subscriptionId":"0000a000-9999-5555-ae00-cd00e0bc0000","locale":"en-US","callConnectionId":"6d09449c-6677-4f91-8cb7-012c338e6ec1","correlationId":"6d09449c-6677-4f91-8cb7-012c338e6ec1"}}'
    )
    self.transcriptionDataJson = (
        '{"kind":"TranscriptionData","transcriptionData":{"text":"Is everything fine.","format":"display","confidence":0.8138430714607239,"offset":868464674,"duration":11600000,"words":[{"text":"is","offset":868464674,"duration":2400000},{"text":"everything","offset":870864674,"duration":5200000},{"text":"fine","offset":876064674,"duration":4000000}],"participantRawID":"4:+910000000000","resultStatus":"Final"}}'
    )

    # Audio packets.
    self.audioMetadataJson = (
        '{"kind":"AudioMetadata","audioMetadata":{"subscriptionId":"4af370df-3868-461f-8242-91f077a6f8a6","encoding":"PCM","sampleRate":16000,"channels":1,"length":640}}'
    )
    self.audioDataJson = (
        '{"kind":"AudioData","audioData":{"timestamp":"2024-05-30T06:25:02.948Z","data":"test","silent":false}}'
    )
1722
def test_parse_binary_to_transcription_metadata(self):
1823
transcriptionMetaDataBinary = self.transcriptionMetaDataJson.encode('utf-8')
1924
parsedData = StreamingDataParser.parse(transcriptionMetaDataBinary)
@@ -60,3 +65,37 @@ def validate_transcription_data(self, transcriptionData):
6065
self.assertEqual(transcriptionData.words[2].offset, 876064674)
6166
self.assertEqual(transcriptionData.words[2].duration, 4000000)
6267
self.assertEqual(transcriptionData.participant.raw_id, "4:+910000000000")
68+
69+
def test_parse_json_to_audio_metadata(self):
    """Parsing the audio metadata packet given as ``str`` yields an ``AudioMetadata``."""
    parsed = StreamingDataParser.parse(self.audioMetadataJson)
    is_audio_metadata = isinstance(parsed, AudioMetadata)
    self.assertTrue(is_audio_metadata)
    self.validate_audio_metadata(parsed)
73+
74+
def test_parse_binary_to_audio_metadata(self):
    """Parsing the audio metadata packet given as UTF-8 ``bytes`` yields an ``AudioMetadata``."""
    packet_bytes = self.audioMetadataJson.encode('utf-8')
    parsed = StreamingDataParser.parse(packet_bytes)
    is_audio_metadata = isinstance(parsed, AudioMetadata)
    self.assertTrue(is_audio_metadata)
    self.validate_audio_metadata(parsed)
79+
80+
def test_parse_json_to_audio_data(self):
    """Parsing the audio data packet given as ``str`` yields an ``AudioData``."""
    parsed = StreamingDataParser.parse(self.audioDataJson)
    is_audio_data = isinstance(parsed, AudioData)
    self.assertTrue(is_audio_data)
    self.validate_audio_data(parsed)
84+
85+
def test_parse_binary_to_audio_data(self):
    """Parsing the audio data packet given as UTF-8 ``bytes`` yields an ``AudioData``."""
    packet_bytes = self.audioDataJson.encode('utf-8')
    parsed = StreamingDataParser.parse(packet_bytes)
    is_audio_data = isinstance(parsed, AudioData)
    self.assertTrue(is_audio_data)
    self.validate_audio_data(parsed)
90+
91+
def validate_audio_metadata(self, audioMetadata):
    """Assert that every field of ``audioMetadata`` equals its expected fixture value."""
    # Checked in this order; the first mismatch fails the test.
    expected_fields = (
        ('subscription_id', '4af370df-3868-461f-8242-91f077a6f8a6'),
        ('encoding', 'PCM'),
        ('sample_rate', 16000),
        ('channels', 1),
        ('length', 640),
    )
    for attribute, expected in expected_fields:
        self.assertEqual(getattr(audioMetadata, attribute), expected)
97+
98+
def validate_audio_data(self, audioData):
    """Assert that ``audioData`` carries the expected ``data`` and ``is_silent`` values."""
    # Checked in this order; the first mismatch fails the test.
    for attribute, expected in (('data', "test"), ('is_silent', False)):
        self.assertEqual(getattr(audioData, attribute), expected)
101+

0 commit comments

Comments
 (0)