Skip to content

Commit 4b2eebb

Browse files
v-durgeshsVinothini Dharmaraj
andauthored
Added transcription packet parser. (#34768)
* Added Transcription packet parser. * fixed lint errors. * updating and fixing the module import error * Fixing module not found issue * fixing the tests --------- Co-authored-by: Vinothini Dharmaraj <[email protected]>
1 parent 1e44338 commit 4b2eebb

File tree

5 files changed

+252
-0
lines changed

5 files changed

+252
-0
lines changed

sdk/communication/azure-communication-callautomation/azure/communication/callautomation/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ._version import VERSION
99
from ._call_automation_client import CallAutomationClient
1010
from ._call_connection_client import CallConnectionClient
11+
from .streaming.streaming_data_parser import StreamingDataParser
1112
from ._models import (
1213
CallConnectionProperties,
1314
FileSource,
@@ -52,11 +53,19 @@
5253
RecordingState,
5354
VoiceKind
5455
)
56+
from .streaming.models import (
57+
TranscriptionMetadata,
58+
TranscriptionData
59+
)
60+
5561
__all__ = [
5662
# clients
5763
"CallAutomationClient",
5864
"CallConnectionClient",
5965

66+
# parser
67+
"StreamingDataParser",
68+
6069
# models for input
6170
"FileSource",
6271
"TextSource",
@@ -87,6 +96,10 @@
8796
"CommunicationCloudEnvironment",
8897
"UnknownIdentifier",
8998

99+
# streaming models
100+
"TranscriptionMetadata",
101+
"TranscriptionData",
102+
90103
# enums
91104
"CallRejectReason",
92105
"RecordingContent",
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for
4+
# license information.
5+
# --------------------------------------------------------------------------
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for
4+
# license information.
5+
# --------------------------------------------------------------------------
6+
7+
from enum import Enum
8+
from typing import List
9+
from azure.communication.callautomation._shared.models import CommunicationIdentifier
10+
11+
class ResultStatus(Enum):
    """
    The status of the result of transcription.

    Member values are lowercase, but lookup by value is case-insensitive for
    strings, so ``ResultStatus("final")`` and ``ResultStatus("Final")`` both
    resolve to :attr:`FINAL`.
    """
    INTERMEDIATE = "intermediate"
    FINAL = "final"

    @classmethod
    def _missing_(cls, value):
        """
        Resolve a value that did not match any member exactly.

        Called by ``Enum`` only after the exact-value lookup has failed.
        Strings are retried in lowercase so that differently-cased status
        values (for example ``"Final"``) still map to a member.

        :param value: The value passed to ``ResultStatus(...)``.
        :return: The matching member, or None so that ``Enum`` raises ValueError.
        """
        if isinstance(value, str):
            lowered = value.lower()
            for member in cls:
                if member.value == lowered:
                    return member
        return None
17+
18+
class TextFormat(Enum):
    """Format of the transcription text."""

    # The only format declared by this module.
    DISPLAY = "display"
23+
24+
class WordData:
    """
    One word of a transcribed phrase, with its position and duration.

    :keyword text: Text in the phrase.
    :paramtype text: str
    :keyword offset: The word's position within the phrase.
    :paramtype offset: int
    :keyword duration: Duration in ticks. 1 tick = 100 nanoseconds.
    :paramtype duration: int
    """

    text: str
    """ Text in the phrase. """
    offset: int
    """ The word's position within the phrase. """
    duration: int
    """ Duration in ticks. 1 tick = 100 nanoseconds. """

    def __init__(self, text: str, offset: int, duration: int):
        # Store the three fields exactly as given; no validation or conversion.
        self.text, self.offset, self.duration = text, offset, duration
45+
46+
class TranscriptionMetadata:
    """
    Metadata describing a transcription streaming session.

    :keyword subscriptionId: Transcription Subscription Id.
    :paramtype subscriptionId: str
    :keyword locale: The target locale in which the translated text needs to be.
    :paramtype locale: str
    :keyword callConnectionId: call connection Id.
    :paramtype callConnectionId: str
    :keyword correlationId: correlation Id.
    :paramtype correlationId: str
    """

    subscriptionId: str
    """ Transcription Subscription Id. """
    locale: str
    """ The target locale in which the translated text needs to be. """
    callConnectionId: str
    """ call connection Id. """
    correlationId: str
    """ correlation Id. """

    def __init__(self, subscriptionId: str, locale: str, callConnectionId: str, correlationId: str):
        # Parameter names are camelCase so a metadata payload dict with the
        # same keys can be splatted straight into the constructor.
        self.subscriptionId, self.locale = subscriptionId, locale
        self.callConnectionId, self.correlationId = callConnectionId, correlationId
73+
74+
class TranscriptionData:
    """
    A streamed transcription result for one recognized phrase.

    :keyword text: The display form of the recognized word.
    :paramtype text: str
    :keyword format: The format of text.
    :paramtype format: TextFormat
    :keyword confidence: Confidence of recognition of the whole phrase.
    :paramtype confidence: float
    :keyword offset: The position of this payload.
    :paramtype offset: int
    :keyword duration: Duration in ticks. 1 tick = 100 nanoseconds.
    :paramtype duration: int
    :keyword words: The result for each word of the phrase.
    :paramtype words: List[WordData]
    :keyword participant: The identified speaker based on participant raw ID.
    :paramtype participant: CommunicationIdentifier
    :keyword resultStatus: Status of the result of transcription.
    :paramtype resultStatus: ResultStatus
    """

    text: str
    """ The display form of the recognized word. """
    format: TextFormat
    """ The format of text. """
    confidence: float
    """ Confidence of recognition of the whole phrase, from 0.0 (no confidence) to 1.0 (full confidence). """
    offset: int
    """ The position of this payload. """
    duration: int
    """ Duration in ticks. 1 tick = 100 nanoseconds. """
    words: List[WordData]
    """ The result for each word of the phrase. """
    participant: CommunicationIdentifier
    """ The identified speaker based on participant raw ID. """
    resultStatus: ResultStatus
    """ Status of the result of transcription. """

    def __init__(
        self,
        text: str,
        format: TextFormat,
        confidence: float,
        offset: int,
        duration: int,
        words: List[WordData],
        participant: CommunicationIdentifier,
        resultStatus: ResultStatus
    ):
        # Values are stored as given; nothing is validated or converted here.
        # Phrase-level content.
        self.text, self.format, self.confidence = text, format, confidence
        # Timing of the phrase.
        self.offset, self.duration = offset, duration
        # Per-word breakdown, speaker, and result status.
        self.words, self.participant, self.resultStatus = words, participant, resultStatus
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for
4+
# license information.
5+
# --------------------------------------------------------------------------
6+
7+
from typing import Union
8+
import json
9+
from azure.communication.callautomation._shared.models import identifier_from_raw_id
10+
from azure.communication.callautomation.streaming.models import (TranscriptionMetadata,TranscriptionData,WordData)
11+
12+
class StreamingDataParser:
    """Parses raw transcription streaming packets into model objects."""

    @staticmethod
    def parse(packet_data: Union[str, bytes]) -> "Union[TranscriptionMetadata, TranscriptionData]":
        """
        Parse an incoming transcription packet.

        The packet is a JSON document whose ``kind`` field selects the model
        to build: ``TranscriptionMetadata`` or ``TranscriptionData``.

        :param packet_data: Transcription packet data, as JSON text or UTF-8 encoded bytes.
        :type packet_data: Union[str, bytes]
        :return: The model object matching the packet's ``kind``.
        :rtype: TranscriptionMetadata or TranscriptionData
        :raises ValueError: If packet_data is neither str nor bytes, cannot be decoded
            or parsed as JSON, or carries an unsupported ``kind``.
        :raises KeyError: If ``kind`` or a required payload field is missing.
        """
        if isinstance(packet_data, str):
            string_json = packet_data
        elif isinstance(packet_data, bytes):
            string_json = packet_data.decode('utf-8')
        else:
            # Must actually raise here; otherwise execution falls through and
            # fails later with an unrelated error on the unbound string_json.
            raise ValueError(
                f"packet_data must be str or bytes, got {type(packet_data).__name__}"
            )

        # json.JSONDecodeError is a ValueError subclass, so malformed JSON
        # surfaces under the documented exception type.
        json_object = json.loads(string_json)
        kind = json_object['kind']

        if kind == 'TranscriptionMetadata':
            # Payload keys map one-to-one onto the constructor's parameters.
            return TranscriptionMetadata(**json_object['transcriptionMetadata'])

        if kind == 'TranscriptionData':
            payload = json_object['transcriptionData']
            participant = identifier_from_raw_id(payload['participantRawID'])
            words = [
                WordData(entry["text"], entry["offset"], entry["duration"])
                for entry in payload['words']
            ]
            # format and resultStatus are passed through as the raw payload values.
            return TranscriptionData(
                text=payload['text'],
                format=payload['format'],
                confidence=payload['confidence'],
                offset=payload['offset'],
                duration=payload['duration'],
                words=words,
                participant=participant,
                resultStatus=payload['resultStatus']
            )

        # Unknown packet kind: report the offending packet text.
        raise ValueError(string_json)
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for
4+
# license information.
5+
# --------------------------------------------------------------------------
6+
7+
import json
8+
import unittest
9+
from azure.communication.callautomation.streaming.models import (TranscriptionMetadata,TranscriptionData,WordData,TextFormat,ResultStatus)
10+
from azure.communication.callautomation.streaming.streaming_data_parser import StreamingDataParser
11+
12+
class TestStreamDataParser(unittest.TestCase):
    """Checks that StreamingDataParser.parse builds the expected models from str and bytes packets."""

    def setUp(self):
        # Sample packets, one per supported ``kind``.
        self.transcriptionMetaDataJson = '{"kind":"TranscriptionMetadata","transcriptionMetadata":{"subscriptionId":"0000a000-9999-5555-ae00-cd00e0bc0000","locale":"en-US","callConnectionId":"6d09449c-6677-4f91-8cb7-012c338e6ec1","correlationId":"6d09449c-6677-4f91-8cb7-012c338e6ec1"}}'
        self.transcriptionDataJson = '{"kind":"TranscriptionData","transcriptionData":{"text":"Is everything fine.","format":"display","confidence":0.8138430714607239,"offset":868464674,"duration":11600000,"words":[{"text":"is","offset":868464674,"duration":2400000},{"text":"everything","offset":870864674,"duration":5200000},{"text":"fine","offset":876064674,"duration":4000000}],"participantRawID":"4:+910000000000","resultStatus":"Final"}}'

    def test_parse_binary_to_transcription_metadata(self):
        # Metadata packet supplied as UTF-8 bytes.
        parsed = StreamingDataParser.parse(self.transcriptionMetaDataJson.encode('utf-8'))
        self.assertIsInstance(parsed, TranscriptionMetadata)
        self.validate_transcription_metadata(parsed)

    def test_parse_json_to_transcription_metadata(self):
        # Metadata packet supplied as text.
        parsed = StreamingDataParser.parse(self.transcriptionMetaDataJson)
        self.assertIsInstance(parsed, TranscriptionMetadata)
        self.validate_transcription_metadata(parsed)

    def test_parse_binary_to_transcription_data(self):
        # Data packet supplied as UTF-8 bytes.
        parsed = StreamingDataParser.parse(self.transcriptionDataJson.encode('utf-8'))
        self.assertIsInstance(parsed, TranscriptionData)
        self.validate_transcription_data(parsed)

    def test_parse_json_to_transcription_data(self):
        # Data packet supplied as text.
        parsed = StreamingDataParser.parse(self.transcriptionDataJson)
        self.assertIsInstance(parsed, TranscriptionData)
        self.validate_transcription_data(parsed)

    def validate_transcription_metadata(self, transcriptionMetadata):
        """Assert that every metadata field matches the sample metadata packet."""
        self.assertEqual(transcriptionMetadata.subscriptionId, "0000a000-9999-5555-ae00-cd00e0bc0000")
        self.assertEqual(transcriptionMetadata.locale, "en-US")
        self.assertEqual(transcriptionMetadata.correlationId, "6d09449c-6677-4f91-8cb7-012c338e6ec1")
        self.assertEqual(transcriptionMetadata.callConnectionId, "6d09449c-6677-4f91-8cb7-012c338e6ec1")

    def validate_transcription_data(self, transcriptionData):
        """Assert that every data field, including each word, matches the sample data packet."""
        self.assertEqual(transcriptionData.text, "Is everything fine.")
        # format and resultStatus are compared against the raw payload strings.
        self.assertEqual(transcriptionData.format, "display")
        self.assertEqual(transcriptionData.resultStatus, "Final")
        self.assertAlmostEqual(transcriptionData.confidence, 0.8138430714607239)
        self.assertEqual(transcriptionData.offset, 868464674)
        self.assertEqual(transcriptionData.duration, 11600000)

        expected_words = [
            ("is", 868464674, 2400000),
            ("everything", 870864674, 5200000),
            ("fine", 876064674, 4000000),
        ]
        self.assertEqual(len(transcriptionData.words), 3)
        for index, (text, offset, duration) in enumerate(expected_words):
            word = transcriptionData.words[index]
            self.assertEqual(word.text, text)
            self.assertEqual(word.offset, offset)
            self.assertEqual(word.duration, duration)

        self.assertEqual(transcriptionData.participant.raw_id, "4:+910000000000")
63+
64+

0 commit comments

Comments
 (0)