Skip to content

Commit eca50c1

Browse files
Add extra post-processing to generate SRT and WebVTT captions (#95)
* Add extra post-processing to generate SRT and WebVTT captions
* If no Utterances, fall back to full transcript with a warning
* Add basic tests
1 parent b4849f6 commit eca50c1

File tree

8 files changed

+589
-1
lines changed

8 files changed

+589
-1
lines changed

.github/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ We also welcome anyone to work on any existing issues with the `👋🏽 good fi
3333

3434
For a contribution to be accepted:
3535

36-
* The test suite must be complete and pass
36+
* The test suite must be complete and pass. Run it with `pytest --api-key <key> tests/`.
3737
* Code must follow existing styling conventions
3838
* Commit messages must be descriptive. Related issues should be mentioned by number.
3939

deepgram/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .members import Members
99
from .scopes import Scopes
1010
from .invitations import Invitations
11+
from .extra import Extra
1112

1213

1314
class Deepgram:
@@ -53,6 +54,9 @@ def scopes(self) -> Scopes:
5354
def invitations(self) -> Invitations:
5455
return Invitations(self.options)
5556

57+
@property
def extra(self) -> Extra:
    """Return an ``Extra`` post-processing helper built from this client's options."""
    helper = Extra(self.options)
    return helper
5660

5761

5862
__all__ = ["Deepgram"]

deepgram/_enums.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,7 @@ class LiveTranscriptionEvent(Enum):
66
CLOSE = 'close'
77
TRANSCRIPT_RECEIVED = 'transcript_received'
88
ERROR = 'error'
9+
10+
class Caption(Enum):
    """Caption output formats that a transcription response can be converted to."""

    # SubRip subtitle format.
    SRT = "srt"
    # Web Video Text Tracks format.
    WEBVTT = "webvtt"

deepgram/extra.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import warnings
2+
3+
from ._types import PrerecordedTranscriptionResponse, Options
4+
from ._enums import Caption
5+
6+
class Extra:
    """
    Extra post-processing to transform raw Deepgram responses to conveniently-formatted outputs.
    """

    def __init__(self, options: Options) -> None:
        self.options = options

    def _format_timestamp(self, seconds: float, separator: str) -> str:
        """
        Helper function to transform a seconds mark into a formatted timestamp.
        I.e. 6.564 -> 00:00:06,564

        :param seconds:float: Offset in seconds.
        :param separator:str: Character placed between the seconds and the milliseconds.
        :return Formatted timestamp string (HH:MM:SS<separator>mmm).
        """
        # Convert to whole milliseconds once, rounding to nearest. Truncating
        # the float fraction instead loses a millisecond for values such as
        # 1.001 (stored as 1.00099999...), and rounding first lets a carry
        # propagate into the seconds/minutes/hours fields.
        total_millis = int(round(seconds * 1000))
        total_seconds, millis = divmod(total_millis, 1000)
        total_minutes, secs = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02}:{minutes:02}:{secs:02}{separator}{millis:03}"

    def _to_caption(
        self,
        response: PrerecordedTranscriptionResponse,
        format: Caption,
        line_length: int,
    ) -> str:
        """
        Transform a Deepgram PrerecordedTranscriptionResponse into a set of captions.

        :param response:PrerecordedTranscriptionResponse: Deepgram response.
        :param format:Caption: The caption format enum (SRT or WebVTT).
        :param line_length:int: Number of words in each caption line. Must be at least 1.
        :return A string containing the response's captions. WebVTT output starts
            with the mandatory ``WEBVTT`` header line.
        :raises ValueError: If line_length is less than 1.
        """
        if line_length < 1:
            raise ValueError("line_length must be a positive number of words")

        results = response["results"]
        if "utterances" in results:
            utterances = results["utterances"]
        else:
            warnings.warn(
                "Enabling the Utterances feature is strongly recommended for captioning. Utterances allow "
                "captions to be delimited by pauses. Add request parameter `'utterances': True`."
            )
            # Fall back to the first channel's alternatives; each one is read
            # below through its "words" list, exactly like an utterance.
            utterances = results["channels"][0]["alternatives"]

        # These depend only on the requested format, so compute them once.
        is_srt = format is Caption.SRT
        separator = "," if is_srt else "."
        prefix = "" if is_srt else "- "

        captions = []
        for utterance in utterances:
            words = utterance["words"]
            for first in range(0, len(words), line_length):
                line_words = words[first:first + line_length]
                start = self._format_timestamp(line_words[0]["start"], separator)
                end = self._format_timestamp(line_words[-1]["end"], separator)
                text = " ".join(w["word"] for w in line_words)
                # Captions are numbered sequentially from 1 across all utterances.
                captions.append(
                    f"{len(captions) + 1}\n"
                    f"{start} --> {end}\n"
                    f"{prefix}{text}"
                )

        if not is_srt:
            # A WebVTT file is only valid if it begins with the "WEBVTT" line.
            captions.insert(0, "WEBVTT")
        return "\n\n".join(captions)

    def to_SRT(
        self,
        response: PrerecordedTranscriptionResponse,
        line_length: int=8,
        readable: bool=True
    ):
        """
        Transform a Deepgram PrerecordedTranscriptionResponse into SRT captions.

        :param response:PrerecordedTranscriptionResponse: Deepgram response.
        :param line_length:int: Number of words in each caption line. Defaults to 8.
        :param readable:bool: If True, print the captions to standard output;
            if False, return them as a string. Defaults to True.
        :return Nothing if readable=True, string of captions if readable=False.
        """
        captions = self._to_caption(response, Caption.SRT, line_length)
        if not readable:
            return captions
        print(captions)

    def to_WebVTT(
        self,
        response: PrerecordedTranscriptionResponse,
        line_length: int=8,
        readable: bool=True
    ):
        """
        Transform a Deepgram PrerecordedTranscriptionResponse into WebVTT captions.

        :param response:PrerecordedTranscriptionResponse: Deepgram response.
        :param line_length:int: Number of words in each caption line. Defaults to 8.
        :param readable:bool: If True, print the captions to standard output;
            if False, return them as a string. Defaults to True.
        :return Nothing if readable=True, string of captions if readable=False.
        """
        captions = self._to_caption(response, Caption.WEBVTT, line_length)
        if not readable:
            return captions
        print(captions)

tests/__init__.py

Whitespace-only changes.

tests/conftest.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import pytest
2+
3+
def pytest_addoption(parser):
    """Register the command-line option (and its aliases) that carries the Deepgram API key."""
    api_key_flags = ("--api-key", "--key", "--token", "-K", "-T")
    parser.addoption(*api_key_flags, action="store", help="Deepgram API Key")
7+
8+
def pytest_configure(config):
    """Store the value of the ``--api-key`` option on the ``pytest`` module as ``pytest.api_key``."""
    api_key = config.getoption('--api-key')
    pytest.api_key = api_key

0 commit comments

Comments
 (0)