|
| 1 | +import warnings |
| 2 | + |
| 3 | +from ._types import PrerecordedTranscriptionResponse, Options |
| 4 | +from ._enums import Caption |
| 5 | + |
class Extra:
    """
    Extra post-processing to transform raw Deepgram responses to
    conveniently-formatted outputs.
    """

    def __init__(self, options: Options) -> None:
        self.options = options

    def _format_timestamp(self, seconds: float, separator: str) -> str:
        """
        Transform a seconds mark into a formatted ``HH:MM:SS<sep>mmm`` timestamp.

        I.e. 6.564 -> 00:00:06,564 (with ``separator=","``).

        :param seconds: Offset from the start of the audio, in seconds.
        :param separator: String placed between the seconds and the
            milliseconds fields.
        :return: Formatted timestamp string.
        """
        # Round to whole milliseconds exactly once. Extracting the fractional
        # part with float subtraction and truncating loses a millisecond for
        # values such as 1.001 (0.000999... * 1000 truncates to 0).
        total_millis = round(seconds * 1000)
        total_secs, millis = divmod(total_millis, 1000)
        total_minutes, secs = divmod(total_secs, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02}:{minutes:02}:{secs:02}{separator}{millis:03}"

    def _to_caption(
        self,
        response: PrerecordedTranscriptionResponse,
        format: Caption,
        line_length: int,
    ) -> str:
        """
        Transform a Deepgram PrerecordedTranscriptionResponse into a set of captions.

        Utterances are used when the response contains them; otherwise a
        warning is issued and the alternatives of the first channel are used
        instead. Each utterance is split into caption lines of at most
        ``line_length`` words, numbered consecutively starting at 1.

        :param response: Deepgram response.
        :param format: The caption format enum (SRT or WebVTT).
        :param line_length: Number of words in each caption line; must be >= 1.
        :return: A string containing the response's captions, separated by
            blank lines.
        :raises ValueError: If ``line_length`` is smaller than 1.
        """
        if line_length < 1:
            raise ValueError("line_length must be a positive integer")

        results = response["results"]
        if "utterances" in results:
            utterances = results["utterances"]
        else:
            warnings.warn(
                "Enabling the Utterances feature is strongly recommended for captioning. Utterances allow "
                "captions to be delimited by pauses. Add request parameter `'utterances': True`."
            )
            utterances = results["channels"][0]["alternatives"]

        # Format-dependent pieces do not change between caption lines.
        is_srt = format is Caption.SRT
        separator = "," if is_srt else "."
        prefix = "" if is_srt else "- "

        captions = []
        for utterance in utterances:
            words = utterance["words"]
            for start in range(0, len(words), line_length):
                # Slicing clamps at the end of the list, so the last chunk of
                # an utterance may hold fewer than line_length words.
                chunk = words[start:start + line_length]
                begin = self._format_timestamp(chunk[0]["start"], separator)
                end = self._format_timestamp(chunk[-1]["end"], separator)
                text = " ".join(word["word"] for word in chunk)
                captions.append(
                    f"{len(captions) + 1}\n"
                    f"{begin} --> {end}\n"
                    f"{prefix}{text}"
                )
        return "\n\n".join(captions)

    def to_SRT(
        self,
        response: PrerecordedTranscriptionResponse,
        line_length: int = 8,
        readable: bool = True,
    ):
        """
        Transform a Deepgram PrerecordedTranscriptionResponse into SRT captions.

        :param response: Deepgram response.
        :param line_length: Number of words in each caption line. Defaults to 8.
        :param readable: If the captions should be printed in a human-readable
            format, instead of returned with newline characters. Defaults to True.
        :return: Nothing if readable=True, string of captions if readable=False.
        """
        captions = self._to_caption(response, Caption.SRT, line_length)
        if not readable:
            return captions
        print(captions)

    def to_WebVTT(
        self,
        response: PrerecordedTranscriptionResponse,
        line_length: int = 8,
        readable: bool = True,
    ):
        """
        Transform a Deepgram PrerecordedTranscriptionResponse into WebVTT captions.

        Only the numbered cues are produced; no ``WEBVTT`` file header line is
        prepended to the output.

        :param response: Deepgram response.
        :param line_length: Number of words in each caption line. Defaults to 8.
        :param readable: If the captions should be printed in a human-readable
            format, instead of returned with newline characters. Defaults to True.
        :return: Nothing if readable=True, string of captions if readable=False.
        """
        captions = self._to_caption(response, Caption.WEBVTT, line_length)
        if not readable:
            return captions
        print(captions)
0 commit comments