Skip to content

Commit 620ed34

Browse files
authored
fix (mistral-ai): add flexibility for timestamps (#4404)
1 parent e9ac896 commit 620ed34

File tree

1 file changed

+5 -7 lines changed
  • livekit-plugins/livekit-plugins-mistralai/livekit/plugins/mistralai

1 file changed

+5 -7 lines changed

livekit-plugins/livekit-plugins-mistralai/livekit/plugins/mistralai/stt.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,14 +40,14 @@
4040
@dataclass
4141
class _STTOptions:
4242
model: STTModels | str
43-
language: str
43+
language: str | None
4444

4545

4646
class STT(stt.STT):
4747
def __init__(
4848
self,
4949
*,
50-
language: str = "en",
50+
language: str | None = "en",
5151
model: STTModels | str = "voxtral-mini-latest",
5252
api_key: NotGivenOr[str] = NOT_GIVEN,
5353
client: Mistral | None = None,
@@ -56,7 +56,7 @@ def __init__(
5656
Create a new instance of MistralAI STT.
5757
5858
Args:
59-
language: The language code to use for transcription (e.g., "en" for English).
59+
language: The language code to use for transcription (e.g., "en" for English). Segment timestamps will only be available if set to None.
6060
model: The MistralAI model to use for transcription, default is voxtral-mini-latest.
6161
api_key: Your MistralAI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
6262
client: Optional pre-configured MistralAI client instance.
@@ -66,7 +66,6 @@ def __init__(
6666
capabilities=stt.STTCapabilities(
6767
streaming=False,
6868
interim_results=False,
69-
# timestamp granularity doesn't seem to work
7069
aligned_transcript=False,
7170
)
7271
)
@@ -123,16 +122,15 @@ async def _recognize_impl(
123122
model=self._opts.model,
124123
file={"content": data, "file_name": "audio.wav"},
125124
language=self._opts.language if self._opts.language else None,
126-
# for some reason, it doesn't return any segments even if we ask for them
127-
timestamp_granularities=["segment"],
125+
timestamp_granularities=None if self._opts.language else ["segment"],
128126
)
129127

130128
return stt.SpeechEvent(
131129
type=stt.SpeechEventType.FINAL_TRANSCRIPT,
132130
alternatives=[
133131
stt.SpeechData(
134132
text=resp.text,
135-
language=self._opts.language,
133+
language=self._opts.language if self._opts.language else "",
136134
start_time=resp.segments[0].start if resp.segments else 0,
137135
end_time=resp.segments[-1].end if resp.segments else 0,
138136
words=[

0 commit comments

Comments (0)