Update ASR model and add batch size and return timestamps options

kadirnar · kadirnar · commit 0aab9f8331c2 · 2023-11-24T17:50:36.000+03:00
diff --git a/README.md b/README.md
@@ -79,7 +79,7 @@ pipeline = ASRDiarizationPipeline.from_pretrained(
     device=device,
 )
 
-output_text = pipeline(audio_path)
+output_text = pipeline(audio_path, num_speakers=2, min_speaker=1, max_speaker=2)
 dialogue = format_speech_to_dialogue(output_text)
 print(dialogue)
 ```
diff --git a/whisperplus/pipelines/whisper_diarize.py b/whisperplus/pipelines/whisper_diarize.py
@@ -24,7 +24,7 @@ def __init__(
     @classmethod
     def from_pretrained(
         cls,
-        asr_model: Optional[str] = "openai/whisper-medium",
+        asr_model: Optional[str] = "openai/whisper-large-v3",
         *,
         diarizer_model: Optional[str] = "pyannote/speaker-diarization",
         chunk_length_s: Optional[int] = 30,
@@ -35,7 +35,9 @@ def from_pretrained(
             "automatic-speech-recognition",
             model=asr_model,
             chunk_length_s=chunk_length_s,
-            token=use_auth_token,  # 08/25/2023: Changed argument from use_auth_token to token
+            token=use_auth_token,
+            batch_size=24,
+            return_timestamps=True,
             **kwargs,
         )
         diarization_pipeline = Pipeline.from_pretrained(diarizer_model, use_auth_token=use_auth_token)

Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@ pipeline = ASRDiarizationPipeline.from_pretrained(`
`79`	`79`	`device=device,`
`80`	`80`	`)`
`81`	`81`
`82`		`-output_text = pipeline(audio_path)`
	`82`	`+output_text = pipeline(audio_path, num_speakers=2, min_speaker=1, max_speaker=2)`
`83`	`83`	`dialogue = format_speech_to_dialogue(output_text)`
`84`	`84`	`print(dialogue)`
`85`	`85`	```