wip

ks6088ts · ks6088ts · commit e8bd175804fc · 2024-10-09T08:25:14.000+09:00
diff --git a/apps/16_whisper_transcription/README.md b/apps/16_whisper_transcription/README.md
@@ -1,3 +1,4 @@
 # References
 
 - [openai/whisper](https://github.com/openai/whisper)
+- [Improve --model argument handling and help message #1764](https://github.com/openai/whisper/pull/1764)
diff --git a/apps/16_whisper_transcription/main.py b/apps/16_whisper_transcription/main.py
@@ -1,13 +1,26 @@
 import whisper
 
-model = whisper.load_model("turbo")
+# Adapted from the "Python usage" example in the Whisper README: https://github.com/openai/whisper?tab=readme-ov-file#python-usage
+model_name = "turbo"
+file_path = "dist/sample_audio.wav"
+
+model = whisper.load_model(name=model_name)
 
 # load audio and pad/trim it to fit 30 seconds
-audio = whisper.load_audio("apps/16_whisper_transcription/sample_audio.wav")
-audio = whisper.pad_or_trim(audio)
+audio = whisper.load_audio(
+    file=file_path,
+)
+audio = whisper.pad_or_trim(
+    array=audio,
+    length=30 * 16000,
+)
 
 # make log-Mel spectrogram and move to the same device as the model
-mel = whisper.log_mel_spectrogram(audio).to(model.device)
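+# n_mels is passed explicitly so the spectrogram matches the mel-bin count the loaded model expects (presumably 128 for "turbo" -- confirm); see https://github.com/openai/whisper/pull/1764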
+mel = whisper.log_mel_spectrogram(
+    audio=audio,
+    n_mels=128,
+).to(model.device)
 
 # detect the spoken language
 _, probs = model.detect_language(mel)

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
`1`	`1`	`# References`
`2`	`2`
`3`	`3`	`- [openai/whisper](https://github.com/openai/whisper)`
	`4`	`+- [Improve --model argument handling and help message #1764](https://github.com/openai/whisper/pull/1764)`