wip: add Whisper transcription sample app (apps/16_whisper_transcription)

ks6088ts · ks6088ts · commit ea681ca120f5 · 2024-10-08T23:19:00.000+09:00
diff --git a/apps/16_whisper_transcription/README.md b/apps/16_whisper_transcription/README.md
@@ -0,0 +1,3 @@
+# References
+
+- [openai/whisper](https://github.com/openai/whisper)
diff --git a/apps/16_whisper_transcription/main.py b/apps/16_whisper_transcription/main.py
@@ -0,0 +1,21 @@
+import whisper
+
+model = whisper.load_model("turbo")
+
+# load the sample audio (path is relative to the working directory) and pad/trim it to fit 30 seconds
+audio = whisper.load_audio("apps/16_whisper_transcription/sample_audio.wav")
+audio = whisper.pad_or_trim(audio)
+
+# make log-Mel spectrogram with the model's own mel-bin count (the default of 80 does not match "turbo") and move to the model's device
+mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
+
+# detect the spoken language
+_, probs = model.detect_language(mel)
+print(f"Detected language: {max(probs, key=probs.get)}")
+
+# decode the audio
+options = whisper.DecodingOptions()
+result = whisper.decode(model, mel, options)
+
+# print the recognized text
+print(result.text)
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,7 @@ lxml = "^5.3.0"
 nest-asyncio = "^1.6.0"
 typer = "^0.12.5"
 azure-cognitiveservices-speech = "^1.40.0"
+openai-whisper = "^20240930"
 
 [tool.poetry.group.dev.dependencies]
 pre-commit = "^4.0.0"

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# References`
	`2`	`+`
	`3`	`+- [openai/whisper](https://github.com/openai/whisper)`