
Commit d685846

feat: πŸ’¬ Podcast audio transcript in Python (#108)
* feat: 💬 Podcast audio transcript in Python
* doc: 📝 add README
* clean: 🗑️ remove empty lines
1 parent a43f713 commit d685846

5 files changed: +53 −0 lines changed


β€Žai/ai-endpoints/README.mdβ€Ž

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -25,6 +25,7 @@ Don't hesitate to use the source code and give us feedback.
 
 ### 🐍 Python 🐍
 
+- [Podcast audio transcript](./podcast-transcript-whisper/python/)
 - [Chatbot with LangChain](./python-langchain-chatbot/): blocking mode, streaming mode, RAG mode.
 - [Streaming chatbot](./python-langchain-chatbot/) with LangChain
 - [Audio Summarizer Assistant](./audio-summarizer-assistant/) by connecting Speech-To-Text and LLM
```
Lines changed: 37 additions & 0 deletions

```python
import os
import json
from openai import OpenAI

# 🛠️ OpenAI client initialisation
client = OpenAI(base_url=os.environ.get('OVH_AI_ENDPOINTS_WHISPER_URL'),
                api_key=os.environ.get('OVH_AI_ENDPOINTS_ACCESS_TOKEN'))

# 🎼 Audio file loading
with open("../resources/TdT20-trimed-2.mp3", "rb") as audio_file:
    # 📝 Call the Whisper transcription API
    transcript = client.audio.transcriptions.create(
        model=os.environ.get('OVH_AI_ENDPOINTS_WHISPER_MODEL'),
        file=audio_file,
        temperature=0.0,
        response_format="verbose_json",
        extra_body={"diarize": True},
    )

# 🔀 Merge consecutive dialog segments spoken by the same speaker
diarizedTranscript = ''
speakers = ["Aurélie", "Guillaume", "Stéphane"]
previousSpeaker = -1
jsonTranscript = json.loads(transcript.model_dump_json())

# 💬 Only the diarization field is useful
for dialog in jsonTranscript["diarization"]:
    speaker = dialog.get("speaker")
    text = dialog.get("text")
    if previousSpeaker == speaker:
        diarizedTranscript += f" {text}"
    else:
        diarizedTranscript += f"\n\n{speakers[speaker]}: {text}"
    previousSpeaker = speaker

print(f"\n📝 Diarized Transcript 📝:\n{diarizedTranscript}")
```
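The merge loop above only depends on the `diarization` field being a list of entries exposing an integer `speaker` index and a `text` string. Below is a minimal, self-contained sketch of that same merging logic run on a hand-built payload; the field names come from the script, while the payload values and the printed result are purely illustrative:

```python
# Hypothetical payload mimicking only the fields the script reads from the
# verbose_json response; a real response contains more (timestamps, language, ...).
mock_response = {
    "diarization": [
        {"speaker": 0, "text": "Welcome to the show."},
        {"speaker": 0, "text": "Today we talk about AI Endpoints."},
        {"speaker": 1, "text": "Thanks for having me!"},
    ]
}

speakers = ["Aurélie", "Guillaume", "Stéphane"]
diarizedTranscript = ''
previousSpeaker = -1

for dialog in mock_response["diarization"]:
    speaker = dialog.get("speaker")
    text = dialog.get("text")
    if previousSpeaker == speaker:
        # Same speaker as the previous segment: extend the current turn
        diarizedTranscript += f" {text}"
    else:
        # New speaker: start a new paragraph with the speaker's name
        diarizedTranscript += f"\n\n{speakers[speaker]}: {text}"
    previousSpeaker = speaker

print(diarizedTranscript)
# Aurélie: Welcome to the show. Today we talk about AI Endpoints.
#
# Guillaume: Thanks for having me!
```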
Lines changed: 14 additions & 0 deletions

# 🛠️ Setup environment 🛠️
- create the following environment variables:
```bash
OVH_AI_ENDPOINTS_WHISPER_URL=<whisper model URL>
OVH_AI_ENDPOINTS_ACCESS_TOKEN=<your_access_token>
OVH_AI_ENDPOINTS_WHISPER_MODEL=whisper-large-v3
```
- install required dependencies: `pip install -r requirements.txt`

# 🚀 Run the application 🚀

```bash
$ python PodcastTranscriptWithWhisper.py
```
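The setup section of this README lists the variables without showing how to set them. One possible way is to export them for the current shell session, keeping the README's own placeholders (a sketch only; replace the values with your AI Endpoints settings):

```bash
export OVH_AI_ENDPOINTS_WHISPER_URL="<whisper model URL>"
export OVH_AI_ENDPOINTS_ACCESS_TOKEN="<your_access_token>"
export OVH_AI_ENDPOINTS_WHISPER_MODEL="whisper-large-v3"
```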
Lines changed: 1 addition & 0 deletions

```
openai==1.97.0
```
Binary file (1.72 MB) not shown.

0 commit comments
