# AudioTranscribe/lee.py (from the Leehughes21/AudioTranscribe repository on GitHub, master branch)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import contextlib
import json
import math
import os
import subprocess
import wave

import whisper

# Configuration
file_path = "output.wav"  # WAV recording to transcribe
segment_length = 10  # length of each chunk, in seconds
model = whisper.load_model("medium")  # Whisper model used for transcription

# Measure the recording: frame count divided by frames-per-second gives the
# length in seconds. The reader returned by wave.open is itself a context
# manager, so the file is closed when the block exits.
with wave.open(file_path, 'r') as wav_file:
    duration = wav_file.getnframes() / float(wav_file.getframerate())

# Create folder for segments
os.makedirs("segments", exist_ok=True)

# Split into chunks using ffmpeg.
# Round up so a trailing partial chunk is kept, without producing an extra,
# empty chunk when the duration is an exact multiple of segment_length
# (and producing no chunks at all for a zero-length recording).
num_segments = math.ceil(duration / segment_length)
for i in range(num_segments):
    start = i * segment_length
    output_name = f"segments/segment_{i}.wav"
    # Arguments are passed as a list (no shell), so paths containing spaces or
    # shell metacharacters are handed to ffmpeg verbatim. check=True raises
    # CalledProcessError as soon as ffmpeg fails, instead of silently leaving
    # a missing segment for the transcription step to trip over.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", file_path,
            "-ss", str(start),
            "-t", str(segment_length),
            output_name,
        ],
        check=True,
    )

# Run Whisper over every chunk in order. Speaker labels are not detected:
# they simply alternate ("Speaker 1", "Speaker 2", ...) with the chunk index
# to simulate a two-person dialogue.
dialogue = [
    {
        "speaker": f"Speaker {index % 2 + 1}",
        "text": model.transcribe(f"segments/segment_{index}.wav")["text"].strip(),
    }
    for index in range(num_segments)
]

# Save to JSON.
# The file is written as UTF-8 explicitly (rather than the platform default
# encoding), and ensure_ascii=False keeps non-English transcriptions readable
# instead of escaping every non-ASCII character as \uXXXX.
with open("transcription.json", "w", encoding="utf-8") as f:
    json.dump(dialogue, f, ensure_ascii=False, indent=2)

print("Transcription saved to transcription.json")