Get (relative) timestamps for whisper.decode (no transcribe call) #1141
Replies: 1 comment 1 reply
-
# I did it myself! Here is my totally bad code, but it works:
def transcribe_test(input_audio, prompt=""):
    """Decode one audio window with ``whisper.decode`` and add word timestamps.

    The audio is loaded, padded/trimmed, converted to a log-mel spectrogram
    and decoded with timestamps enabled. ``add_word_timestamps`` is then run
    on a single segment built from the decoded tokens, so word timings are
    obtained without going through ``model.transcribe()``.

    Relies on module-level ``model`` and ``tokenizer`` (the latter is created
    lazily on first call and cached in the global).

    Args:
        input_audio: Audio source handed to ``whisper.load_audio``.
        prompt: Optional prompt forwarded to ``whisper.DecodingOptions``.

    Returns:
        A 5-tuple ``(text, colored_text, text_tokens, token_probs,
        word_timestamps)`` where ``word_timestamps`` is the ``"words"`` entry
        that ``add_word_timestamps`` stores on the segment.
    """
    global tokenizer

    # These have to be plain strings. The earlier version ended each
    # assignment with a trailing comma (copied from a parameter list), which
    # silently turned them into 1-tuples before they were passed on.
    # NOTE(review): presumably add_word_timestamps tests single punctuation
    # characters against these - verify against whisper/timing.py.
    prepend_punctuations = "\"'“¿([{-"
    append_punctuations = "\"'.。,,!!??::”)]}、"

    audio = whisper.load_audio(input_audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    options = whisper.DecodingOptions(
        prompt=prompt,
        max_initial_timestamp=None,
        without_timestamps=False,
    )
    result = whisper.decode(model, mel, options)

    if tokenizer is None:
        # NOTE(review): language is fixed to 'en' here - confirm that is
        # intended for multilingual models.
        tokenizer = get_tokenizer(
            multilingual=model.is_multilingual,
            language='en',
            task=options.task,
        )

    # Decode token by token so each piece of text lines up with one entry of
    # result.token_probs.
    # NOTE(review): token_probs is read from the decode result as-is; confirm
    # the installed whisper build provides that attribute.
    text_tokens = [tokenizer.decode([t]) for t in result.tokens]
    colored_text = get_colored_text(text_tokens, result.token_probs, tokenizer, prompt)

    starttime = time.perf_counter()

    # One segment covering the whole decoded window.
    # NOTE(review): audio was already padded/trimmed above, so "end" is the
    # length of that window, not the duration of the original clip - confirm.
    segments = [
        {
            "seek": 0,
            "start": 0,
            "end": len(audio) / SAMPLE_RATE,
            "tokens": result.tokens,
        }
    ]
    add_word_timestamps(
        segments=segments,
        model=model,
        tokenizer=tokenizer,
        mel=mel,
        num_frames=mel.shape[-1],
        prepend_punctuations=prepend_punctuations,
        append_punctuations=append_punctuations,
    )
    word_timestamps = segments[0]["words"]
    print(f"time: {time.perf_counter() - starttime}")

    return result.text, colored_text, text_tokens, result.token_probs, word_timestamps
Beta Was this translation helpful? Give feedback.
1 reply
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
Hello!
I need to get timestamps with the following code:
How can I extract the timestamps there without calling "model.transcribe()"?
I looked through timing.py but still can't work out how to do it...
Thank you very much!
Beta Was this translation helpful? Give feedback.
All reactions