-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Description
import sys
import os
sys.path.insert(0, './FunASR-main')
sys.path.insert(0, '/path/to/your/FunASR-main')
import time
import numpy as np
from funasr import AutoModel
import funasr
from typing import List, Dict
def format_recognition_result(res: List[Dict]) -> str:
    """Render recognition results as readable, speaker-labelled text.

    For every dict in ``res`` that has a non-empty ``'sentence_info'`` entry,
    a ``"res:\\n"`` header is emitted, followed by one line per sentence in the
    form ``" <spk> [<start>s - <end>s]: <text>"``. The ``'start'`` and
    ``'end'`` values are divided by 1000 before being printed with an ``s``
    suffix (i.e. they are treated as milliseconds).

    Args:
        res: Sequence of result dicts. ``None`` or an empty sequence is
            accepted and produces an empty string.

    Returns:
        All emitted lines joined with newlines; ``""`` when nothing could be
        formatted.
    """
    formatted_output = []
    for result in res or []:
        # Skip malformed entries instead of returning them: the function is
        # declared to return a string, and bailing out here would also throw
        # away everything formatted so far.
        if not isinstance(result, dict):
            continue
        sentences = result.get('sentence_info')
        if not sentences:
            continue
        formatted_output.append("res:\n")
        for sentence in sentences:
            if not isinstance(sentence, dict):
                continue
            speaker_id = sentence.get('spk')
            text = sentence.get('text', '')
            # `or 0` also covers keys that are present but set to None, which
            # would otherwise raise TypeError on the division.
            start_time = (sentence.get('start') or 0) / 1000
            end_time = (sentence.get('end') or 0) / 1000
            formatted_sentence = (
                f" {speaker_id} "
                f"[{start_time:.2f}s - {end_time:.2f}s]: "
                f"{text}"
            )
            formatted_output.append(formatted_sentence)
    return "\n".join(formatted_output)
# Local model directories, relative to the current working directory.
# NOTE: sensevoicesmall_model_dir is defined but not passed to AutoModel below.
sensevoicesmall_model_dir = "./funasr_models/iic/SenseVoiceSmall"
vad_model_dir = "./funasr_models/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
paraformer_model_dir = "./funasr_models/iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
punc_ct_model_dir = "./funasr_models/iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_model_dir = "./funasr_models/iic/speech_campplus_sv_zh-cn_16k-common"

# Timestamps taken before and after model construction; they are recorded
# here but not printed by this script.
s_time = time.time()
model = AutoModel(
    model=paraformer_model_dir,
    vad_model=vad_model_dir,
    vad_kwargs={"max_single_segment_time": 30000},
    punc_model=punc_ct_model_dir,
    spk_model=cam_model_dir,
    device="cuda",
)
print(model.model_path)
load_time = time.time()

# Audio file to transcribe. Another sample previously used with this script:
# "./data/雷军测试.wav"
input_file = "asr_speaker_demo.wav"
res = model.generate(
    input=input_file,
    cache={},
    # NOTE(review): `language` / `use_itn` look like SenseVoice-style options;
    # confirm the Paraformer pipeline configured above actually honours them.
    language="zh",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    # merge_vad=True,
    merge_length_s=15,
    output_timestamp=True,
    output_spk_embedding=True,
)

# Dump the raw result first, then the speaker-labelled rendering.
print(res)
print(format_recognition_result(res))