Skip to content

When I used a 1-hour audio file, the first attempt took 2 minutes, but the second attempt froze completely. What could be the problem? #2770

@jinwater88

Description

@jinwater88

`import sys
import os
sys.path.insert(0, './FunASR-main')

sys.path.insert(0, '/path/to/your/FunASR-main')

import time
import numpy as np
from funasr import AutoModel
import funasr
from typing import List, Dict
def format_recognition_result(res: List[Dict]) -> str:
    """Render speaker-attributed recognition results as readable text.

    Every dict in ``res`` that carries a non-empty ``'sentence_info'`` list
    contributes a ``res:`` header followed by one line per sentence of the
    form `` <spk> [<start>s - <end>s]: <text>``.  The ``'start'`` and
    ``'end'`` values are divided by 1000 before being shown with two
    decimals (presumably milliseconds converted to seconds -- confirm
    against the model output).

    Args:
        res: Result entries, e.g. the list returned by ``model.generate``.
            ``None`` or an empty list yields an empty string.

    Returns:
        The formatted lines joined by newlines; an empty string when there
        is nothing to format.
    """
    if not res:
        return ""

    formatted_output = []

    for result in res:
        # Skip malformed entries instead of returning the raw element: the
        # function promises a ``str`` and must not drop what was already
        # formatted.  This mirrors how malformed sentences are skipped below.
        if not isinstance(result, dict):
            continue

        sentences = result.get('sentence_info')
        if not sentences:
            continue

        formatted_output.append("res:\n")

        for sentence in sentences:
            if not isinstance(sentence, dict):
                continue

            speaker_id = sentence.get('spk')
            text = sentence.get('text', '')
            start_time = sentence.get('start', 0) / 1000
            end_time = sentence.get('end', 0) / 1000

            formatted_output.append(
                f" {speaker_id} "
                f"[{start_time:.2f}s - {end_time:.2f}s]: "
                f"{text}"
            )

    return "\n".join(formatted_output)

# Local model directories. ``sensevoicesmall_model_dir`` is defined but not
# passed to the pipeline built below.
sensevoicesmall_model_dir = "./funasr_models/iic/SenseVoiceSmall"
vad_model_dir = "./funasr_models/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"

paraformer_model_dir = "./funasr_models/iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
punc_ct_model_dir = "./funasr_models/iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_model_dir = "./funasr_models/iic/speech_campplus_sv_zh-cn_16k-common"

# Build the pipeline from the Paraformer, VAD, punctuation and speaker model
# directories above, on the CUDA device.
s_time = time.time()  # timestamp taken before model loading
model = AutoModel(
    model=paraformer_model_dir,
    vad_model=vad_model_dir,
    # presumably milliseconds (30 s per VAD segment) -- confirm with FunASR docs
    vad_kwargs={"max_single_segment_time": 30000},
    punc_model=punc_ct_model_dir,
    spk_model=cam_model_dir,
    device="cuda",
)
print(model.model_path)
load_time = time.time()  # timestamp taken after model loading

# Alternative input that was previously assigned and immediately overwritten:
#   "./data/雷军测试.wav"
input_file = "asr_speaker_demo.wav"

res = model.generate(
    input=input_file,
    cache={},
    # "zh" is the Chinese language code; the previous value "zn" was a typo.
    # NOTE(review): this option appears to target SenseVoice-style models --
    # confirm whether the Paraformer model configured above reads it at all.
    language="zh",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    # merge_vad=True,
    merge_length_s=15,
    output_timestamp=True,
    output_spk_embedding=True,
)

# Raw result first, then the per-speaker formatted view.
print(res)

print(format_recognition_result(res))

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions