When I used a 1-hour audio file, the first attempt took 2 minutes, but the second attempt froze completely. What could be the problem?

import sys
import os
sys.path.insert(0, './FunASR-main') 
# sys.path.insert(0, '/path/to/your/FunASR-main')

import time
import numpy as np
from funasr import AutoModel
import funasr
from typing import List, Dict
def format_recognition_result(res: List[Dict]) -> str:
    """Render recognition results as speaker-labelled, time-stamped text.

    For every entry in ``res`` that is a dict with a non-empty
    ``'sentence_info'`` list, a ``"res：\\n"`` header is emitted followed by
    one line per sentence dict in the form
    ``" <spk> [<start>s - <end>s]: <text>"``. The ``'start'`` and ``'end'``
    values are divided by 1000 before being printed with an ``s`` suffix,
    i.e. they are treated as milliseconds.

    Args:
        res: Result entries to format. Entries that are not dicts, or that
            have no (or an empty) ``'sentence_info'``, are skipped.

    Returns:
        All formatted pieces joined with newlines; an empty string when
        nothing could be formatted.
    """
    formatted_output = []

    for result in res:
        # Skip malformed entries instead of returning them: returning here
        # would hand a non-str back to the caller and throw away everything
        # already formatted. This mirrors how non-dict sentences are handled
        # in the inner loop below.
        if not isinstance(result, dict):
            continue

        sentences = result.get('sentence_info')
        if not sentences:
            continue

        formatted_output.append("res：\n")

        for sentence in sentences:
            if not isinstance(sentence, dict):
                continue

            speaker_id = sentence.get('spk')
            text = sentence.get('text', '')
            # Missing timestamps default to 0; values are converted to seconds.
            start_time = sentence.get('start', 0) / 1000
            end_time = sentence.get('end', 0) / 1000

            formatted_sentence = (
                f" {speaker_id} "
                f"[{start_time:.2f}s - {end_time:.2f}s]: "
                f"{text}"
            )
            formatted_output.append(formatted_sentence)

    return "\n".join(formatted_output)

# Local directories of the models used by this script.
# NOTE: sensevoicesmall_model_dir is assigned here but is not referenced by
# any of the code below.
sensevoicesmall_model_dir = "./funasr_models/iic/SenseVoiceSmall"
vad_model_dir = "./funasr_models/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"

paraformer_model_dir = "./funasr_models/iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
punc_ct_model_dir = "./funasr_models/iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_model_dir = "./funasr_models/iic/speech_campplus_sv_zh-cn_16k-common"

# Timestamp taken before the model is constructed. It is paired with
# load_time below, but neither value is read again in this script.
s_time = time.time()
# Build the pipeline: the Paraformer directory is passed as the main model,
# together with the VAD, punctuation (punc_model) and speaker (spk_model)
# model directories, on the "cuda" device.
# NOTE(review): 30000 for max_single_segment_time is presumably a per-segment
# limit in milliseconds — confirm against the FunASR VAD documentation.
model = AutoModel(
    model = paraformer_model_dir,
    vad_model=vad_model_dir,
    vad_kwargs={"max_single_segment_time": 30000},
    punc_model=punc_ct_model_dir,
    spk_model=cam_model_dir,
    device="cuda",
)
print(model.model_path)
# Timestamp taken after model construction (unused afterwards, see s_time).
load_time = time.time()
# input_file = f"./data/雷军测试.wav"
input_file = f"asr_speaker_demo.wav"

# Run recognition on input_file with a fresh, empty cache dict.
# NOTE(review): the exact meaning of the keyword arguments below (for example
# batch_size_s and merge_length_s) is defined by FunASR and is not visible in
# this script — check its documentation before tuning them.
# NOTE(review): verify that "zn" is a language code the loaded model accepts;
# it may have been meant as "zh".
res = model.generate(
    input=input_file,
    cache={},
    language="zn",  # "zn", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    # merge_vad=True,     
    merge_length_s=15,    
    output_timestamp=True, 
    output_spk_embedding=True   
)
# print(res)
# Print the result via format_recognition_result (one line per sentence with
# speaker id, time range and text).
print(format_recognition_result(res))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

When I used a 1-hour audio file, the first attempt took 2 minutes, but the second attempt froze completely. What could be the problem? #2770

sys.path.insert(0, '/path/to/your/FunASR-main')

input_file = f"./data/雷军测试.wav"

print(res)

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

When I used a 1-hour audio file, the first attempt took 2 minutes, but the second attempt froze completely. What could be the problem? #2770

Description

sys.path.insert(0, '/path/to/your/FunASR-main')

input_file = f"./data/雷军测试.wav"

print(res)

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions