1
1
import numpy as np
2
2
import torch
3
3
import time
4
+ import psutil
5
+ import os
4
6
from typing import Dict , List , Tuple , Optional
5
7
from modelscope .pipelines import pipeline
6
8
from modelscope .utils .constant import Tasks
@@ -22,14 +24,30 @@ def __init__(self):
22
24
23
25
def _init_pipeline(self) -> None:
    """Build the speaker-verification pipeline and store it on ``self._pipeline``.

    The device is chosen at call time: ``"cuda"`` when
    ``torch.cuda.is_available()`` reports a GPU, otherwise ``"cpu"``.
    Progress, the selected device and the elapsed wall-clock time are
    written to the module logger.

    Raises:
        Exception: any error raised while selecting the device or building
            the pipeline is logged together with the elapsed time and then
            re-raised unchanged.
    """
    began_at = time.time()
    logger.info("开始初始化声纹识别模型...")

    model_id = "iic/speech_campplus_sv_zh-cn_3dspeaker_16k"
    try:
        # Prefer the GPU when one is visible to torch; fall back to CPU.
        if not torch.cuda.is_available():
            target_device = "cpu"
            logger.info("使用CPU设备")
        else:
            target_device = "cuda"
            logger.info(f"使用GPU设备: {torch.cuda.get_device_name(0)} ")

        logger.info("开始加载模型: " + model_id)
        self._pipeline = pipeline(
            task=Tasks.speaker_verification,
            model=model_id,
            device=target_device,
        )

        elapsed = time.time() - began_at
        logger.info(f"声纹模型加载成功,耗时: {elapsed:.3f} 秒")
    except Exception as exc:
        # Report how long the failed attempt took, then let the caller decide.
        elapsed = time.time() - began_at
        logger.error(f"声纹模型加载失败,耗时: {elapsed:.3f} 秒,错误: {exc} ")
        raise
34
52
35
53
def _to_numpy (self , x ) -> np .ndarray :
@@ -44,6 +62,27 @@ def _to_numpy(self, x) -> np.ndarray:
44
62
"""
45
63
return x .cpu ().numpy () if torch .is_tensor (x ) else np .asarray (x )
46
64
65
def _log_system_resources(
    self, stage: str, cpu_interval: Optional[float] = None
) -> None:
    """Log a snapshot of system-wide and per-process resource usage.

    This is best-effort diagnostics: any failure while collecting or
    logging the figures is downgraded to a warning so that it never
    interrupts the caller.

    Args:
        stage: Label placed in square brackets at the start of each log
            line (the caller uses it to tag "before"/"after" inference).
        cpu_interval: Sampling window in seconds passed to
            ``psutil.cpu_percent``. The default ``None`` returns
            immediately with the utilisation measured since the previous
            call, so this helper does not stall the inference path.
            Pass a positive number to block for that long and obtain a
            fresh sample instead.

    Note:
        Per the psutil documentation, the very first non-blocking
        ``cpu_percent`` call in a process returns a meaningless ``0.0``;
        later calls are relative to the previous one.
    """
    try:
        # Non-blocking by default: a fixed 1-second sampling window here
        # would add a full second of latency to every call site.
        cpu_percent = psutil.cpu_percent(interval=cpu_interval)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage("/")

        logger.info(
            f"[{stage} ] 系统资源 - CPU: {cpu_percent} %, "
            f"内存: {memory.percent} % ({memory.used // 1024 // 1024} MB/{memory.total // 1024 // 1024} MB), "
            f"磁盘: {disk.percent} %"
        )

        # Resident memory of the current process, in MiB.
        process = psutil.Process(os.getpid())
        process_memory = process.memory_info()
        logger.info(f"[{stage} ] 进程内存使用: {process_memory.rss // 1024 // 1024} MB")

    except Exception as e:
        # Diagnostics only: never let a monitoring failure break inference.
        logger.warning(f"获取系统资源信息失败: {e} ")
85
+
47
86
def extract_voiceprint (self , audio_path : str ) -> np .ndarray :
48
87
"""
49
88
从音频文件中提取声纹特征
@@ -57,12 +96,19 @@ def extract_voiceprint(self, audio_path: str) -> np.ndarray:
57
96
start_time = time .time ()
58
97
logger .info (f"开始提取声纹特征,音频文件: { audio_path } " )
59
98
99
+ # 记录推理前系统资源
100
+ self ._log_system_resources ("推理前" )
101
+
60
102
try :
61
103
pipeline_start = time .time ()
104
+ logger .info ("开始模型推理..." )
62
105
result = self ._pipeline ([audio_path ], output_emb = True )
63
106
pipeline_time = time .time () - pipeline_start
64
107
logger .info (f"模型推理完成,耗时: { pipeline_time :.3f} 秒" )
65
108
109
+ # 记录推理后系统资源
110
+ self ._log_system_resources ("推理后" )
111
+
66
112
convert_start = time .time ()
67
113
emb = self ._to_numpy (result ["embs" ][0 ]).astype (np .float32 )
68
114
convert_time = time .time () - convert_start
0 commit comments