@@ -32,7 +32,7 @@ def _init_pipeline(self) -> None:
32
32
try :
33
33
# 检查CUDA可用性
34
34
if torch .cuda .is_available ():
35
- device = "cuda "
35
+ device = "gpu "
36
36
logger .info (f"使用GPU设备: { torch .cuda .get_device_name (0 )} " )
37
37
else :
38
38
device = "cpu"
@@ -64,27 +64,6 @@ def _to_numpy(self, x) -> np.ndarray:
64
64
"""
65
65
return x .cpu ().numpy () if torch .is_tensor (x ) else np .asarray (x )
66
66
67
- def _log_system_resources (self , stage : str ):
68
- """记录系统资源使用情况"""
69
- try :
70
- cpu_percent = psutil .cpu_percent (interval = 1 )
71
- memory = psutil .virtual_memory ()
72
- disk = psutil .disk_usage ("/" )
73
-
74
- logger .info (
75
- f"[{ stage } ] 系统资源 - CPU: { cpu_percent } %, "
76
- f"内存: { memory .percent } % ({ memory .used // 1024 // 1024 } MB/{ memory .total // 1024 // 1024 } MB), "
77
- f"磁盘: { disk .percent } %"
78
- )
79
-
80
- # 检查当前进程资源使用
81
- process = psutil .Process (os .getpid ())
82
- process_memory = process .memory_info ()
83
- logger .info (f"[{ stage } ] 进程内存使用: { process_memory .rss // 1024 // 1024 } MB" )
84
-
85
- except Exception as e :
86
- logger .warning (f"获取系统资源信息失败: { e } " )
87
-
88
67
def extract_voiceprint (self , audio_path : str ) -> np .ndarray :
89
68
"""
90
69
从音频文件中提取声纹特征
@@ -98,30 +77,24 @@ def extract_voiceprint(self, audio_path: str) -> np.ndarray:
98
77
start_time = time .time ()
99
78
logger .info (f"开始提取声纹特征,音频文件: { audio_path } " )
100
79
101
- # 记录推理前系统资源
102
- self ._log_system_resources ("推理前" )
103
-
104
80
try :
105
81
# 使用线程锁确保模型推理的线程安全
106
82
with self ._pipeline_lock :
107
83
pipeline_start = time .time ()
108
- logger .info ("开始模型推理..." )
84
+ logger .debug ("开始模型推理..." )
109
85
110
86
# 检查pipeline是否可用
111
87
if self ._pipeline is None :
112
88
raise RuntimeError ("声纹模型未初始化" )
113
89
114
90
result = self ._pipeline ([audio_path ], output_emb = True )
115
91
pipeline_time = time .time () - pipeline_start
116
- logger .info (f"模型推理完成,耗时: { pipeline_time :.3f} 秒" )
117
-
118
- # 记录推理后系统资源
119
- self ._log_system_resources ("推理后" )
92
+ logger .debug (f"模型推理完成,耗时: { pipeline_time :.3f} 秒" )
120
93
121
94
convert_start = time .time ()
122
95
emb = self ._to_numpy (result ["embs" ][0 ]).astype (np .float32 )
123
96
convert_time = time .time () - convert_start
124
- logger .info (f"数据转换完成,耗时: { convert_time :.3f} 秒" )
97
+ logger .debug (f"数据转换完成,耗时: { convert_time :.3f} 秒" )
125
98
126
99
total_time = time .time () - start_time
127
100
logger .info (
@@ -167,9 +140,9 @@ def register_voiceprint(self, speaker_id: str, audio_bytes: bytes) -> bool:
167
140
"""
168
141
audio_path = None
169
142
try :
170
- # 验证音频文件
171
- if not audio_processor . validate_audio_file (audio_bytes ):
172
- logger .warning (f"音频文件验证失败 : { speaker_id } " )
143
+ # 简化音频验证,只做基本检查
144
+ if len (audio_bytes ) < 1000 : # 文件太小
145
+ logger .warning (f"音频文件过小 : { speaker_id } " )
173
146
return False
174
147
175
148
# 处理音频文件
@@ -214,33 +187,30 @@ def identify_voiceprint(
214
187
215
188
audio_path = None
216
189
try :
217
- # 验证音频文件
218
- validation_start = time .time ()
219
- if not audio_processor .validate_audio_file (audio_bytes ):
220
- logger .warning ("音频文件验证失败" )
190
+ # 简化音频验证
191
+ if len (audio_bytes ) < 1000 :
192
+ logger .warning ("音频文件过小" )
221
193
return "" , 0.0
222
- validation_time = time .time () - validation_start
223
- logger .info (f"音频文件验证完成,耗时: { validation_time :.3f} 秒" )
224
194
225
195
# 处理音频文件
226
196
audio_process_start = time .time ()
227
197
audio_path = audio_processor .ensure_16k_wav (audio_bytes )
228
198
audio_process_time = time .time () - audio_process_start
229
- logger .info (f"音频文件处理完成,耗时: { audio_process_time :.3f} 秒" )
199
+ logger .debug (f"音频文件处理完成,耗时: { audio_process_time :.3f} 秒" )
230
200
231
201
# 提取声纹特征
232
202
extract_start = time .time ()
233
- logger .info ("开始提取声纹特征..." )
203
+ logger .debug ("开始提取声纹特征..." )
234
204
test_emb = self .extract_voiceprint (audio_path )
235
205
extract_time = time .time () - extract_start
236
- logger .info (f"声纹特征提取完成,耗时: { extract_time :.3f} 秒" )
206
+ logger .debug (f"声纹特征提取完成,耗时: { extract_time :.3f} 秒" )
237
207
238
208
# 获取候选声纹特征
239
209
db_query_start = time .time ()
240
- logger .info ("开始查询数据库获取候选声纹特征..." )
210
+ logger .debug ("开始查询数据库获取候选声纹特征..." )
241
211
voiceprints = voiceprint_db .get_voiceprints (speaker_ids )
242
212
db_query_time = time .time () - db_query_start
243
- logger .info (
213
+ logger .debug (
244
214
f"数据库查询完成,获取到{ len (voiceprints )} 个声纹特征,耗时: { db_query_time :.3f} 秒"
245
215
)
246
216
@@ -250,13 +220,13 @@ def identify_voiceprint(
250
220
251
221
# 计算相似度
252
222
similarity_start = time .time ()
253
- logger .info ("开始计算相似度..." )
223
+ logger .debug ("开始计算相似度..." )
254
224
similarities = {}
255
225
for name , emb in voiceprints .items ():
256
226
similarity = self .calculate_similarity (test_emb , emb )
257
227
similarities [name ] = similarity
258
228
similarity_time = time .time () - similarity_start
259
- logger .info (
229
+ logger .debug (
260
230
f"相似度计算完成,共计算{ len (similarities )} 个,耗时: { similarity_time :.3f} 秒"
261
231
)
262
232
0 commit comments