Skip to content

Commit 728a769

Browse files
committed
update:优化速度
1 parent c18bc1b commit 728a769

File tree

3 files changed

+29
-58
lines changed

3 files changed

+29
-58
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
conda remove -n voiceprint-api --all -y
1212
conda create -n voiceprint-api python=3.10 -y
1313
conda activate voiceprint-api
14+
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
1415

1516
pip install -r requirements.txt
1617
```

app/services/voiceprint_service.py

Lines changed: 17 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def _init_pipeline(self) -> None:
3232
try:
3333
# 检查CUDA可用性
3434
if torch.cuda.is_available():
35-
device = "cuda"
35+
device = "gpu"
3636
logger.info(f"使用GPU设备: {torch.cuda.get_device_name(0)}")
3737
else:
3838
device = "cpu"
@@ -64,27 +64,6 @@ def _to_numpy(self, x) -> np.ndarray:
6464
"""
6565
return x.cpu().numpy() if torch.is_tensor(x) else np.asarray(x)
6666

67-
def _log_system_resources(self, stage: str):
68-
"""记录系统资源使用情况"""
69-
try:
70-
cpu_percent = psutil.cpu_percent(interval=1)
71-
memory = psutil.virtual_memory()
72-
disk = psutil.disk_usage("/")
73-
74-
logger.info(
75-
f"[{stage}] 系统资源 - CPU: {cpu_percent}%, "
76-
f"内存: {memory.percent}% ({memory.used//1024//1024}MB/{memory.total//1024//1024}MB), "
77-
f"磁盘: {disk.percent}%"
78-
)
79-
80-
# 检查当前进程资源使用
81-
process = psutil.Process(os.getpid())
82-
process_memory = process.memory_info()
83-
logger.info(f"[{stage}] 进程内存使用: {process_memory.rss//1024//1024}MB")
84-
85-
except Exception as e:
86-
logger.warning(f"获取系统资源信息失败: {e}")
87-
8867
def extract_voiceprint(self, audio_path: str) -> np.ndarray:
8968
"""
9069
从音频文件中提取声纹特征
@@ -98,30 +77,24 @@ def extract_voiceprint(self, audio_path: str) -> np.ndarray:
9877
start_time = time.time()
9978
logger.info(f"开始提取声纹特征,音频文件: {audio_path}")
10079

101-
# 记录推理前系统资源
102-
self._log_system_resources("推理前")
103-
10480
try:
10581
# 使用线程锁确保模型推理的线程安全
10682
with self._pipeline_lock:
10783
pipeline_start = time.time()
108-
logger.info("开始模型推理...")
84+
logger.debug("开始模型推理...")
10985

11086
# 检查pipeline是否可用
11187
if self._pipeline is None:
11288
raise RuntimeError("声纹模型未初始化")
11389

11490
result = self._pipeline([audio_path], output_emb=True)
11591
pipeline_time = time.time() - pipeline_start
116-
logger.info(f"模型推理完成,耗时: {pipeline_time:.3f}秒")
117-
118-
# 记录推理后系统资源
119-
self._log_system_resources("推理后")
92+
logger.debug(f"模型推理完成,耗时: {pipeline_time:.3f}秒")
12093

12194
convert_start = time.time()
12295
emb = self._to_numpy(result["embs"][0]).astype(np.float32)
12396
convert_time = time.time() - convert_start
124-
logger.info(f"数据转换完成,耗时: {convert_time:.3f}秒")
97+
logger.debug(f"数据转换完成,耗时: {convert_time:.3f}秒")
12598

12699
total_time = time.time() - start_time
127100
logger.info(
@@ -167,9 +140,9 @@ def register_voiceprint(self, speaker_id: str, audio_bytes: bytes) -> bool:
167140
"""
168141
audio_path = None
169142
try:
170-
# 验证音频文件
171-
if not audio_processor.validate_audio_file(audio_bytes):
172-
logger.warning(f"音频文件验证失败: {speaker_id}")
143+
# 简化音频验证,只做基本检查
144+
if len(audio_bytes) < 1000: # 文件太小
145+
logger.warning(f"音频文件过小: {speaker_id}")
173146
return False
174147

175148
# 处理音频文件
@@ -214,33 +187,30 @@ def identify_voiceprint(
214187

215188
audio_path = None
216189
try:
217-
# 验证音频文件
218-
validation_start = time.time()
219-
if not audio_processor.validate_audio_file(audio_bytes):
220-
logger.warning("音频文件验证失败")
190+
# 简化音频验证
191+
if len(audio_bytes) < 1000:
192+
logger.warning("音频文件过小")
221193
return "", 0.0
222-
validation_time = time.time() - validation_start
223-
logger.info(f"音频文件验证完成,耗时: {validation_time:.3f}秒")
224194

225195
# 处理音频文件
226196
audio_process_start = time.time()
227197
audio_path = audio_processor.ensure_16k_wav(audio_bytes)
228198
audio_process_time = time.time() - audio_process_start
229-
logger.info(f"音频文件处理完成,耗时: {audio_process_time:.3f}秒")
199+
logger.debug(f"音频文件处理完成,耗时: {audio_process_time:.3f}秒")
230200

231201
# 提取声纹特征
232202
extract_start = time.time()
233-
logger.info("开始提取声纹特征...")
203+
logger.debug("开始提取声纹特征...")
234204
test_emb = self.extract_voiceprint(audio_path)
235205
extract_time = time.time() - extract_start
236-
logger.info(f"声纹特征提取完成,耗时: {extract_time:.3f}秒")
206+
logger.debug(f"声纹特征提取完成,耗时: {extract_time:.3f}秒")
237207

238208
# 获取候选声纹特征
239209
db_query_start = time.time()
240-
logger.info("开始查询数据库获取候选声纹特征...")
210+
logger.debug("开始查询数据库获取候选声纹特征...")
241211
voiceprints = voiceprint_db.get_voiceprints(speaker_ids)
242212
db_query_time = time.time() - db_query_start
243-
logger.info(
213+
logger.debug(
244214
f"数据库查询完成,获取到{len(voiceprints)}个声纹特征,耗时: {db_query_time:.3f}秒"
245215
)
246216

@@ -250,13 +220,13 @@ def identify_voiceprint(
250220

251221
# 计算相似度
252222
similarity_start = time.time()
253-
logger.info("开始计算相似度...")
223+
logger.debug("开始计算相似度...")
254224
similarities = {}
255225
for name, emb in voiceprints.items():
256226
similarity = self.calculate_similarity(test_emb, emb)
257227
similarities[name] = similarity
258228
similarity_time = time.time() - similarity_start
259-
logger.info(
229+
logger.debug(
260230
f"相似度计算完成,共计算{len(similarities)}个,耗时: {similarity_time:.3f}秒"
261231
)
262232

app/utils/audio_utils.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def ensure_16k_wav(self, audio_bytes: bytes) -> str:
3131
str: 临时文件路径
3232
"""
3333
start_time = time.time()
34-
logger.info(f"开始音频处理,输入大小: {len(audio_bytes)}字节")
34+
logger.debug(f"开始音频处理,输入大小: {len(audio_bytes)}字节")
3535

3636
with tempfile.NamedTemporaryFile(
3737
delete=False, suffix=".wav", dir=self.tmp_dir
@@ -44,14 +44,14 @@ def ensure_16k_wav(self, audio_bytes: bytes) -> str:
4444
read_start = time.time()
4545
data, sr = sf.read(tmp_path)
4646
read_time = time.time() - read_start
47-
logger.info(
47+
logger.debug(
4848
f"音频文件读取完成,采样率: {sr}Hz,时长: {len(data)/sr:.2f}秒,耗时: {read_time:.3f}秒"
4949
)
5050

5151
if sr != self.target_sample_rate:
5252
# librosa重采样,支持多通道
5353
resample_start = time.time()
54-
logger.info(f"开始音频重采样: {sr}Hz -> {self.target_sample_rate}Hz")
54+
logger.debug(f"开始音频重采样: {sr}Hz -> {self.target_sample_rate}Hz")
5555

5656
if data.ndim == 1:
5757
data_rs = librosa.resample(
@@ -70,16 +70,16 @@ def ensure_16k_wav(self, audio_bytes: bytes) -> str:
7070
).T
7171

7272
resample_time = time.time() - resample_start
73-
logger.info(f"音频重采样完成,耗时: {resample_time:.3f}秒")
73+
logger.debug(f"音频重采样完成,耗时: {resample_time:.3f}秒")
7474

7575
# 写入重采样后的音频
7676
write_start = time.time()
7777
sf.write(tmp_path, data_rs, self.target_sample_rate)
7878
write_time = time.time() - write_start
79-
logger.info(f"重采样音频写入完成,耗时: {write_time:.3f}秒")
79+
logger.debug(f"重采样音频写入完成,耗时: {write_time:.3f}秒")
8080

8181
total_time = time.time() - start_time
82-
logger.info(f"音频处理完成,总耗时: {total_time:.3f}秒")
82+
logger.debug(f"音频处理完成,总耗时: {total_time:.3f}秒")
8383
return tmp_path
8484

8585
except Exception as e:
@@ -92,7 +92,7 @@ def ensure_16k_wav(self, audio_bytes: bytes) -> str:
9292

9393
def validate_audio_file(self, audio_bytes: bytes) -> bool:
9494
"""
95-
验证音频文件格式是否有效
95+
验证音频文件格式是否有效(简化版本)
9696
9797
Args:
9898
audio_bytes: 音频字节数据
@@ -101,7 +101,7 @@ def validate_audio_file(self, audio_bytes: bytes) -> bool:
101101
bool: 音频文件是否有效
102102
"""
103103
start_time = time.time()
104-
logger.info(f"开始音频文件验证,输入大小: {len(audio_bytes)}字节")
104+
logger.debug(f"开始音频文件验证,输入大小: {len(audio_bytes)}字节")
105105

106106
try:
107107
with tempfile.NamedTemporaryFile(
@@ -114,7 +114,7 @@ def validate_audio_file(self, audio_bytes: bytes) -> bool:
114114
read_start = time.time()
115115
data, sr = sf.read(tmp_path)
116116
read_time = time.time() - read_start
117-
logger.info(
117+
logger.debug(
118118
f"音频文件读取完成,采样率: {sr}Hz,数据长度: {len(data)},耗时: {read_time:.3f}秒"
119119
)
120120

@@ -138,7 +138,7 @@ def validate_audio_file(self, audio_bytes: bytes) -> bool:
138138
return False
139139

140140
total_time = time.time() - start_time
141-
logger.info(
141+
logger.debug(
142142
f"音频验证通过: {duration:.2f}秒, {sr}Hz,总耗时: {total_time:.3f}秒"
143143
)
144144
return True
@@ -164,7 +164,7 @@ def cleanup_temp_file(self, file_path: str) -> None:
164164
os.remove(file_path)
165165
logger.debug(f"临时文件已清理: {file_path}")
166166
except Exception as e:
167-
logger.warning(f"清理临时文件失败 {file_path}: {e}")
167+
logger.debug(f"清理临时文件失败 {file_path}: {e}")
168168

169169

170170
# 全局音频处理器实例

0 commit comments

Comments
 (0)