update: 增加各环节耗时统计，用于调试响应速度

openrz · openrz · commit a8341a4fc584 · 2025-07-03T21:42:15.000+08:00
diff --git a/app/api/v1/health.py b/app/api/v1/health.py
@@ -1,4 +1,5 @@
 from fastapi import APIRouter, HTTPException, Query
+import time
 from ...services.voiceprint_service import voiceprint_service
 from ...core.logging import get_logger
 from ...core.config import settings
@@ -29,14 +30,28 @@ async def health_check(
     Raises:
         HTTPException: 当密钥不正确时返回401错误
     """
+    start_time = time.time()
+    logger.info("收到健康检查请求")
+
     # 验证密钥
+    key_check_start = time.time()
     if key != settings.api_token:
         logger.warning(f"健康检查接口收到无效密钥: {key}")
         raise HTTPException(status_code=401, detail="密钥验证失败")
+    key_check_time = time.time() - key_check_start
+    logger.info(f"密钥验证完成，耗时: {key_check_time:.3f}秒")
 
     try:
+        count_start = time.time()
+        logger.info("开始获取声纹统计信息...")
         count = voiceprint_service.get_voiceprint_count()
+        count_time = time.time() - count_start
+        logger.info(f"声纹统计信息获取完成，总数: {count}，耗时: {count_time:.3f}秒")
+
+        total_time = time.time() - start_time
+        logger.info(f"健康检查请求完成，总耗时: {total_time:.3f}秒")
         return {"total_voiceprints": count, "status": "healthy"}
     except Exception as e:
-        logger.error(f"获取统计信息异常: {e}")
+        total_time = time.time() - start_time
+        logger.error(f"获取统计信息异常，总耗时: {total_time:.3f}秒，错误: {e}")
         raise HTTPException(status_code=500, detail=f"获取统计信息失败: {str(e)}")
diff --git a/app/api/v1/voiceprint.py b/app/api/v1/voiceprint.py
@@ -1,6 +1,7 @@
 from fastapi import APIRouter, File, UploadFile, Form, HTTPException, Depends
 from fastapi.security import HTTPBearer
 from typing import List
+import time
 from ...models.voiceprint import VoiceprintRegisterResponse, VoiceprintIdentifyResponse
 from ...services.voiceprint_service import voiceprint_service
 from ...api.dependencies import AuthorizationToken
@@ -83,30 +84,58 @@ async def identify_voiceprint(
     Returns:
         VoiceprintIdentifyResponse: 识别结果
     """
+    start_time = time.time()
+    logger.info(f"开始声纹识别请求 - 候选说话人: {speaker_ids}, 文件: {file.filename}")
+
     try:
         # 验证文件类型
+        validation_start = time.time()
         if not file.filename.lower().endswith(".wav"):
             raise HTTPException(status_code=400, detail="只支持WAV格式音频文件")
+        validation_time = time.time() - validation_start
+        logger.info(f"文件类型验证完成，耗时: {validation_time:.3f}秒")
 
         # 解析候选说话人ID
+        parse_start = time.time()
         candidate_ids = [x.strip() for x in speaker_ids.split(",") if x.strip()]
         if not candidate_ids:
             raise HTTPException(status_code=400, detail="候选说话人ID不能为空")
+        parse_time = time.time() - parse_start
+        logger.info(
+            f"候选说话人ID解析完成，共{len(candidate_ids)}个，耗时: {parse_time:.3f}秒"
+        )
 
         # 读取音频数据
+        read_start = time.time()
         audio_bytes = await file.read()
+        read_time = time.time() - read_start
+        logger.info(
+            f"音频文件读取完成，大小: {len(audio_bytes)}字节，耗时: {read_time:.3f}秒"
+        )
 
         # 识别声纹
+        identify_start = time.time()
+        logger.info("开始调用声纹识别服务...")
         match_name, match_score = voiceprint_service.identify_voiceprint(
             candidate_ids, audio_bytes
         )
+        identify_time = time.time() - identify_start
+        logger.info(f"声纹识别服务调用完成，耗时: {identify_time:.3f}秒")
+
+        total_time = time.time() - start_time
+        logger.info(
+            f"声纹识别请求完成，总耗时: {total_time:.3f}秒，识别结果: {match_name}, 分数: {match_score:.4f}"
+        )
 
         return VoiceprintIdentifyResponse(speaker_id=match_name, score=match_score)
 
     except HTTPException:
+        total_time = time.time() - start_time
+        logger.error(f"声纹识别请求失败，总耗时: {total_time:.3f}秒")
         raise
     except Exception as e:
-        logger.error(f"声纹识别异常: {e}")
+        total_time = time.time() - start_time
+        logger.error(f"声纹识别异常，总耗时: {total_time:.3f}秒，错误: {e}")
         raise HTTPException(status_code=500, detail=f"声纹识别失败: {str(e)}")
 
 
diff --git a/app/database/voiceprint_db.py b/app/database/voiceprint_db.py
@@ -1,4 +1,5 @@
 import numpy as np
+import time
 from typing import Dict, List, Optional
 from .connection import db_connection
 from ..core.logging import get_logger
@@ -46,6 +47,14 @@ def get_voiceprints(
         Returns:
             Dict[str, np.ndarray]: {speaker_id: 特征向量}
         """
+        start_time = time.time()
+        query_type = (
+            f"指定ID查询({len(speaker_ids) if speaker_ids else 0}个)"
+            if speaker_ids
+            else "全量查询"
+        )
+        logger.info(f"开始数据库查询: {query_type}")
+
         try:
             with db_connection.get_cursor() as cursor:
                 if speaker_ids:
@@ -56,15 +65,29 @@ def get_voiceprints(
                     sql = "SELECT speaker_id, feature_vector FROM voiceprints"
                     cursor.execute(sql)
 
+                fetch_start = time.time()
                 results = cursor.fetchall()
+                fetch_time = time.time() - fetch_start
+                logger.info(
+                    f"数据库查询完成，获取到{len(results)}条记录，查询耗时: {fetch_time:.3f}秒"
+                )
+
                 # 将数据库中的二进制特征转为numpy数组
+                convert_start = time.time()
                 voiceprints = {
                     row[0]: np.frombuffer(row[1], dtype=np.float32) for row in results
                 }
-                logger.info(f"获取到 {len(voiceprints)} 个声纹特征")
+                convert_time = time.time() - convert_start
+                logger.info(f"数据转换完成，转换耗时: {convert_time:.3f}秒")
+
+                total_time = time.time() - start_time
+                logger.info(
+                    f"获取到 {len(voiceprints)} 个声纹特征，总耗时: {total_time:.3f}秒"
+                )
                 return voiceprints
         except Exception as e:
-            logger.error(f"获取声纹特征失败: {e}")
+            total_time = time.time() - start_time
+            logger.error(f"获取声纹特征失败，总耗时: {total_time:.3f}秒，错误: {e}")
             return {}
 
     def delete_voiceprint(self, speaker_id: str) -> bool:
@@ -98,15 +121,22 @@ def count_voiceprints(self) -> int:
         Returns:
             int: 声纹特征总数
         """
+        start_time = time.time()
+        logger.info("开始查询声纹特征总数...")
+
         try:
             with db_connection.get_cursor() as cursor:
                 sql = "SELECT COUNT(*) FROM voiceprints"
                 cursor.execute(sql)
                 result = cursor.fetchone()
                 count = result[0] if result else 0
+
+                total_time = time.time() - start_time
+                logger.info(f"声纹特征总数查询完成: {count}，耗时: {total_time:.3f}秒")
                 return count
         except Exception as e:
-            logger.error(f"获取声纹特征总数失败: {e}")
+            total_time = time.time() - start_time
+            logger.error(f"获取声纹特征总数失败，总耗时: {total_time:.3f}秒，错误: {e}")
             return 0
 
 
diff --git a/app/services/voiceprint_service.py b/app/services/voiceprint_service.py
@@ -1,5 +1,6 @@
 import numpy as np
 import torch
+import time
 from typing import Dict, List, Tuple, Optional
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
@@ -53,13 +54,28 @@ def extract_voiceprint(self, audio_path: str) -> np.ndarray:
         Returns:
             np.ndarray: 声纹特征向量
         """
+        start_time = time.time()
+        logger.info(f"开始提取声纹特征，音频文件: {audio_path}")
+
         try:
+            pipeline_start = time.time()
             result = self._pipeline([audio_path], output_emb=True)
+            pipeline_time = time.time() - pipeline_start
+            logger.info(f"模型推理完成，耗时: {pipeline_time:.3f}秒")
+
+            convert_start = time.time()
             emb = self._to_numpy(result["embs"][0]).astype(np.float32)
-            logger.debug(f"声纹特征提取成功，维度: {emb.shape}")
+            convert_time = time.time() - convert_start
+            logger.info(f"数据转换完成，耗时: {convert_time:.3f}秒")
+
+            total_time = time.time() - start_time
+            logger.info(
+                f"声纹特征提取成功，维度: {emb.shape}，总耗时: {total_time:.3f}秒"
+            )
             return emb
         except Exception as e:
-            logger.error(f"声纹特征提取失败: {e}")
+            total_time = time.time() - start_time
+            logger.error(f"声纹特征提取失败，总耗时: {total_time:.3f}秒，错误: {e}")
             raise
 
     def calculate_similarity(self, emb1: np.ndarray, emb2: np.ndarray) -> float:
@@ -138,30 +154,56 @@ def identify_voiceprint(
         Returns:
             Tuple[str, float]: (识别出的说话人ID, 相似度分数)
         """
+        start_time = time.time()
+        logger.info(f"开始声纹识别流程，候选说话人数量: {len(speaker_ids)}")
+
         audio_path = None
         try:
             # 验证音频文件
+            validation_start = time.time()
             if not audio_processor.validate_audio_file(audio_bytes):
                 logger.warning("音频文件验证失败")
                 return "", 0.0
+            validation_time = time.time() - validation_start
+            logger.info(f"音频文件验证完成，耗时: {validation_time:.3f}秒")
 
             # 处理音频文件
+            audio_process_start = time.time()
             audio_path = audio_processor.ensure_16k_wav(audio_bytes)
+            audio_process_time = time.time() - audio_process_start
+            logger.info(f"音频文件处理完成，耗时: {audio_process_time:.3f}秒")
 
             # 提取声纹特征
+            extract_start = time.time()
+            logger.info("开始提取声纹特征...")
             test_emb = self.extract_voiceprint(audio_path)
+            extract_time = time.time() - extract_start
+            logger.info(f"声纹特征提取完成，耗时: {extract_time:.3f}秒")
 
             # 获取候选声纹特征
+            db_query_start = time.time()
+            logger.info("开始查询数据库获取候选声纹特征...")
             voiceprints = voiceprint_db.get_voiceprints(speaker_ids)
+            db_query_time = time.time() - db_query_start
+            logger.info(
+                f"数据库查询完成，获取到{len(voiceprints)}个声纹特征，耗时: {db_query_time:.3f}秒"
+            )
+
             if not voiceprints:
                 logger.info("未找到候选说话人声纹")
                 return "", 0.0
 
             # 计算相似度
+            similarity_start = time.time()
+            logger.info("开始计算相似度...")
             similarities = {}
             for name, emb in voiceprints.items():
                 similarity = self.calculate_similarity(test_emb, emb)
                 similarities[name] = similarity
+            similarity_time = time.time() - similarity_start
+            logger.info(
+                f"相似度计算完成，共计算{len(similarities)}个，耗时: {similarity_time:.3f}秒"
+            )
 
             # 找到最佳匹配
             if not similarities:
@@ -172,19 +214,30 @@ def identify_voiceprint(
 
             # 检查是否超过阈值
             if match_score < self.similarity_threshold:
-                logger.info(f"未识别到说话人，最高分: {match_score:.4f}")
+                logger.info(
+                    f"未识别到说话人，最高分: {match_score:.4f}，阈值: {self.similarity_threshold}"
+                )
+                total_time = time.time() - start_time
+                logger.info(f"声纹识别流程完成，总耗时: {total_time:.3f}秒")
                 return "", match_score
 
-            logger.info(f"识别到说话人: {match_name}, 分数: {match_score:.4f}")
+            total_time = time.time() - start_time
+            logger.info(
+                f"识别到说话人: {match_name}, 分数: {match_score:.4f}, 总耗时: {total_time:.3f}秒"
+            )
             return match_name, match_score
 
         except Exception as e:
-            logger.error(f"声纹识别异常: {e}")
+            total_time = time.time() - start_time
+            logger.error(f"声纹识别异常，总耗时: {total_time:.3f}秒，错误: {e}")
             return "", 0.0
         finally:
             # 清理临时文件
+            cleanup_start = time.time()
             if audio_path:
                 audio_processor.cleanup_temp_file(audio_path)
+            cleanup_time = time.time() - cleanup_start
+            logger.debug(f"临时文件清理完成，耗时: {cleanup_time:.3f}秒")
 
     def delete_voiceprint(self, speaker_id: str) -> bool:
         """
@@ -205,7 +258,18 @@ def get_voiceprint_count(self) -> int:
         Returns:
             int: 声纹总数
         """
-        return voiceprint_db.count_voiceprints()
+        start_time = time.time()
+        logger.info("开始获取声纹总数...")
+
+        try:
+            count = voiceprint_db.count_voiceprints()
+            total_time = time.time() - start_time
+            logger.info(f"声纹总数获取完成: {count}，耗时: {total_time:.3f}秒")
+            return count
+        except Exception as e:
+            total_time = time.time() - start_time
+            logger.error(f"获取声纹总数失败，总耗时: {total_time:.3f}秒，错误: {e}")
+            raise
 
 
 # 全局声纹服务实例
diff --git a/app/utils/audio_utils.py b/app/utils/audio_utils.py