|
1 | 1 | import os |
2 | 2 | import yaml |
| 3 | +import asyncio |
| 4 | +import requests |
3 | 5 | from pathlib import Path |
4 | 6 | from fastapi import Request, Body, Depends, HTTPException |
5 | 7 | from fastapi import APIRouter |
@@ -133,4 +135,111 @@ async def reload_info_config(): |
133 | 135 | logger.error(f"重新加载信息配置失败: {e}") |
134 | 136 | raise HTTPException(status_code=500, detail="重新加载信息配置失败") |
135 | 137 |
|
def _check_rapid_ocr_health() -> dict:
    """Probe the local RapidOCR (ONNX) models and return a ``{"status", "message"}`` dict.

    The model directory is ``$MODEL_DIR/SWHL/RapidOCR``. The check passes only
    if the directory and both PP-OCRv4 model files exist and ``RapidOCR`` can
    be constructed from them. This function is synchronous and may be slow
    (it loads the models), so call it from a worker thread. It never raises:
    any failure is reported through the returned dict.
    """
    result = {"status": "unknown", "message": ""}
    try:
        model_dir = os.path.join(os.getenv("MODEL_DIR", ""), "SWHL/RapidOCR")
        det_model_path = os.path.join(model_dir, "PP-OCRv4/ch_PP-OCRv4_det_infer.onnx")
        rec_model_path = os.path.join(model_dir, "PP-OCRv4/ch_PP-OCRv4_rec_infer.onnx")

        required_paths = (model_dir, det_model_path, rec_model_path)
        if all(os.path.exists(path) for path in required_paths):
            # Imported lazily so a missing optional dependency is reported as
            # an "error" status instead of breaking module import.
            from rapidocr_onnxruntime import RapidOCR

            # Construct (and discard) an instance purely to prove the models load.
            RapidOCR(
                det_box_thresh=0.3,
                det_model_path=det_model_path,
                rec_model_path=rec_model_path,
            )
            result["status"] = "healthy"
            result["message"] = "RapidOCR模型已加载"
        else:
            result["status"] = "unavailable"
            result["message"] = f"模型文件不存在: {model_dir}"
    except Exception as e:
        result["status"] = "error"
        result["message"] = f"RapidOCR初始化失败: {str(e)}"
    return result


def _check_http_ocr_health(label: str, service_uri: str) -> dict:
    """Probe ``<service_uri>/health`` and return a ``{"status", "message"}`` dict.

    Args:
        label: Service name used as the prefix of every message (e.g. "MinerU").
        service_uri: Base URI of the service; a trailing slash is tolerated.

    This function is synchronous (blocking HTTP call with a 5 second timeout),
    so call it from a worker thread. It never raises: any failure is reported
    through the returned dict.
    """
    result = {"status": "unknown", "message": ""}
    # Strip a trailing slash so a URI configured as "http://host/" does not
    # produce "http://host//health".
    health_url = f"{service_uri.rstrip('/')}/health"
    try:
        response = requests.get(health_url, timeout=5)
    # ConnectionError is handled before Timeout on purpose: this keeps the
    # original precedence for exceptions that inherit from both.
    except requests.exceptions.ConnectionError:
        result["status"] = "unavailable"
        result["message"] = f"{label}服务无法连接,请检查服务是否启动"
    except requests.exceptions.Timeout:
        result["status"] = "timeout"
        result["message"] = f"{label}服务连接超时"
    except Exception as e:
        result["status"] = "error"
        result["message"] = f"{label}服务检查失败: {str(e)}"
    else:
        if response.status_code == 200:
            result["status"] = "healthy"
            result["message"] = f"{label}服务运行正常 ({service_uri})"
        else:
            result["status"] = "unhealthy"
            result["message"] = f"{label}服务响应异常: {response.status_code}"
    return result


@base.get("/ocr/health")
async def check_ocr_services_health(current_user: User = Depends(get_admin_user)):
    """Check the health of every OCR backend and report their availability.

    Three backends are probed: the local RapidOCR models, the MinerU OCR
    service (``MINERU_OCR_URI``, default ``http://localhost:30000``) and the
    PaddleX OCR service (``PADDLEX_URI``, default ``http://localhost:8080``).

    Returns:
        A dict with ``overall_status`` ("healthy" if at least one backend is
        healthy, otherwise "unhealthy"), ``services`` (per-backend
        ``{"status", "message"}`` dicts keyed by ``rapid_ocr``, ``mineru_ocr``
        and ``paddlex_ocr``) and a fixed ``message``.
    """
    # All three probes are blocking (model loading / synchronous HTTP). Run
    # them on the default executor so the event loop stays responsive, and
    # concurrently so the worst case is one timeout rather than the sum.
    loop = asyncio.get_running_loop()
    rapid_status, mineru_status, paddlex_status = await asyncio.gather(
        loop.run_in_executor(None, _check_rapid_ocr_health),
        loop.run_in_executor(
            None,
            _check_http_ocr_health,
            "MinerU",
            os.getenv("MINERU_OCR_URI", "http://localhost:30000"),
        ),
        loop.run_in_executor(
            None,
            _check_http_ocr_health,
            "PaddleX",
            os.getenv("PADDLEX_URI", "http://localhost:8080"),
        ),
    )

    health_status = {
        "rapid_ocr": rapid_status,
        "mineru_ocr": mineru_status,
        "paddlex_ocr": paddlex_status,
    }

    # OCR is considered usable as long as any single backend is healthy.
    overall_status = (
        "healthy"
        if any(svc["status"] == "healthy" for svc in health_status.values())
        else "unhealthy"
    )

    return {
        "overall_status": overall_status,
        "services": health_status,
        "message": "OCR服务健康检查完成",
    }
| 221 | + |
@base.get("/ocr/stats")
async def get_ocr_stats(current_user: User = Depends(get_admin_user)):
    """Return OCR usage statistics for administrators.

    The statistics are whatever ``src.plugins._ocr.get_ocr_stats`` returns and
    are passed through unchanged under the ``stats`` key.

    Returns:
        ``{"status": "success", "stats": ..., "message": ...}`` on success.
        If collecting the statistics raises, the error is logged and a
        ``{"status": "error", "stats": {}, "message": ...}`` payload is
        returned instead of propagating the exception.
    """
    try:
        # Aliased so the imported collector does not shadow this handler's own name.
        from src.plugins._ocr import get_ocr_stats as collect_ocr_stats

        collected = collect_ocr_stats()
    except Exception as exc:
        reason = str(exc)
        logger.error(f"获取OCR统计信息失败: {reason}")
        payload = {
            "status": "error",
            "stats": {},
            "message": f"获取OCR统计信息失败: {reason}",
        }
    else:
        payload = {
            "status": "success",
            "stats": collected,
            "message": "OCR统计信息获取成功",
        }
    return payload
| 244 | + |
136 | 245 |
|
0 commit comments