Skip to content

Commit 3a297dd

Browse files
committed
feat: add json-repair library as fallback for LLM response parsing
- Add json-repair>=0.30.0 to dependencies
- Add _repair_json_with_library method as final fallback in JSON parsing
- Update version to 2.0.0-beta.6
1 parent 4fa99b7 commit 3a297dd

File tree

7 files changed

+261
-57
lines changed

7 files changed

+261
-57
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
<div align="center">
1010

11-
[![Version](https://img.shields.io/badge/version-2.0.0--beta.5-blue.svg)](https://github.com/lintsinghua/DeepAudit/releases)
11+
[![Version](https://img.shields.io/badge/version-2.0.0--beta.6-blue.svg)](https://github.com/lintsinghua/DeepAudit/releases)
1212
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
1313
[![React](https://img.shields.io/badge/React-18-61dafb.svg)](https://reactjs.org/)
1414
[![TypeScript](https://img.shields.io/badge/TypeScript-5.7-3178c6.svg)](https://www.typescriptlang.org/)

backend/app/services/llm/service.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
from .factory import LLMFactory
1212
from app.core.config import settings
1313

14+
# The json-repair library is used to repair malformed JSON
15+
try:
16+
from json_repair import repair_json
17+
JSON_REPAIR_AVAILABLE = True
18+
except ImportError:
19+
JSON_REPAIR_AVAILABLE = False
20+
1421
logger = logging.getLogger(__name__)
1522

1623

@@ -458,6 +465,8 @@ def aggressive_fix_json(s: str) -> str:
458465
lambda: self._fix_truncated_json(clean_text(text)),
459466
# 6. 激进修复后解析
460467
lambda: json.loads(aggressive_fix_json(text)),
468+
# 7. 使用 json-repair 库作为最终兜底方案
469+
lambda: self._repair_json_with_library(text),
461470
]
462471

463472
last_error = None
@@ -574,6 +583,32 @@ def _fix_truncated_json(self, text: str) -> Dict[str, Any]:
574583
json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
575584
return json.loads(json_str)
576585

586+
def _repair_json_with_library(self, text: str) -> Dict[str, Any]:
    """Repair malformed JSON with the json-repair library (last-resort fallback).

    The candidate span starts at the first ``{`` in ``text``. It ends at the
    last ``}`` when that brace comes after the opening one; otherwise it runs
    to the end of ``text``.

    Args:
        text: Raw text expected to contain a JSON object.

    Returns:
        The value produced by ``repair_json`` when it is a dict.

    Raises:
        ValueError: If json-repair could not be imported, if ``text``
            contains no ``{``, or if the repaired value is not a dict.
    """
    if not JSON_REPAIR_AVAILABLE:
        raise ValueError("json-repair library not available")

    # Locate the outermost braces to isolate the JSON-looking portion.
    opening = text.find('{')
    if opening < 0:
        raise ValueError("No JSON object found for repair")

    closing = text.rfind('}')
    # No closing brace after the opening one: hand over the whole tail.
    candidate = text[opening:closing + 1] if closing > opening else text[opening:]

    # return_objects=True asks json-repair for a Python object, not a string.
    result = repair_json(candidate, return_objects=True)
    if not isinstance(result, dict):
        raise ValueError(f"json-repair returned unexpected type: {type(result)}")

    logger.info("✅ json-repair 库成功修复 JSON")
    return result
611+
577612
def _get_default_response(self) -> Dict[str, Any]:
578613
"""返回默认响应"""
579614
return {

backend/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@ dependencies = [
2222
"reportlab>=4.0.0",
2323
"weasyprint>=66.0",
2424
"jinja2>=3.1.6",
25+
"json-repair>=0.30.0",
2526
]

backend/requirements-lock.txt

Lines changed: 164 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# This file was autogenerated by uv via the following command:
22
# uv pip compile requirements.txt -o requirements-lock.txt
3+
aiohappyeyeballs==2.6.1
4+
# via aiohttp
5+
aiohttp==3.13.2
6+
# via litellm
7+
aiosignal==1.4.0
8+
# via aiohttp
39
alembic==1.17.2
410
# via -r requirements.txt
511
annotated-doc==0.0.4
@@ -9,46 +15,135 @@ annotated-types==0.7.0
915
anyio==4.11.0
1016
# via
1117
# httpx
18+
# openai
1219
# starlette
1320
# watchfiles
1421
asyncpg==0.31.0
1522
# via -r requirements.txt
16-
bcrypt==5.0.0
17-
# via passlib
23+
attrs==25.4.0
24+
# via
25+
# aiohttp
26+
# jsonschema
27+
# referencing
28+
bcrypt==4.3.0
29+
# via
30+
# -r requirements.txt
31+
# passlib
32+
brotli==1.2.0
33+
# via fonttools
1834
certifi==2025.11.12
1935
# via
2036
# httpcore
2137
# httpx
38+
# requests
2239
cffi==2.0.0
23-
# via cryptography
40+
# via
41+
# cryptography
42+
# weasyprint
43+
charset-normalizer==3.4.4
44+
# via
45+
# reportlab
46+
# requests
2447
click==8.3.1
25-
# via uvicorn
48+
# via
49+
# litellm
50+
# typer-slim
51+
# uvicorn
2652
cryptography==46.0.3
2753
# via python-jose
54+
cssselect2==0.8.0
55+
# via weasyprint
56+
distro==1.9.0
57+
# via openai
58+
dnspython==2.8.0
59+
# via email-validator
2860
ecdsa==0.19.1
2961
# via python-jose
62+
email-validator==2.3.0
63+
# via -r requirements.txt
3064
fastapi==0.122.0
3165
# via -r requirements.txt
66+
fastuuid==0.14.0
67+
# via litellm
68+
filelock==3.20.0
69+
# via huggingface-hub
70+
fonttools==4.61.0
71+
# via weasyprint
72+
frozenlist==1.8.0
73+
# via
74+
# aiohttp
75+
# aiosignal
76+
fsspec==2025.12.0
77+
# via huggingface-hub
78+
greenlet==3.3.0
79+
# via -r requirements.txt
80+
grpcio==1.67.1
81+
# via litellm
3282
h11==0.16.0
3383
# via
3484
# httpcore
3585
# uvicorn
86+
hf-xet==1.2.0
87+
# via huggingface-hub
3688
httpcore==1.0.9
3789
# via httpx
3890
httptools==0.7.1
3991
# via uvicorn
4092
httpx==0.28.1
41-
# via -r requirements.txt
93+
# via
94+
# -r requirements.txt
95+
# huggingface-hub
96+
# litellm
97+
# openai
98+
huggingface-hub==1.2.1
99+
# via tokenizers
42100
idna==3.11
43101
# via
44102
# anyio
103+
# email-validator
45104
# httpx
105+
# requests
106+
# yarl
107+
importlib-metadata==8.7.0
108+
# via litellm
109+
jinja2==3.1.6
110+
# via
111+
# -r requirements.txt
112+
# litellm
113+
jiter==0.12.0
114+
# via openai
115+
json-repair==0.54.2
116+
# via -r requirements.txt
117+
jsonschema==4.25.1
118+
# via litellm
119+
jsonschema-specifications==2025.9.1
120+
# via jsonschema
121+
litellm==1.80.8
122+
# via -r requirements.txt
46123
mako==1.3.10
47124
# via alembic
48125
markupsafe==3.0.3
49-
# via mako
126+
# via
127+
# jinja2
128+
# mako
129+
multidict==6.7.0
130+
# via
131+
# aiohttp
132+
# yarl
133+
openai==2.9.0
134+
# via litellm
135+
packaging==25.0
136+
# via huggingface-hub
50137
passlib==1.7.4
51138
# via -r requirements.txt
139+
pillow==12.0.0
140+
# via
141+
# reportlab
142+
# weasyprint
143+
propcache==0.4.1
144+
# via
145+
# aiohttp
146+
# yarl
52147
pyasn1==0.6.1
53148
# via
54149
# python-jose
@@ -59,50 +154,111 @@ pydantic==2.12.4
59154
# via
60155
# -r requirements.txt
61156
# fastapi
157+
# litellm
158+
# openai
62159
# pydantic-settings
63160
pydantic-core==2.41.5
64161
# via pydantic
65162
pydantic-settings==2.12.0
66163
# via -r requirements.txt
164+
pydyf==0.12.1
165+
# via weasyprint
166+
pyphen==0.17.2
167+
# via weasyprint
67168
python-dotenv==1.2.1
68169
# via
170+
# litellm
69171
# pydantic-settings
70172
# uvicorn
71173
python-jose==3.5.0
72174
# via -r requirements.txt
73175
python-multipart==0.0.20
74176
# via -r requirements.txt
75177
pyyaml==6.0.3
76-
# via uvicorn
178+
# via
179+
# huggingface-hub
180+
# uvicorn
181+
referencing==0.37.0
182+
# via
183+
# jsonschema
184+
# jsonschema-specifications
185+
regex==2025.11.3
186+
# via tiktoken
187+
reportlab==4.4.5
188+
# via -r requirements.txt
189+
requests==2.32.5
190+
# via tiktoken
191+
rpds-py==0.30.0
192+
# via
193+
# jsonschema
194+
# referencing
77195
rsa==4.9.1
78196
# via python-jose
197+
shellingham==1.5.4
198+
# via huggingface-hub
79199
six==1.17.0
80200
# via ecdsa
81201
sniffio==1.3.1
82-
# via anyio
202+
# via
203+
# anyio
204+
# openai
83205
sqlalchemy==2.0.44
84206
# via
85207
# -r requirements.txt
86208
# alembic
87209
starlette==0.50.0
88210
# via fastapi
211+
tiktoken==0.12.0
212+
# via litellm
213+
tinycss2==1.5.1
214+
# via
215+
# cssselect2
216+
# weasyprint
217+
tinyhtml5==2.0.0
218+
# via weasyprint
219+
tokenizers==0.22.1
220+
# via litellm
221+
tqdm==4.67.1
222+
# via
223+
# huggingface-hub
224+
# openai
225+
typer-slim==0.20.0
226+
# via huggingface-hub
89227
typing-extensions==4.15.0
90228
# via
91229
# alembic
92230
# fastapi
231+
# huggingface-hub
232+
# openai
93233
# pydantic
94234
# pydantic-core
95235
# sqlalchemy
236+
# typer-slim
96237
# typing-inspection
97238
typing-inspection==0.4.2
98239
# via
99240
# pydantic
100241
# pydantic-settings
242+
urllib3==2.6.0
243+
# via requests
101244
uvicorn==0.38.0
102245
# via -r requirements.txt
103246
uvloop==0.22.1
104247
# via uvicorn
105248
watchfiles==1.1.1
106249
# via uvicorn
250+
weasyprint==67.0
251+
# via -r requirements.txt
252+
webencodings==0.5.1
253+
# via
254+
# cssselect2
255+
# tinycss2
256+
# tinyhtml5
107257
websockets==15.0.1
108258
# via uvicorn
259+
yarl==1.22.0
260+
# via aiohttp
261+
zipp==3.23.0
262+
# via importlib-metadata
263+
zopfli==0.4.0
264+
# via fonttools

backend/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ litellm>=1.0.0
1616
reportlab>=4.0.0
1717
weasyprint>=66.0
1818
jinja2>=3.1.6
19+
json-repair>=0.30.0

0 commit comments

Comments
 (0)