Skip to content

Commit c7c2a52

Browse files
authored
Merge pull request #554 from guardrails-ai/mory91/check-json-text
Additional JSON parsing
2 parents 8833c05 + b6e369d commit c7c2a52

File tree

2 files changed: +71 -2 lines changed

2 files changed: +71 -2 lines changed

guardrails/utils/json_utils.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
from dataclasses import dataclass
44
from typing import Any, Dict, Optional, Tuple, Type, Union
55

6+
import regex
7+
68
from guardrails.datatypes import (
79
URL,
810
Boolean,
@@ -370,6 +372,14 @@ def extract_json_from_ouput(output: str) -> Tuple[Optional[Dict], Optional[Excep
370372
has_block, block_start, block_end = has_code_block(output)
371373
if has_block and block_start is not None and block_end is not None:
372374
extracted_code_block = get_code_block(output, block_start, block_end)
375+
else:
376+
json_pattern = regex.compile(r"\{(?:[^{}]+|\{(?:(?R)|[^{}]+)*\})*\}")
377+
json_groups = json_pattern.findall(output)
378+
json_start, json_end = output.find("{"), output.rfind("}")
379+
if len(json_groups) > 0 and len(json_groups[0]) == (
380+
json_end - json_start + 1
381+
):
382+
extracted_code_block = json_groups[0]
373383

374384
# Treat the output as a JSON string, and load it into a dict.
375385
error = None

tests/unit_tests/utils/test_json_utils.py

Lines changed: 61 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,46 @@
3232
```
3333
"""
3434

35-
invalid_json_code_block = """
35+
invalid_json_code_block__quotes = """
3636
```json
3737
{
3838
a: 1
3939
}
4040
```
4141
"""
4242

43+
invalid_json_code_block__braces = """
44+
```json
45+
{"choice": {"action": "flight", "random_key": "random_value"}
46+
```
47+
"""
48+
49+
invalid_json_no_block__quotes = """
50+
{
51+
a: 1
52+
}
53+
"""
54+
55+
invalid_json_no_block__braces = """
56+
{"choice": {"action": "flight", "random_key": "random_value"}
57+
"""
58+
59+
text_with_no_code_block = """
60+
Here is the data you requested
61+
62+
{ "a": 1 , "b": { "c": [{"d": 2}, {"e": 3}]}}
63+
"""
64+
65+
text_with_json_code_block = """
66+
Here is the data you requested
67+
68+
```json
69+
{
70+
"a": 1
71+
}
72+
```
73+
"""
74+
4375
not_even_json = "This isn't even json..."
4476

4577

@@ -49,12 +81,39 @@
4981
(json_code_block, {"a": 1}, None),
5082
(anonymous_code_block, {"a": 1}, None),
5183
(no_code_block, {"a": 1}, None),
84+
(text_with_no_code_block, {"a": 1, "b": {"c": [{"d": 2}, {"e": 3}]}}, None),
85+
(text_with_json_code_block, {"a": 1}, None),
5286
(js_code_block, None, "Expecting value: line 1 column 1 (char 0)"),
5387
(
54-
invalid_json_code_block,
88+
invalid_json_code_block__quotes,
89+
None,
90+
"Expecting property name enclosed in double quotes: line 2 column 5 (char 6)", # noqa
91+
),
92+
(
93+
invalid_json_code_block__braces,
94+
None,
95+
"Expecting ',' delimiter: line 1 column 62 (char 61)", # noqa
96+
),
97+
(
98+
invalid_json_no_block__quotes,
5599
None,
56100
"Expecting property name enclosed in double quotes: line 2 column 5 (char 6)", # noqa
57101
),
102+
(
103+
invalid_json_no_block__braces,
104+
None,
105+
"Expecting ',' delimiter: line 3 column 1 (char 63)", # noqa
106+
),
107+
(
108+
invalid_json_code_block__quotes,
109+
None,
110+
"Expecting property name enclosed in double quotes: line 2 column 5 (char 6)", # noqa
111+
),
112+
(
113+
invalid_json_code_block__braces,
114+
None,
115+
"Expecting ',' delimiter: line 1 column 62 (char 61)", # noqa
116+
),
58117
(not_even_json, None, "Expecting value: line 1 column 1 (char 0)"),
59118
],
60119
)

0 commit comments

Comments
 (0)