Skip to content

Commit 4c43235

Browse files
Merge pull request #801 from NHSDigital/feature/axkr1-NRL-1215-duplicate-fields
NRL-1215 Detect duplicate fields
2 parents fe60322 + 67af076 commit 4c43235

12 files changed

+974
-0
lines changed

layer/nrlf/core/errors.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ def response(self) -> Response:
7979
body=self.operation_outcome.model_dump_json(exclude_none=True, indent=2),
8080
)
8181

82+
def __str__(self):
    """Human-readable representation: the error name plus its operation outcome."""
    outcome = self.operation_outcome
    return f"OperationOutcomeError: {outcome}"
84+
8285

8386
class ParseError(Exception):
8487
issues: List[OperationOutcomeIssue]
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import json
2+
from typing import Any
3+
4+
5+
def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    """Custom JSON object_pairs_hook that checks for duplicate keys.

    Keys are compared case-insensitively: the first occurrence of a key wins
    and is kept in the result. Every later occurrence is recorded under the
    reserved "__duplicates__" entry, mapping the duplicate key (as spelled at
    that occurrence) to the list of its extra values.
    """
    keys: dict[str, Any] = {}
    dupes: dict[str, list[Any]] = {}
    # A set gives O(1) membership tests; the original list made this loop
    # quadratic in the number of keys per object.
    seen_normalized: set[str] = set()

    for key, value in pairs:
        normalized_key = key.lower()
        if normalized_key in seen_normalized:
            dupes.setdefault(key, []).append(value)
        else:
            keys[key] = value
            seen_normalized.add(normalized_key)

    if dupes:
        keys["__duplicates__"] = dupes

    return keys
23+
24+
25+
def flatten_duplicates(data: dict[str, Any] | list[Any]) -> list[str]:
26+
"""Flattens a JSON structure and returns a list of duplicate paths."""
27+
duplicates: list[str] = []
28+
items = data.items() if isinstance(data, dict) else enumerate(data)
29+
30+
for key, value in items:
31+
if key == "__duplicates__":
32+
duplicates.extend(value.keys())
33+
elif isinstance(value, (dict, list)):
34+
path = f"{key}" if isinstance(data, dict) else f"[{key}]"
35+
dupes = flatten_duplicates(value)
36+
duplicates.extend([f"{path}.{dupe}" for dupe in dupes])
37+
38+
return duplicates
39+
40+
41+
def format_path(path: str) -> str:
    """Transforms a path like key1.[2].key2 into key1[2].key2

    Index segments ("[n]") are fused onto the preceding key. A path that
    *starts* with an index segment (top-level JSON array) is kept as a
    standalone part instead of raising IndexError, which the previous
    implementation did via `formatted_parts[-1]` on an empty list.
    """
    formatted_parts: list[str] = []
    for part in path.split("."):
        if part.startswith("[") and formatted_parts:
            # Attach the array index to the key before it: "key.[2]" -> "key[2]".
            formatted_parts[-1] += part
        else:
            formatted_parts.append(part)
    return ".".join(formatted_parts)
51+
52+
53+
def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]:
    """Find all duplicate keys in a JSON string.

    Traverses the entire JSON structure and reports:
    - List of keys that appear multiple times at the same level
    - Full paths to each duplicate key occurrence

    A key is considered duplicate if it appears multiple times within
    the same object, regardless of nesting level or array position.

    Raises:
        ValueError: if json_content is not valid JSON.
    """
    # Keep the try body minimal: only json.loads can raise JSONDecodeError.
    try:
        dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys)
    except json.JSONDecodeError as exc:
        # Chain the decode error so the offending position is not lost.
        raise ValueError("Error: Invalid JSON format") from exc

    duplicate_paths = [
        f"DocumentReference.{format_path(path)}"
        for path in flatten_duplicates(dupe_data)
    ]
    # Deduplicate the bare key names while preserving first-seen order.
    duplicate_keys = list(
        dict.fromkeys(path.split(".")[-1] for path in duplicate_paths)
    )
    return duplicate_keys, duplicate_paths

layer/nrlf/core/log_references.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ class LogReference(Enum):
3535
)
3636
HANDLER016 = _Reference("INFO", "Set response headers")
3737
HANDLER017 = _Reference("WARN", "Correlation ID not found in request headers")
38+
HANDLER018 = _Reference("INFO", "Checking for duplicate keys in request body")
39+
HANDLER019 = _Reference("ERROR", "Duplicate keys found in the request body")
3840
HANDLER999 = _Reference("INFO", "Request handler returned successfully")
3941

4042
# Error Logs

layer/nrlf/core/request.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from nrlf.core.codes import SpineErrorConcept
77
from nrlf.core.constants import CLIENT_RP_DETAILS, CONNECTION_METADATA
88
from nrlf.core.errors import OperationOutcomeError, ParseError
9+
from nrlf.core.json_duplicate_checker import check_duplicate_keys
910
from nrlf.core.logger import LogReference, logger
1011
from nrlf.core.model import ClientRpDetails, ConnectionMetadata
1112

@@ -88,6 +89,7 @@ def parse_body(
8889

8990
try:
9091
result = model.model_validate_json(body)
92+
raise_when_duplicate_keys(body)
9193
logger.log(LogReference.HANDLER009, parsed_body=result.model_dump())
9294
return result
9395

@@ -99,6 +101,24 @@ def parse_body(
99101
) from None
100102

101103

104+
def raise_when_duplicate_keys(json_content: str) -> None:
    """
    Raises an error if duplicate keys are found in the JSON content.
    """
    logger.log(LogReference.HANDLER018)
    duplicate_keys, duplicate_paths = check_duplicate_keys(json_content)

    # Guard clause: a clean document needs no further work.
    if not duplicate_keys:
        return

    outcome_error = OperationOutcomeError(
        severity="error",
        code="invalid",
        details=SpineErrorConcept.from_code("MESSAGE_NOT_WELL_FORMED"),
        diagnostics=f"Duplicate keys found in FHIR document: {duplicate_keys}",
        expression=duplicate_paths,
    )
    logger.log(LogReference.HANDLER019, error=str(outcome_error))
    raise outcome_error
120+
121+
102122
def parse_path(
103123
model: Type[BaseModel] | None,
104124
path_params: Dict[str, str] | None,

0 commit comments

Comments
 (0)