Skip to content

Commit b9f8b59

Browse files
NRL-1215 Add json duplicate checker and tests, use it for request
1 parent c4e4698 commit b9f8b59

File tree

4 files changed

+443
-0
lines changed

4 files changed

+443
-0
lines changed
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import json
2+
from typing import List, Tuple, Set, Any, Dict, Union
3+
from nrlf.core.errors import OperationOutcomeError
4+
from nrlf.core.response import SpineErrorConcept
5+
6+
JsonValue = List[Any] | Tuple[Any, ...] | Any
7+
JsonPair = Tuple[str, JsonValue]
8+
9+
class DuplicateKeyChecker:
    """JSON structure duplicate key detector.

    Tracks duplicate keys by maintaining path context during traversal.
    Paths are recorded in dot notation with array indices:
    - Objects: parent.child
    - Arrays: parent.array[0]
    - Nested: parent.array[0].child[1].key
    """

    def __init__(self):
        # Keys seen more than once at the same nesting level.
        self.duplicate_keys: Set[str] = set()
        # Full dotted path of every duplicate occurrence.
        self.duplicate_paths: Set[str] = set()
        # Maps a dotted level path -> keys already seen at that level.
        self.key_registry: Dict[str, Dict[str, bool]] = {}

    def check_key(self, key: str, path: List[str]) -> None:
        """Record *key* at *path*, flagging it when already seen at this level.

        A duplicate occurs when the same key appears twice at the same
        nesting level, even if the values differ.
        """
        current_level = '.'.join(path)
        seen = self.key_registry.setdefault(current_level, {})

        if key in seen:
            self.duplicate_keys.add(key)
            self.duplicate_paths.add('.'.join(path + [key]))
        else:
            seen[key] = True

    def traverse_json(self, data: "List[JsonPair]", path: List[str]) -> None:
        """Traverse JSON structure and check for duplicate keys.

        *data* is a list of (key, value) pairs as produced by json.loads
        with ``object_pairs_hook=lambda pairs: pairs``. Handles both objects
        and arrays, maintaining proper path context during traversal.
        """
        for key, value in data:
            self.check_key(key, path)

            if isinstance(value, (list, tuple)):
                if value and isinstance(value[0], tuple):
                    # A list whose first element is a tuple is a nested object.
                    self.traverse_json(value, path + [key])
                else:
                    # Otherwise it is a JSON array.
                    self.traverse_array(value, path + [key])

    def traverse_array(self, items: List[Any], path: List[str]) -> None:
        """Process array items while tracking their indices in the path."""
        array_path = path[-1]
        base_path = path[:-1]

        for idx, item in enumerate(items):
            # Scalars inside arrays carry no keys, so skip them.
            if not isinstance(item, (tuple, list)):
                continue

            current_path = base_path + [f"{array_path}[{idx}]"]
            if item and isinstance(item[0], tuple):
                # Object inside the array (list of pairs); a bare tuple is
                # wrapped so traverse_json always receives a list of pairs.
                pairs = [item] if isinstance(item, tuple) else item
                self.traverse_json(pairs, current_path)
            else:
                # Nested array: recurse with the indexed path segment.
                self.traverse_array(item, current_path)
80+
def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]:
    """Find all duplicate keys in a JSON string.

    Traverses the entire JSON structure and reports:
    - List of keys that appear multiple times at the same level
    - Full paths to each duplicate key occurrence

    A key is considered duplicate if it appears multiple times within
    the same object, regardless of nesting level or array position.

    Raises:
        ValueError: if *json_content* is not valid JSON.
    """
    try:
        # object_pairs_hook preserves every (key, value) pair, including
        # duplicates that json.loads would otherwise silently collapse.
        parsed_data = json.loads(json_content, object_pairs_hook=lambda pairs: pairs)
    except json.JSONDecodeError:
        # Suppress the decoder traceback; the message is the whole story.
        raise ValueError("Error: Invalid JSON format") from None

    checker = DuplicateKeyChecker()
    if isinstance(parsed_data, list) and parsed_data and isinstance(parsed_data[0], tuple):
        # Top-level JSON object: a list of (key, value) pairs.
        checker.traverse_json(parsed_data, ['root'])
    elif isinstance(parsed_data, list):
        # Top-level JSON array: traverse items with root[idx] paths.
        checker.traverse_array(parsed_data, ['root'])
    # Top-level scalars have no keys, so there is nothing to traverse.

    return list(checker.duplicate_keys), list(checker.duplicate_paths)

layer/nrlf/core/request.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from nrlf.core.errors import OperationOutcomeError, ParseError
99
from nrlf.core.logger import LogReference, logger
1010
from nrlf.core.model import ClientRpDetails, ConnectionMetadata
11+
from nrlf.core.json_duplicate_checker import check_duplicate_keys
1112

1213

1314
def parse_headers(headers: Dict[str, str]) -> ConnectionMetadata:
@@ -88,6 +89,7 @@ def parse_body(
8889

8990
try:
9091
result = model.model_validate_json(body)
92+
raise_when_duplicate_keys(body)
9193
logger.log(LogReference.HANDLER009, parsed_body=result.model_dump())
9294
return result
9395

@@ -98,6 +100,20 @@ def parse_body(
98100
msg="Request body could not be parsed",
99101
) from None
100102

103+
def raise_when_duplicate_keys(json_content: str) -> None:
    """
    Raises an error if duplicate keys are found in the JSON content.
    """
    duplicates, paths = check_duplicate_keys(json_content)
    if not duplicates:
        return

    raise OperationOutcomeError(
        severity="error",
        code="required",
        details=SpineErrorConcept.from_code("MESSAGE_NOT_WELL_FORMED"),
        diagnostics=f"Duplicate keys found in FHIR document: {duplicates}",
        expression=paths,
    )
116+
101117

102118
def parse_path(
103119
model: Type[BaseModel] | None,
@@ -123,3 +139,4 @@ def parse_path(
123139
details=SpineErrorConcept.from_code("INVALID_PARAMETER"),
124140
msg="Invalid path parameter",
125141
) from None
142+

0 commit comments

Comments
 (0)