Commit 023e514

[NRL-1215] Switch root to DocumentReference in json dupe checker and add typing
1 parent: ef8f45a

File tree

4 files changed, +53 −32 lines changed


layer/nrlf/core/json_duplicate_checker.py

Lines changed: 11 additions & 10 deletions
@@ -2,11 +2,11 @@
 from typing import Any
 
 
-def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict:
+def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
     """Custom JSON object_pairs_hook that checks for duplicate keys."""
-    keys = {}
-    dupes = {}
-    normalized_keys = []
+    keys: dict[str, Any] = {}
+    dupes: dict[str, Any] = {}
+    normalized_keys: list[str] = []
 
     for key, value in pairs:
         normalized_key = key.lower()
@@ -22,9 +22,9 @@ def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict:
     return keys
 
 
-def flatten_duplicates(data: dict | list) -> list[str]:
+def flatten_duplicates(data: dict[str, Any] | list[Any]) -> list[str]:
     """Flattens a JSON structure and returns a list of duplicate paths."""
-    duplicates = []
+    duplicates: list[str] = []
     items = data.items() if isinstance(data, dict) else enumerate(data)
 
     for key, value in items:
@@ -39,9 +39,9 @@ def flatten_duplicates(data: dict | list) -> list[str]:
 
 
 def format_path(path: str) -> str:
-    """Transforms a path like root.key1.[2].key2 into root.key1[2].key2"""
+    """Transforms a path like key1.[2].key2 into key1[2].key2"""
     parts = path.split(".")
-    formatted_parts = []
+    formatted_parts: list[str] = []
     for part in parts:
         if part.startswith("["):
             formatted_parts[-1] += part
@@ -55,15 +55,16 @@ def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]:
 
     Traverses the entire JSON structure and reports:
     - List of keys that appear multiple times at the same level
-    - Full paths to each duplicate key occurrence
+    - Full paths to each duplicate key occurrence
 
     A key is considered duplicate if it appears multiple times within
     the same object, regardless of nesting level or array position.
     """
     try:
         dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys)
         duplicate_paths = [
-            f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data)
+            f"DocumentReference.{format_path(path)}"
+            for path in flatten_duplicates(dupe_data)
         ]
         duplicate_keys = list(
            dict.fromkeys([key.split(".")[-1] for key in duplicate_paths])

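For context on what this change does at runtime, here is a minimal usage sketch (not part of the commit; the import path is assumed from the file location). check_duplicate_keys parses the raw JSON with json.loads and the custom object_pairs_hook, so duplicate keys that a plain json.loads would silently collapse are recorded, and every reported path is now rooted at DocumentReference instead of root. The expected output below comes from test_simple_duplicates.

from nrlf.core.json_duplicate_checker import check_duplicate_keys  # assumed import path

# A plain json.loads would keep only the last "a"; the object_pairs_hook sees both.
duplicate_keys, duplicate_paths = check_duplicate_keys('{"a": 1, "b": 2, "a": 3}')

print(duplicate_keys)   # ["a"]
print(duplicate_paths)  # ["DocumentReference.a"]  (was ["root.a"] before this commit)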
layer/nrlf/core/tests/test_json_duplicate_checker.py

Lines changed: 40 additions & 20 deletions
@@ -14,7 +14,7 @@ def test_simple_duplicates(self):
         json_content = '{"a": 1, "b": 2, "a": 3}'
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(duplicates, ["a"])
-        self.assertEqual(paths, ["root.a"])
+        self.assertEqual(paths, ["DocumentReference.a"])
 
     def test_nested_duplicates(self):
         # This JSON has no duplicates because the 'b' keys are at different levels
@@ -28,7 +28,7 @@ def test_same_level_duplicates(self):
         json_content = '{"a": {"b": 1, "b": 2}, "c": {"d": 3}}'
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(duplicates, ["b"])
-        self.assertEqual(paths, ["root.a.b"])
+        self.assertEqual(paths, ["DocumentReference.a.b"])
 
     def test_same_level_duplicates_objects(self):
         # This JSON has duplicates because there are two 'b' keys at the same level
@@ -38,14 +38,16 @@ def test_same_level_duplicates_objects(self):
         )
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(duplicates, ["b"])
-        self.assertEqual(paths, ["root.a.b"])
+        self.assertEqual(paths, ["DocumentReference.a.b"])
 
     def test_multiple_level_duplicates(self):
         # This JSON has duplicates at multiple levels
         json_content = '{"a": 1, "b": {"c": 2, "c": 3}, "a": 4}'
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(sorted(duplicates), sorted(["a", "c"]))
-        self.assertEqual(sorted(paths), sorted(["root.a", "root.b.c"]))
+        self.assertEqual(
+            sorted(paths), sorted(["DocumentReference.a", "DocumentReference.b.c"])
+        )
 
     def test_invalid_json(self):
         json_content = "{invalid json}"
@@ -56,7 +58,7 @@ def test_complex_nested_duplicates(self):
         json_content = '{"a": {"b": 1, "c": {"d": 2, "c": 3}}, "a": {"e": 4}}'
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(sorted(duplicates), sorted(["a"]))
-        self.assertEqual(sorted(paths), sorted(["root.a"]))
+        self.assertEqual(sorted(paths), sorted(["DocumentReference.a"]))
 
     def test_multiple_duplicates_same_path(self):
         json_content = """
@@ -82,7 +84,15 @@ def test_multiple_duplicates_same_path(self):
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(sorted(duplicates), sorted(["b", "c", "e", "g"]))
         self.assertEqual(
-            sorted(paths), sorted(["root.b", "root.b.c", "root.b.d.e", "root.b.d.f.g"])
+            sorted(paths),
+            sorted(
+                [
+                    "DocumentReference.b",
+                    "DocumentReference.b.c",
+                    "DocumentReference.b.d.e",
+                    "DocumentReference.b.d.f.g",
+                ]
+            ),
         )
 
     def test_no_duplicates_deeply_nested(self):
@@ -119,7 +129,10 @@ def test_duplicates_with_arrays(self):
         """
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(sorted(duplicates), sorted(["b", "c"]))
-        self.assertEqual(sorted(paths), sorted(["root.a[0].b", "root.a[1].c"]))
+        self.assertEqual(
+            sorted(paths),
+            sorted(["DocumentReference.a[0].b", "DocumentReference.a[1].c"]),
+        )
 
     def test_large_json_with_mixed_duplicates(self):
         json_content = """
@@ -148,7 +161,14 @@ def test_large_json_with_mixed_duplicates(self):
         self.assertEqual(sorted(duplicates), sorted(["c", "f", "h", "j"]))
         self.assertEqual(
             sorted(paths),
-            sorted(["root.b.c", "root.b.e.f", "root.b.e.g.h", "root.i.j"]),
+            sorted(
+                [
+                    "DocumentReference.b.c",
+                    "DocumentReference.b.e.f",
+                    "DocumentReference.b.e.g.h",
+                    "DocumentReference.i.j",
+                ]
+            ),
         )
 
     def test_complex_nested_arrays_with_duplicates(self):
@@ -190,13 +210,13 @@ def test_complex_nested_arrays_with_duplicates(self):
             sorted(paths),
             sorted(
                 [
-                    "root.level1.arrays",
-                    "root.level1.arrays[0].a",
-                    "root.level1.arrays[0].nested.b",
-                    "root.level1.arrays[0].nested.b[0].c",
-                    "root.level1.arrays[1].mixed",
-                    "root.level1.arrays[1].mixed[1].f[0].g",
-                    "root.level1.arrays[1].mixed[1].f[1].h.i",
+                    "DocumentReference.level1.arrays",
+                    "DocumentReference.level1.arrays[0].a",
+                    "DocumentReference.level1.arrays[0].nested.b",
+                    "DocumentReference.level1.arrays[0].nested.b[0].c",
+                    "DocumentReference.level1.arrays[1].mixed",
+                    "DocumentReference.level1.arrays[1].mixed[1].f[0].g",
+                    "DocumentReference.level1.arrays[1].mixed[1].f[1].h.i",
                 ]
             ),
         )
@@ -233,8 +253,8 @@ def test_deep_nested_array_object_duplicates(self):
             sorted(paths),
             sorted(
                 [
-                    "root.root.level1[0].level2[0][0].data",
-                    "root.root.level1[0].level2[1][0].other",
+                    "DocumentReference.root.level1[0].level2[0][0].data",
+                    "DocumentReference.root.level1[0].level2[1][0].other",
                ]
            ),
        )
@@ -263,7 +283,7 @@ def get_expected_duplicates(self, max_depth):
     def get_expected_paths(self, max_depth):
         """Helper function to get expected duplicate paths."""
         paths = []
-        current_path = "root"
+        current_path = "DocumentReference"
 
         # Start from level0 and increment
         for i in range(max_depth):
@@ -314,10 +334,10 @@ def test_array_edge_case_duplicate(self):
         """
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(duplicates, ["array"])
-        self.assertEqual(paths, ["root.array"])
+        self.assertEqual(paths, ["DocumentReference.array"])
 
     def test_case_sensitive_keys(self):
         json_content = '{"a": 1, "A": 2, "aA": 3, "Aa": 4}'
         duplicates, paths = check_duplicate_keys(json_content)
         self.assertEqual(duplicates, ["A", "Aa"])
-        self.assertEqual(paths, ["root.A", "root.Aa"])
+        self.assertEqual(paths, ["DocumentReference.A", "DocumentReference.Aa"])

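One detail worth calling out from test_case_sensitive_keys above: the hook normalizes keys with key.lower() (visible in the first hunk of json_duplicate_checker.py), so "a"/"A" collide and "aA"/"Aa" collide, with the later spelling the one reported. A standalone sketch of that normalization idea, written here for illustration rather than copied from the module:

import json
from typing import Any

def report_case_insensitive_dupes(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    # Illustrative hook only: flags a key when a case-insensitive match was seen earlier.
    seen: set[str] = set()
    result: dict[str, Any] = {}
    for key, value in pairs:
        if key.lower() in seen:
            print(f"duplicate: {key}")
        seen.add(key.lower())
        result[key] = value
    return result

json.loads('{"a": 1, "A": 2, "aA": 3, "Aa": 4}',
           object_pairs_hook=report_case_insensitive_dupes)
# prints "duplicate: A" and "duplicate: Aa", matching the duplicates expected by the test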
layer/nrlf/core/tests/test_json_duplicate_checker_nrlf.py

Lines changed: 1 addition & 1 deletion
@@ -59,4 +59,4 @@ def test_parse_body_valid_docref_with_duplicate_keys(field):
 
     node = field.split(".")[-1]
     assert result[0] == [node]
-    assert result[1] == [f"root.{field}"]
+    assert result[1] == [f"DocumentReference.{field}"]

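The parametrized NRLF test above ties the two return values together: result[0] holds the bare key name and result[1] the fully prefixed path. With a hypothetical parameter value (the test's real field values fall outside this hunk):

field = "context.related"  # hypothetical example, not taken from the test parameters

node = field.split(".")[-1]          # "related"
path = f"DocumentReference.{field}"  # "DocumentReference.context.related"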
layer/nrlf/core/tests/test_request.py

Lines changed: 1 addition & 1 deletion
@@ -181,7 +181,7 @@ def test_parse_body_valid_docref_with_duplicate_key():
                 ]
             },
             "diagnostics": "Duplicate keys found in FHIR document: ['docStatus']",
-            "expression": ["root.docStatus"],
+            "expression": ["DocumentReference.docStatus"],
         }
     ],
 }

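The test_request.py expectation shows where the new prefix ultimately surfaces: the expression entries of the FHIR OperationOutcome issue now read as FHIRPath-style locations rooted at the resource type. A sketch of how the checker's two return values could be folded into such an issue (the helper name is hypothetical; only the diagnostics and expression shapes come from the test):

from nrlf.core.json_duplicate_checker import check_duplicate_keys  # assumed import path

def duplicate_key_issue_fields(json_content: str) -> dict[str, object] | None:
    # Hypothetical helper: builds the issue fields asserted in test_request.py.
    duplicate_keys, duplicate_paths = check_duplicate_keys(json_content)
    if not duplicate_keys:
        return None
    return {
        "diagnostics": f"Duplicate keys found in FHIR document: {duplicate_keys}",
        "expression": duplicate_paths,
    }

fields = duplicate_key_issue_fields('{"docStatus": "preliminary", "docStatus": "final"}')
# fields["expression"] == ["DocumentReference.docStatus"]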