Skip to content

Commit 3d43af2

Browse files
NRL-1215 Enhance duplicate detection for JSON arrays and add corresponding tests
1 parent 9b877d6 commit 3d43af2

File tree

2 files changed

+71
-1
lines changed

2 files changed

+71
-1
lines changed

layer/nrlf/core/json_duplicate_checker.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ def __init__(self):
2525
# Track keys at each path level to detect duplicates
2626
self.key_registry: Dict[str, Dict[str, bool]] = {}
2727
self.current_duplicate_index: Dict[str, int] = {}
28+
# Track the serialized array elements already seen at each array path, to detect duplicate elements
29+
self.seen_array_elements: Dict[str, List[JsonValue]] = {}
2830

2931
def get_path_with_index(self, path: List[str], key: str) -> List[str]:
3032
current_level = ".".join(path)
@@ -76,8 +78,24 @@ def traverse_array(self, items: JsonArray, path: list[str]) -> None:
7678
"""Process JSON array items while updating the path for duplicates."""
7779
array_path = path[-1]
7880
base_path = path[:-1]
81+
seen_elements = self.seen_array_elements.setdefault(".".join(path), set())
7982

8083
for idx, item in enumerate(items):
84+
serialized_item = json.dumps(item, sort_keys=True)
85+
if serialized_item in seen_elements:
86+
element = f"{array_path}[{idx}]"
87+
duplicate_path = ".".join(base_path + [element])
88+
self.duplicate_keys_and_paths.setdefault(element, []).append(duplicate_path)
89+
print(f"Found duplicate array element at path: {duplicate_path}")
90+
else:
91+
seen_elements.add(serialized_item)
92+
93+
# if item in seen_elements:
94+
# duplicate_path = f"{array_path}[{idx}]"
95+
# self.duplicate_keys_and_paths.setdefault(duplicate_path, []).append(f"{base_path[0]}.{duplicate_path}")
96+
# print(f"Found duplicate array element at path: {duplicate_path}")
97+
# else:
98+
# seen_elements.append(item)
8199
if not isinstance(item, (list, tuple)):
82100
continue
83101
self.process_collection(item, base_path, f"{array_path}[{idx}]")

layer/nrlf/core/tests/test_json_duplicate_checker.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,4 +323,56 @@ def test_array_edge_case_duplicate(self):
323323
"""
324324
duplicates, paths = check_duplicate_keys(json_content)
325325
self.assertEqual(duplicates, ["array"])
326-
self.assertEqual(paths, ["root.array"])
326+
self.assertEqual(paths, ["root.array"])
327+
328+
def test_array_element_duplicate(self):
329+
json_content = """
330+
{
331+
"array": [
332+
1,
333+
2,
334+
3,
335+
1
336+
]
337+
}
338+
"""
339+
duplicates, paths = check_duplicate_keys(json_content)
340+
self.assertEqual(duplicates, ["array[3]"])
341+
self.assertEqual(paths, ["root.array[3]"])
342+
343+
# Deeply nested object containing a deeply nested array that has a duplicate element
344+
def test_deeply_nested_object_with_deeply_nested_array_duplicate(self):
345+
json_content = """
346+
{
347+
"root": {
348+
"level1": {
349+
"level2": {
350+
"level3": {
351+
"level4": {
352+
"level5": {
353+
"level6": {
354+
"level7": {
355+
"level8": {
356+
"level9": {
357+
"level10": {
358+
"array": [
359+
{"key1": 1, "key2": 2},
360+
{"key1": 1, "key2": 2}
361+
]
362+
}
363+
}
364+
}
365+
}
366+
}
367+
}
368+
}
369+
}
370+
}
371+
}
372+
}
373+
}
374+
"""
375+
duplicates, paths = check_duplicate_keys(json_content)
376+
self.assertEqual(duplicates, ["array[1]"])
377+
# TODO: the leading "root" segment is duplicated in the reported path; this needs fixing in the traverse_array loop
378+
self.assertEqual(paths, ["root.root.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.array[1]"])

0 commit comments

Comments
 (0)