Skip to content

Commit 20ba00e

Browse files
NRL-1215 Alternative implementation passing all tests
1 parent b07d007 commit 20ba00e

File tree

2 files changed

+18
-177
lines changed

2 files changed

+18
-177
lines changed
Lines changed: 17 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,5 @@
11
import json
2-
from collections import OrderedDict
3-
from typing import Any, Dict, List, Tuple
4-
5-
JsonPrimitive = str | int | float | bool | None
6-
type JsonValue = JsonPrimitive | JsonObject | JsonArray
7-
JsonPair = tuple[str, JsonValue]
8-
JsonObject = list[JsonPair]
9-
JsonArray = list[JsonValue]
10-
11-
12-
class DuplicateKeyChecker:
13-
"""JSON structure duplicate key detector.
14-
15-
Tracks duplicate keys by maintaining path context during traversal.
16-
Paths are recorded in dot notation with array indices:
17-
- Objects: parent.child
18-
- Arrays: parent.array[0]
19-
- Nested: parent.array[0].child[1].key
20-
"""
21-
22-
def __init__(self):
23-
# Here a list of paths because the same key name could be at different levels
24-
self.duplicate_keys_and_paths: OrderedDict[str, list[str]] = OrderedDict()
25-
# Track keys at each path level to detect duplicates
26-
self.key_registry: Dict[str, Dict[str, bool]] = {}
27-
self.current_duplicate_index: Dict[str, int] = {}
28-
# Track seen array elements to detect duplicates
29-
self.seen_array_elements: Dict[str, List[JsonValue]] = {}
30-
31-
def get_path_with_index(self, path: List[str], key: str) -> List[str]:
32-
current_level = ".".join(path)
33-
index_map = self.current_duplicate_index.setdefault(current_level, {})
34-
count = index_map.get(key, 0)
35-
index_map[key] = count + 1
36-
37-
# If it's the first occurrence, keep the key as is.
38-
# Subsequent occurrences get bracket-indexed.
39-
if count == 0:
40-
return path + [key]
41-
else:
42-
return path + [f"{key}[{count - 1}]"]
43-
44-
def check_key(self, key: str, path: List[str]) -> None:
45-
"""Check if a key at the current path is a duplicate.
46-
47-
A duplicate occurs when the same key appears twice at the same
48-
nesting level, even if the values differ.
49-
"""
50-
current_level = ".".join(path)
51-
current_keys = self.key_registry.setdefault(current_level, {})
52-
if key in current_keys:
53-
duplicate_path = ".".join(path + [key])
54-
self.duplicate_keys_and_paths.setdefault(key, []).append(duplicate_path)
55-
print(f"Found duplicate key: {key} at path: {'.'.join(path + [key])}")
56-
else:
57-
current_keys[key] = True
58-
59-
def process_collection(
60-
self, value: JsonObject | JsonArray, path: list[str], key: str
61-
) -> None:
62-
"""Determine if the given 'value' is an object or an array and handle it."""
63-
new_path = self.get_path_with_index(path, key)
64-
if value and isinstance(value[0], tuple):
65-
self.traverse_json(value, new_path)
66-
else:
67-
self.traverse_array(value, new_path)
68-
69-
def traverse_json(self, data: JsonObject, path: list[str]) -> None:
70-
"""Traverse JSON object and check for duplicate keys."""
71-
for key, value in data:
72-
print(f"Processing key: {key}, value: {value}")
73-
self.check_key(key, path)
74-
if isinstance(value, (list, tuple)):
75-
self.process_collection(value, path, key)
76-
77-
def traverse_array(self, items: JsonArray, path: list[str]) -> None:
78-
"""Process JSON array items while updating the path for duplicates."""
79-
array_path = path[-1]
80-
base_path = path[:-1]
81-
seen_elements = self.seen_array_elements.setdefault(".".join(path), set())
82-
83-
for idx, item in enumerate(items):
84-
serialized_item = json.dumps(item, sort_keys=True)
85-
if serialized_item in seen_elements:
86-
element = f"{array_path}[{idx}]"
87-
duplicate_path = ".".join(base_path + [element])
88-
self.duplicate_keys_and_paths.setdefault(element, []).append(
89-
duplicate_path
90-
)
91-
print(f"Found duplicate array element at path: {duplicate_path}")
92-
else:
93-
seen_elements.add(serialized_item)
94-
95-
if not isinstance(item, (list, tuple)):
96-
continue
97-
self.process_collection(item, base_path, f"{array_path}[{idx}]")
98-
2+
from typing import Any
993

1004
def check_for_duplicate_keys(pairs: list[tuple[str, Any]]):
1015
keys = {}
@@ -128,14 +32,25 @@ def flatten_duplicates(data: dict | list) -> list[str]:
12832
dupes = flatten_duplicates(value)
12933

13034
path = f"{key}" if isinstance(data, dict) else f"[{key}]"
131-
duplicates.extend([f"{path_key}.{dupe}" for dupe in dupes])
35+
duplicates.extend([f"{path}.{dupe}" for dupe in dupes])
13236

13337
print(f"flatten_duplicates data={data} dupes={duplicates}")
13438

13539
return duplicates
13640

13741

138-
def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]:
42+
def format_path(path):
43+
parts = path.split('.')
44+
formatted_parts = []
45+
for part in parts:
46+
if part.startswith('['):
47+
formatted_parts[-1] += part
48+
else:
49+
formatted_parts.append(part)
50+
return '.'.join(formatted_parts)
51+
52+
53+
def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]:
13954
"""Find all duplicate keys in a JSON string.
14055
14156
Traverses the entire JSON structure and reports:
@@ -153,25 +68,8 @@ def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]:
15368
dupe_data = json.loads(
15469
json_content, object_pairs_hook=check_for_duplicate_keys
15570
)
156-
duplicate_paths = [f"root.{path}" for path in flatten_duplicates(dupe_data)]
157-
duplicate_keys = [key.split(".")[-1] for key in duplicate_paths]
71+
duplicate_paths = [f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data)]
72+
duplicate_keys = list(dict.fromkeys([key.split(".")[-1] for key in duplicate_paths]))
15873
return duplicate_keys, duplicate_paths
15974
except json.JSONDecodeError:
160-
raise ValueError("Error: Invalid JSON format")
161-
162-
try:
163-
parsed_data = json.loads(json_content, object_pairs_hook=lambda pairs: pairs)
164-
print("Parsed JSON:", parsed_data)
165-
except json.JSONDecodeError:
166-
raise ValueError("Error: Invalid JSON format")
167-
168-
checker = DuplicateKeyChecker()
169-
checker.traverse_json(parsed_data, ["root"])
170-
171-
duplicates = list(checker.duplicate_keys_and_paths.keys())
172-
# flatten the list of paths
173-
paths = sum(checker.duplicate_keys_and_paths.values(), [])
174-
print("Final duplicates:", duplicates)
175-
print("Final paths:", paths)
176-
177-
return duplicates, paths
75+
raise ValueError("Error: Invalid JSON format")

layer/nrlf/core/tests/test_json_duplicate_checker.py

Lines changed: 1 addition & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -323,61 +323,4 @@ def test_array_edge_case_duplicate(self):
323323
"""
324324
duplicates, paths = check_duplicate_keys(json_content)
325325
self.assertEqual(duplicates, ["array"])
326-
self.assertEqual(paths, ["root.array"])
327-
328-
# def test_array_element_duplicate(self):
329-
# json_content = """
330-
# {
331-
# "array": [
332-
# 1,
333-
# 2,
334-
# 3,
335-
# 1
336-
# ]
337-
# }
338-
# """
339-
# duplicates, paths = check_duplicate_keys(json_content)
340-
# self.assertEqual(duplicates, ["array[3]"])
341-
# self.assertEqual(paths, ["root.array[3]"])
342-
343-
# deeply nested object with a deeply nested array with a duplicate
344-
def test_deeply_nested_object_with_deeply_nested_array_duplicate(self):
345-
json_content = """
346-
{
347-
"root": {
348-
"level1": {
349-
"level2": {
350-
"level3": {
351-
"level4": {
352-
"level5": {
353-
"level6": {
354-
"level7": {
355-
"level8": {
356-
"level9": {
357-
"level10": {
358-
"array": [
359-
{"key1": 1, "key2": 2},
360-
{"key1": 1, "key2": 2}
361-
]
362-
}
363-
}
364-
}
365-
}
366-
}
367-
}
368-
}
369-
}
370-
}
371-
}
372-
}
373-
}
374-
"""
375-
duplicates, paths = check_duplicate_keys(json_content)
376-
self.assertEqual(duplicates, ["array[1]"])
377-
# duplicate root here needs fixing in traverse_array loop
378-
self.assertEqual(
379-
paths,
380-
[
381-
"root.root.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.array[1]"
382-
],
383-
)
326+
self.assertEqual(paths, ["root.array"])

0 commit comments

Comments
 (0)