 import json
-from collections import OrderedDict
-from typing import Any, Dict, List, Tuple
-
-JsonPrimitive = str | int | float | bool | None
-type JsonValue = JsonPrimitive | JsonObject | JsonArray
-JsonPair = tuple[str, JsonValue]
-JsonObject = list[JsonPair]
-JsonArray = list[JsonValue]
-
-
-class DuplicateKeyChecker:
-    """JSON structure duplicate key detector.
-
-    Tracks duplicate keys by maintaining path context during traversal.
-    Paths are recorded in dot notation with array indices:
-    - Objects: parent.child
-    - Arrays: parent.array[0]
-    - Nested: parent.array[0].child[1].key
-    """
-
-    def __init__(self):
-        # Here a list of paths because the same key name could be at different levels
-        self.duplicate_keys_and_paths: OrderedDict[str, list[str]] = OrderedDict()
-        # Track keys at each path level to detect duplicates
-        self.key_registry: Dict[str, Dict[str, bool]] = {}
-        self.current_duplicate_index: Dict[str, int] = {}
-        # Track seen array elements to detect duplicates
-        self.seen_array_elements: Dict[str, List[JsonValue]] = {}
-
-    def get_path_with_index(self, path: List[str], key: str) -> List[str]:
-        current_level = ".".join(path)
-        index_map = self.current_duplicate_index.setdefault(current_level, {})
-        count = index_map.get(key, 0)
-        index_map[key] = count + 1
-
-        # If it's the first occurrence, keep the key as is.
-        # Subsequent occurrences get bracket-indexed.
-        if count == 0:
-            return path + [key]
-        else:
-            return path + [f"{key}[{count - 1}]"]
-
-    def check_key(self, key: str, path: List[str]) -> None:
-        """Check if a key at the current path is a duplicate.
-
-        A duplicate occurs when the same key appears twice at the same
-        nesting level, even if the values differ.
-        """
-        current_level = ".".join(path)
-        current_keys = self.key_registry.setdefault(current_level, {})
-        if key in current_keys:
-            duplicate_path = ".".join(path + [key])
-            self.duplicate_keys_and_paths.setdefault(key, []).append(duplicate_path)
-            print(f"Found duplicate key: {key} at path: {'.'.join(path + [key])}")
-        else:
-            current_keys[key] = True
-
-    def process_collection(
-        self, value: JsonObject | JsonArray, path: list[str], key: str
-    ) -> None:
-        """Determine if the given 'value' is an object or an array and handle it."""
-        new_path = self.get_path_with_index(path, key)
-        if value and isinstance(value[0], tuple):
-            self.traverse_json(value, new_path)
-        else:
-            self.traverse_array(value, new_path)
-
-    def traverse_json(self, data: JsonObject, path: list[str]) -> None:
-        """Traverse JSON object and check for duplicate keys."""
-        for key, value in data:
-            print(f"Processing key: {key}, value: {value}")
-            self.check_key(key, path)
-            if isinstance(value, (list, tuple)):
-                self.process_collection(value, path, key)
-
-    def traverse_array(self, items: JsonArray, path: list[str]) -> None:
-        """Process JSON array items while updating the path for duplicates."""
-        array_path = path[-1]
-        base_path = path[:-1]
-        seen_elements = self.seen_array_elements.setdefault(".".join(path), set())
-
-        for idx, item in enumerate(items):
-            serialized_item = json.dumps(item, sort_keys=True)
-            if serialized_item in seen_elements:
-                element = f"{array_path}[{idx}]"
-                duplicate_path = ".".join(base_path + [element])
-                self.duplicate_keys_and_paths.setdefault(element, []).append(
-                    duplicate_path
-                )
-                print(f"Found duplicate array element at path: {duplicate_path}")
-            else:
-                seen_elements.add(serialized_item)
-
-            if not isinstance(item, (list, tuple)):
-                continue
-            self.process_collection(item, base_path, f"{array_path}[{idx}]")
-
+from typing import Any
 
 def check_for_duplicate_keys(pairs: list[tuple[str, Any]]):
     keys = {}
@@ -128,14 +32,25 @@ def flatten_duplicates(data: dict | list) -> list[str]:
             dupes = flatten_duplicates(value)
 
             path = f"{key}" if isinstance(data, dict) else f"[{key}]"
-            duplicates.extend([f"{path_key}.{dupe}" for dupe in dupes])
+            duplicates.extend([f"{path}.{dupe}" for dupe in dupes])
 
     print(f"flatten_duplicates data={data} dupes={duplicates}")
 
     return duplicates
 
 
-def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]:
+def format_path(path):
+    parts = path.split('.')
+    formatted_parts = []
+    for part in parts:
+        if part.startswith('['):
+            formatted_parts[-1] += part
+        else:
+            formatted_parts.append(part)
+    return '.'.join(formatted_parts)
+
+
+def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]:
     """Find all duplicate keys in a JSON string.
 
     Traverses the entire JSON structure and reports:
@@ -153,25 +68,8 @@ def check_duplicate_keys(json_content: str) -> Tuple[List[str], List[str]]:
         dupe_data = json.loads(
             json_content, object_pairs_hook=check_for_duplicate_keys
         )
-        duplicate_paths = [f"root.{path}" for path in flatten_duplicates(dupe_data)]
-        duplicate_keys = [key.split(".")[-1] for key in duplicate_paths]
+        duplicate_paths = [f"root.{format_path(path)}" for path in flatten_duplicates(dupe_data)]
+        duplicate_keys = list(dict.fromkeys([key.split(".")[-1] for key in duplicate_paths]))
         return duplicate_keys, duplicate_paths
     except json.JSONDecodeError:
-        raise ValueError("Error: Invalid JSON format")
-
-    try:
-        parsed_data = json.loads(json_content, object_pairs_hook=lambda pairs: pairs)
-        print("Parsed JSON:", parsed_data)
-    except json.JSONDecodeError:
-        raise ValueError("Error: Invalid JSON format")
-
-    checker = DuplicateKeyChecker()
-    checker.traverse_json(parsed_data, ["root"])
-
-    duplicates = list(checker.duplicate_keys_and_paths.keys())
-    # flatten the list of paths
-    paths = sum(checker.duplicate_keys_and_paths.values(), [])
-    print("Final duplicates:", duplicates)
-    print("Final paths:", paths)
-
-    return duplicates, paths
+        raise ValueError("Error: Invalid JSON format")
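
Below the diff, a quick illustration of what the new format_path helper is for: flatten_duplicates emits list indices as free-standing segments such as "items.[0].name", and format_path folds each bracketed segment back onto the key before it. The sample paths are invented for illustration; only the helper itself comes from the change above.

# Minimal sketch: the helper copied from the new version of the module, run on hypothetical paths.
def format_path(path):
    parts = path.split('.')
    formatted_parts = []
    for part in parts:
        if part.startswith('['):
            # A free-standing index segment like "[0]" is merged into the previous key.
            formatted_parts[-1] += part
        else:
            formatted_parts.append(part)
    return '.'.join(formatted_parts)

print(format_path("items.[0].name"))  # items[0].name
print(format_path("config.server"))   # config.server (no index segments, unchanged)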
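And an end-to-end usage sketch of the reworked check_duplicate_keys. The module filename and the expected output here are assumptions for illustration: the exact keys and paths depend on check_for_duplicate_keys and on the body of flatten_duplicates, which the hunks above only show in part.

# Usage sketch; assumes the diffed module is saved as duplicate_keys.py (hypothetical name).
from duplicate_keys import check_duplicate_keys

doc = '{"a": 1, "a": 2, "items": [{"x": 1, "x": 2}]}'
keys, paths = check_duplicate_keys(doc)
print(keys)   # illustrative: ['a', 'x']
print(paths)  # illustrative: ['root.a', 'root.items[0].x']

# Invalid input is surfaced as ValueError rather than json.JSONDecodeError.
try:
    check_duplicate_keys("not valid json")
except ValueError as err:
    print(err)  # Error: Invalid JSON format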