|
| 1 | +import json |
| 2 | +from typing import Any |
| 3 | + |
| 4 | + |
def check_for_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    """Build a dict from JSON object pairs, recording any duplicate keys.

    Intended for use as the ``object_pairs_hook`` of ``json.loads``. Keys are
    compared case-insensitively (via ``str.lower``). The first occurrence of a
    key wins and is stored under its original spelling.

    Args:
        pairs: The ``(key, value)`` tuples of one JSON object, in document
            order.

    Returns:
        The first-seen keys mapped to their values. If any key repeated, an
        extra ``"__duplicates__"`` entry maps each repeated key (spelled as it
        appeared at the repeat) to the list of values it carried.
    """
    keys: dict[str, Any] = {}
    dupes: dict[str, list[Any]] = {}
    # A set keeps the "already seen?" test O(1) per key instead of scanning a
    # list for every pair.
    seen: set[str] = set()

    for key, value in pairs:
        normalized_key = key.lower()
        if normalized_key in seen:
            dupes.setdefault(key, []).append(value)
        else:
            keys[key] = value
            seen.add(normalized_key)

    if dupes:
        keys["__duplicates__"] = dupes

    return keys
| 23 | + |
| 24 | + |
| 25 | +def flatten_duplicates(data: dict[str, Any] | list[Any]) -> list[str]: |
| 26 | + """Flattens a JSON structure and returns a list of duplicate paths.""" |
| 27 | + duplicates: list[str] = [] |
| 28 | + items = data.items() if isinstance(data, dict) else enumerate(data) |
| 29 | + |
| 30 | + for key, value in items: |
| 31 | + if key == "__duplicates__": |
| 32 | + duplicates.extend(value.keys()) |
| 33 | + elif isinstance(value, (dict, list)): |
| 34 | + path = f"{key}" if isinstance(data, dict) else f"[{key}]" |
| 35 | + dupes = flatten_duplicates(value) |
| 36 | + duplicates.extend([f"{path}.{dupe}" for dupe in dupes]) |
| 37 | + |
| 38 | + return duplicates |
| 39 | + |
| 40 | + |
def format_path(path: str) -> str:
    """Attach list-index segments to the segment that precedes them.

    Transforms a path like ``key1.[2].key2`` into ``key1[2].key2``. A path
    that starts with an index segment (e.g. ``[0].key``) keeps that segment
    as its first part, since there is nothing to attach it to.

    Note that the path is split on ``"."``, so a key that itself contains a
    dot is treated as several segments.

    Args:
        path: Dot-separated path in which list indices appear as their own
            ``[i]`` segments.

    Returns:
        The path with each ``[i]`` segment merged onto the preceding segment.
    """
    formatted_parts: list[str] = []
    for part in path.split("."):
        # Merge an index onto the previous segment only when one exists; a
        # leading index would otherwise raise IndexError on the empty list.
        if part.startswith("[") and formatted_parts:
            formatted_parts[-1] += part
        else:
            formatted_parts.append(part)
    return ".".join(formatted_parts)
| 51 | + |
| 52 | + |
def check_duplicate_keys(json_content: str) -> tuple[list[str], list[str]]:
    """Find all duplicate keys in a JSON string.

    Traverses the entire JSON structure and reports:
    - List of keys that appear multiple times at the same level
    - Full paths to each duplicate key occurrence

    A key is considered duplicate if it appears multiple times within
    the same object, regardless of nesting level or array position.

    Args:
        json_content: The JSON document to inspect.

    Returns:
        A ``(duplicate_keys, duplicate_paths)`` tuple. ``duplicate_paths``
        holds one ``DocumentReference``-rooted path per duplicate found.
        ``duplicate_keys`` holds the last ``"."``-separated segment of each
        path, de-duplicated in first-seen order. Both lists are empty when
        no duplicates are found.

    Raises:
        ValueError: If ``json_content`` is not valid JSON.
    """
    # Keep the try body to the one call that can raise JSONDecodeError, and
    # chain the original error so the parse position is not lost.
    try:
        dupe_data = json.loads(json_content, object_pairs_hook=check_for_duplicate_keys)
    except json.JSONDecodeError as err:
        raise ValueError("Error: Invalid JSON format") from err

    # A scalar document (number, string, true/false/null) contains no objects
    # and therefore no keys; flattening it would fail on non-iterables.
    if not isinstance(dupe_data, (dict, list)):
        return [], []

    # Format the path together with its root prefix so that a top-level array
    # index attaches to the root (``DocumentReference[0]...``).
    duplicate_paths = [
        format_path(f"DocumentReference.{path}")
        for path in flatten_duplicates(dupe_data)
    ]
    duplicate_keys = list(
        dict.fromkeys(path.split(".")[-1] for path in duplicate_paths)
    )
    return duplicate_keys, duplicate_paths
0 commit comments