| 
 | 1 | +from deepdiff.helper import JSON  | 
 | 2 | +from deepdiff.serialization import json_dumps  | 
 | 3 | + | 
 | 4 | +# type edge_weight_child_strcuture = tuple[int, int, Any]  | 
 | 5 | + | 
 | 6 | +# Function to calculate node weights recursively  | 
 | 7 | +def calculate_weights(node):# -> tuple[int, tuple[str, edge_weight_child_strcuture]]:  | 
 | 8 | +    if isinstance(node, dict):  | 
 | 9 | +        weight = 0  | 
 | 10 | +        children_weights = {}  | 
 | 11 | +        for k, v in node.items():  | 
 | 12 | +            edge_weight = len(k)  | 
 | 13 | +            child_weight, child_structure = calculate_weights(v)  | 
 | 14 | +            total_weight = edge_weight + child_weight  | 
 | 15 | +            weight += total_weight  | 
 | 16 | +            children_weights[k] = (edge_weight, child_weight, child_structure)  | 
 | 17 | +        return weight, ('dict', children_weights)  | 
 | 18 | + | 
 | 19 | +    elif isinstance(node, list):  | 
 | 20 | +        weight = 0  | 
 | 21 | +        children_weights = []  | 
 | 22 | +        for v in node:  | 
 | 23 | +            edge_weight = 0  # As per updated instruction, indexes have zero weight  | 
 | 24 | +            child_weight, child_structure = calculate_weights(v)  | 
 | 25 | +            total_weight = edge_weight + child_weight  | 
 | 26 | +            weight += total_weight  | 
 | 27 | +            children_weights.append((edge_weight, child_weight, child_structure))  | 
 | 28 | +        return weight, ('list', children_weights)  | 
 | 29 | + | 
 | 30 | +    else:  | 
 | 31 | +        if isinstance(node, str):  | 
 | 32 | +            node_weight = len(node)  | 
 | 33 | +        elif isinstance(node, int):  | 
 | 34 | +            node_weight = len(str(node))  | 
 | 35 | +        elif isinstance(node, float):  | 
 | 36 | +            node_weight = len(str(round(node, 2)))  | 
 | 37 | +        elif node is None:  | 
 | 38 | +            node_weight = 1  | 
 | 39 | +        else:  | 
 | 40 | +            node_weight = 0  | 
 | 41 | +        return node_weight, ('leaf', node)  | 
 | 42 | + | 
 | 43 | + | 
 | 44 | +def _truncate(s: str, max_len: int) -> str:  | 
 | 45 | +    """  | 
 | 46 | +    Truncate string s to max_len characters.  | 
 | 47 | +    If possible, keep the first (max_len-5) characters, then '...' then the last 2 characters.  | 
 | 48 | +    """  | 
 | 49 | +    if len(s) <= max_len:  | 
 | 50 | +        return s  | 
 | 51 | +    if max_len <= 5:  | 
 | 52 | +        return s[:max_len]  | 
 | 53 | +    return s[:max_len - 5] + "..." + s[-2:]  | 
 | 54 | + | 
 | 55 | + | 
 | 56 | +# Greedy algorithm to shrink the tree  | 
 | 57 | +def shrink_tree(node_structure, max_weight: int) -> tuple[JSON, int]:  | 
 | 58 | +    node_type, node_info = node_structure  | 
 | 59 | + | 
 | 60 | +    if node_type == 'leaf':  | 
 | 61 | +        leaf_value = node_info  | 
 | 62 | +        leaf_weight, _ = calculate_weights(leaf_value)  | 
 | 63 | +        if leaf_weight <= max_weight:  | 
 | 64 | +            return leaf_value, leaf_weight  | 
 | 65 | +        else:  | 
 | 66 | +            # Truncate leaf value if string  | 
 | 67 | +            if isinstance(leaf_value, str):  | 
 | 68 | +                truncated_value = _truncate(leaf_value, max_weight)  | 
 | 69 | +                return truncated_value, len(truncated_value)  | 
 | 70 | +            # For int or float, convert to string and truncate  | 
 | 71 | +            elif isinstance(leaf_value, (int, float)):  | 
 | 72 | +                leaf_str = str(leaf_value)  | 
 | 73 | +                truncated_str = leaf_str[:max_weight]  | 
 | 74 | +                # Convert back if possible  | 
 | 75 | +                try:  | 
 | 76 | +                    return int(truncated_str), len(truncated_str)  | 
 | 77 | +                except Exception:  | 
 | 78 | +                    try:  | 
 | 79 | +                        return float(truncated_str), len(truncated_str)  | 
 | 80 | +                    except Exception:  | 
 | 81 | +                        return truncated_str, len(truncated_str)  | 
 | 82 | +            elif leaf_value is None:  | 
 | 83 | +                return None, 1 if max_weight >=1 else 0  | 
 | 84 | + | 
 | 85 | +    elif node_type == 'dict':  | 
 | 86 | +        shrunk_dict = {}  | 
 | 87 | +        total_weight = 0  | 
 | 88 | +        # Sort children by weight (heavy first)  | 
 | 89 | +        sorted_children = sorted(node_info.items(), key=lambda x: x[1][0] + x[1][1], reverse=True)  | 
 | 90 | +        for k, (edge_w, child_w, child_struct) in sorted_children:  | 
 | 91 | +            if total_weight + edge_w >= max_weight:  | 
 | 92 | +                continue  # Skip heavy edge entirely  | 
 | 93 | +            remaining_weight = max_weight - total_weight - edge_w  | 
 | 94 | +            shrunk_child, shrunk_weight = shrink_tree(child_struct, remaining_weight)  | 
 | 95 | +            if shrunk_child is not None:  | 
 | 96 | +                shrunk_dict[k[:edge_w]] = shrunk_child  | 
 | 97 | +                total_weight += edge_w + shrunk_weight  | 
 | 98 | +            if total_weight >= max_weight:  | 
 | 99 | +                break  | 
 | 100 | +        return shrunk_dict, total_weight  | 
 | 101 | + | 
 | 102 | +    elif node_type == 'list':  | 
 | 103 | +        shrunk_list = []  | 
 | 104 | +        total_weight = 0  | 
 | 105 | +        # Sort children by weight (heavy first)  | 
 | 106 | +        sorted_children = sorted(node_info, key=lambda x: x[0] + x[1], reverse=True)  | 
 | 107 | +        for edge_w, child_w, child_struct in sorted_children:  | 
 | 108 | +            remaining_weight = max_weight - total_weight  | 
 | 109 | +            shrunk_child, shrunk_weight = shrink_tree(child_struct, remaining_weight)  | 
 | 110 | +            if shrunk_child is not None:  | 
 | 111 | +                shrunk_list.append(shrunk_child)  | 
 | 112 | +                total_weight += shrunk_weight  | 
 | 113 | +            if total_weight >= max_weight - 1:  | 
 | 114 | +                shrunk_list.append('...')  | 
 | 115 | +                break  | 
 | 116 | +        return shrunk_list, total_weight  | 
 | 117 | +    return None, 1  | 
 | 118 | + | 
 | 119 | +# Main function to summarize the tree  | 
 | 120 | +def summarize_tree(tree: dict | list, max_weight: int) -> JSON:  | 
 | 121 | +    total_weight, tree_structure = calculate_weights(tree)  | 
 | 122 | +    if total_weight <= max_weight:  | 
 | 123 | +        return tree  # No need to shrink  | 
 | 124 | +    shrunk_tree, _ = shrink_tree(tree_structure, max_weight)  | 
 | 125 | +    return shrunk_tree  | 
 | 126 | + | 
 | 127 | +# Exposed function for user convenience  | 
 | 128 | +def summarize(json_data, max_length=200) -> str:  | 
 | 129 | +    return json_dumps(summarize_tree(json_data, max_length))  | 
0 commit comments