1- from  typing  import  Any 
1+ from  deepdiff . helper  import  JSON ,  SummaryNodeType 
22from  deepdiff .serialization  import  json_dumps 
33
44
5- def  _truncate (s , max_len ) :
5+ def  _truncate (s :  str , max_len :  int )  ->   str :
66    """ 
77    Truncate string s to max_len characters. 
88    If possible, keep the first (max_len-5) characters, then '...' then the last 2 characters. 
@@ -12,165 +12,126 @@ def _truncate(s, max_len):
1212    if  max_len  <=  5 :
1313        return  s [:max_len ]
1414    return  s [:max_len  -  5 ] +  "..."  +  s [- 2 :]
15+ # Helpers for weight-based summarization of JSON-like trees.
1516
16- class  JSONNode :
17-     def  __init__ (self , data : Any , key = None ):
18-         """ 
19-         Build a tree node for the JSON data. 
20-         If this node is a child of a dict, key is its key name. 
21-         """ 
22-         self .key  =  key 
23-         self .children_list : list [JSONNode ] =  []
24-         self .children_dict : list [tuple [Any , JSONNode ]] =  []
25-         self .value : str  =  "" 
26-         if  isinstance (data , dict ):
27-             self .type  =  "dict" 
28-             # Preserve insertion order: list of (key, child) pairs. 
29-             for  k , v  in  data .items ():
30-                 child  =  JSONNode (v , key = k )
31-                 self .children_dict .append ((k , child ))
32-         elif  isinstance (data , list ):
33-             self .type  =  "list" 
34-             self .children_list  =  [JSONNode (item ) for  item  in  data ]
17+ 
18+ # Function to calculate node weights recursively 
def calculate_weights(node):
    """
    Recursively compute the weight of a JSON-like value and build its weighted tree.

    Weights are computed as follows:
      * dict  -- sum over entries of (key weight + child weight), where the key
                 weight is ``len(key)``; keys without a length (e.g. ints) are
                 weighed by the length of ``str(key)``.
      * list  -- sum of the child weights (indices weigh nothing).
      * str   -- ``len(node)``.
      * int   -- ``len(str(node))`` (bools are ints, so ``True`` weighs 4).
      * float -- ``len(str(round(node, 2)))``.
      * None  -- 1.
      * anything else -- 0.

    Returns:
        A ``(weight, structure)`` tuple, where ``structure`` is one of
        ``(SummaryNodeType.dict, {key: (edge_weight, child_weight, child_structure)})``,
        ``(SummaryNodeType.list, [(edge_weight, child_weight, child_structure), ...])``
        or ``(SummaryNodeType.leaf, node)``.
    """
    if isinstance(node, dict):
        weight = 0
        children_weights = {}
        for k, v in node.items():
            try:
                edge_weight = len(k)
            except TypeError:
                # Keys such as ints, floats or None have no len(); weigh them
                # by their printed form instead of crashing.
                edge_weight = len(str(k))
            child_weight, child_structure = calculate_weights(v)
            weight += edge_weight + child_weight
            children_weights[k] = (edge_weight, child_weight, child_structure)
        return weight, (SummaryNodeType.dict, children_weights)

    if isinstance(node, list):
        weight = 0
        children_weights = []
        for v in node:
            edge_weight = 0  # Index weights are zero
            child_weight, child_structure = calculate_weights(v)
            weight += edge_weight + child_weight
            children_weights.append((edge_weight, child_weight, child_structure))
        return weight, (SummaryNodeType.list, children_weights)

    # Leaf values
    if isinstance(node, str):
        node_weight = len(node)
    elif isinstance(node, int):
        node_weight = len(str(node))
    elif isinstance(node, float):
        node_weight = len(str(round(node, 2)))
    elif node is None:
        node_weight = 1
    else:
        node_weight = 0
    return node_weight, (SummaryNodeType.leaf, node)
54+ 
55+ # Balanced shrinking with a threshold: each child branch may use at most
56+ # `balance_threshold` of its parent's weight budget, so a single heavy
57+ # branch cannot take the whole budget (for thresholds below 1).
58+ 
def shrink_tree_balanced(node_structure, max_weight: int, balance_threshold: float) -> tuple[JSON, float]:
    """
    Shrink a weighted tree so that its total weight fits within ``max_weight``.

    Args:
        node_structure: A ``(node_type, node_info)`` pair in the shape produced by
            ``calculate_weights``.
        max_weight: Weight budget available for this node.
        balance_threshold: Fraction of ``max_weight`` that any single child
            branch of a dict or list may consume.

    Returns:
        A ``(shrunk_value, weight)`` tuple. ``(None, 0)`` is returned when
        nothing fits (or the node type is not recognized).

    NOTE: ``None`` doubles as the "nothing fits" marker, so a child whose
    value is a JSON null is dropped by the dict and list branches below.
    """
    node_type, node_info = node_structure

    if node_type is SummaryNodeType.leaf:
        leaf_value = node_info
        leaf_weight, _ = calculate_weights(leaf_value)
        if leaf_weight <= max_weight:
            return leaf_value, leaf_weight

        # A negative budget would make the slices below count from the end of
        # the string and return more than the budget allows; clamp it to zero.
        budget = max(max_weight, 0)
        if isinstance(leaf_value, str):
            truncated_value = _truncate(leaf_value, budget)
            return truncated_value, len(truncated_value)
        if isinstance(leaf_value, (int, float)):
            # Numbers are truncated on their printed digits; when the remaining
            # text no longer parses as a number it is returned as a string.
            truncated_str = str(leaf_value)[:budget]
            for convert in (int, float):
                try:
                    return convert(truncated_str), len(truncated_str)
                except ValueError:
                    continue
            return truncated_str, len(truncated_str)
        if leaf_value is None:
            return None, 1 if max_weight >= 1 else 0
        return None, 0

    if node_type is SummaryNodeType.dict:
        shrunk_dict = {}
        total_weight = 0
        # Visit the heaviest branches (key weight + child weight) first.
        sorted_children = sorted(node_info.items(), key=lambda x: x[1][0] + x[1][1], reverse=True)

        for k, (edge_w, _, child_struct) in sorted_children:
            allowed_branch_weight = min(max_weight * balance_threshold, max_weight - total_weight)
            if allowed_branch_weight <= edge_w:
                # The key alone would use up the branch budget; skip this entry.
                continue

            remaining_weight = int(allowed_branch_weight - edge_w)
            shrunk_child, shrunk_weight = shrink_tree_balanced(child_struct, remaining_weight, balance_threshold)
            if shrunk_child is not None:
                try:
                    key = k[:edge_w]
                except TypeError:
                    # Non-sliceable keys (ints, None, ...) are kept as they are.
                    key = k
                shrunk_dict[key] = shrunk_child
                total_weight += edge_w + shrunk_weight

            if total_weight >= max_weight:
                break
        if not shrunk_dict:
            return None, 0

        return shrunk_dict, total_weight

    if node_type is SummaryNodeType.list:
        shrunk_list = []
        total_weight = 0
        # Heaviest elements first; the output follows this order rather than
        # the original list order.
        sorted_children = sorted(node_info, key=lambda x: x[0] + x[1], reverse=True)
        for edge_w, _, child_struct in sorted_children:
            allowed_branch_weight = int(min(max_weight * balance_threshold, max_weight - total_weight))
            shrunk_child, shrunk_weight = shrink_tree_balanced(child_struct, allowed_branch_weight, balance_threshold)
            if shrunk_child is not None:
                shrunk_list.append(shrunk_child)
                total_weight += shrunk_weight
            if total_weight >= max_weight - 1:
                # Budget (almost) exhausted: add the marker and stop.
                shrunk_list.append("...")
                break
        if not shrunk_list:
            return None, 0
        return shrunk_list, total_weight

    return None, 0
124+ 
125+ 
def greedy_tree_summarization_balanced(json_data: JSON, max_weight: int, balance_threshold=0.6) -> JSON:
    """
    Return ``json_data`` reduced to at most ``max_weight`` of weight.

    The data is weighed with ``calculate_weights``. When it already fits, the
    original object is returned untouched; otherwise the weighted tree is
    passed to ``shrink_tree_balanced`` and the shrunk value is returned.
    """
    weight, structure = calculate_weights(json_data)
    if weight > max_weight:
        # Only the shrunk value is needed here; its weight is discarded.
        return shrink_tree_balanced(structure, max_weight, balance_threshold)[0]
    return json_data
132+ 
133+ 
def summarize(data: JSON, max_length:int=200, balance_threshold:float=0.6) -> str:
    """
    Summarize ``data`` and serialize the result with ``json_dumps``.

    ``max_length`` is handed to ``greedy_tree_summarization_balanced`` as the
    weight budget, and ``balance_threshold`` is forwarded unchanged.
    """
    shrunk = greedy_tree_summarization_balanced(data, max_length, balance_threshold)
    return json_dumps(shrunk)
0 commit comments