1616
1717logger = logging .getLogger (__name__ )
1818
19- def estimate_entity_count_and_size (
20- client : datastore .Client , kind : str , namespace : Optional [str ]
21- ) -> Tuple [int , int ]:
22- query = client .query (kind = kind , namespace = namespace or None )
23- total_size = 0
24- count = 0
25- for entity in query .fetch ():
26- try :
27- raw_proto = entity_to_protobuf (entity )._pb
28- total_size += len (raw_proto .SerializeToString ())
29- except Exception :
30- # Fallback: count only
31- pass
32- count += 1
33- return count , total_size
34-
35- def analyze_kinds (config : AppConfig ) -> List [Dict ]:
19+
def get_kind_stats(client, kind: str, namespace: Optional[str] = None) -> Tuple[Optional[int], Optional[int]]:
    """Look up entity count and stored bytes for a kind from Datastore statistics.

    Args:
        client: Datastore client (anything exposing ``query(kind=..., namespace=...)``).
        kind: Name of the kind whose statistics are wanted.
        namespace: Namespace to restrict the lookup to. A falsy value selects
            the global per-kind statistics.

    Returns:
        ``(count, bytes)`` taken from the matching statistics entity, or
        ``(None, None)`` when no statistics entity exists for the kind
        (for example, statistics have not been generated yet).
    """
    if namespace:
        # Per-namespace kind statistics use the kind ``__Stat_Ns_Kind__`` and
        # are stored inside the namespace they describe, so the namespace is
        # selected on the query itself rather than through a property filter.
        query = client.query(kind="__Stat_Ns_Kind__", namespace=namespace)
    else:
        # NOTE: ``__Stat_Kind__`` aggregates the kind across all namespaces.
        query = client.query(kind="__Stat_Kind__")
    query.add_filter("kind_name", "=", kind)

    # At most one statistics entity is expected per kind; take the first match.
    stat = next(iter(query.fetch(limit=1)), None)
    if stat is None:
        return None, None
    return stat["count"], stat["bytes"]
39+
40+
def estimate_entity_count_and_size(client, kind: str, namespace: Optional[str], sample_size: int = 100) -> Tuple[int, int]:
    """Count a kind's entities exactly and estimate their total size by sampling.

    The count comes from a keys-only scan of the kind. The size is
    extrapolated from the serialized protobuf size of up to ``sample_size``
    entities returned by a plain (unordered) query.

    Args:
        client: Datastore client (anything exposing ``query(kind=..., namespace=...)``).
        kind: Name of the kind to measure.
        namespace: Namespace to scan; a falsy value is passed to the query as ``None``.
        sample_size: Maximum number of entities fetched for the size estimate.
            A non-positive value disables sampling and yields a size of 0.

    Returns:
        ``(count, estimated_total_bytes)``. The size is 0 when there are no
        entities or when no sampled entity could be measured.
    """
    ns = namespace or None

    # Exact count via a keys-only scan (no entity payloads are transferred).
    count_query = client.query(kind=kind, namespace=ns)
    count_query.keys_only()
    total_count = sum(1 for _ in count_query.fetch())

    # Nothing to extrapolate from: skip the second round trip entirely.
    if total_count == 0 or sample_size <= 0:
        return total_count, 0

    sample_query = client.query(kind=kind, namespace=ns)
    measured = 0
    measured_bytes = 0
    for entity in sample_query.fetch(limit=sample_size):
        try:
            measured_bytes += len(entity_to_protobuf(entity)._pb.SerializeToString())
        except Exception:
            # Best effort: one entity that cannot be serialized must not abort
            # the whole analysis; leave it out of the average instead.
            logger.warning(
                "Could not measure a sampled entity of kind=%s; skipping it",
                kind,
                exc_info=True,
            )
            continue
        measured += 1

    if measured == 0:
        return total_count, 0

    # Integer arithmetic avoids float rounding on very large kinds.
    return total_count, measured_bytes * total_count // measured
59+
60+
61+ def analyze_kinds (config : AppConfig , method : Optional [str ] = None ) -> List [Dict ]:
62+ """
63+ Analyze kinds using either:
64+ - 'stats' (default) => fast built-in Datastore statistics
65+ - 'scan' => keys-only scan with sampling
66+ Falls back to 'scan' if stats are missing for a kind.
67+ """
3668 client = build_client (config )
3769
70+ # Decide method priority: parameter > config > default
71+ method = method or getattr (config , "method" , None ) or "stats"
72+
3873 # Thanks to config.py normalisation, [] is the only “all” case
3974 namespaces = config .namespaces or list_namespaces (client )
4075
@@ -43,7 +78,16 @@ def analyze_kinds(config: AppConfig) -> List[Dict]:
4378 kinds = config .kinds or list_kinds (client , ns )
4479 logger .info ("Analyzing namespace=%s, %d kinds" , ns or "(default)" , len (kinds ))
4580 for kind in kinds :
46- count , total_bytes = estimate_entity_count_and_size (client , kind , ns )
81+ if method == "stats" :
82+ count , total_bytes = get_kind_stats (client , kind , ns )
83+ if count is None :
84+ logger .warning ("Stats not found for kind=%s, ns=%s — falling back to scan" , kind , ns or "(default)" )
85+ count , total_bytes = estimate_entity_count_and_size (client , kind , ns )
86+ elif method == "scan" :
87+ count , total_bytes = estimate_entity_count_and_size (client , kind , ns )
88+ else :
89+ raise ValueError (f"Unknown method: { method } " )
90+
4791 results .append (
4892 {
4993 "namespace" : ns ,
@@ -55,6 +99,7 @@ def analyze_kinds(config: AppConfig) -> List[Dict]:
5599 )
56100 return results
57101
102+
58103def print_summary_table (rows : List [Dict ]) -> None :
59104 # Plain stdout table for wide compatibility
60105 print ("namespace,kind,count,size,bytes" )
0 commit comments