Skip to content

Commit 14bfe22

Browse files
using stats
1 parent 378ff3c commit 14bfe22

File tree

1 file changed

+63
-18
lines changed

1 file changed

+63
-18
lines changed

gcd_tools/analyze_kinds.py

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,60 @@
1616

1717
logger = logging.getLogger(__name__)
1818

19-
def estimate_entity_count_and_size(
    client: datastore.Client, kind: str, namespace: Optional[str]
) -> Tuple[int, int]:
    """
    Count every entity of a kind and sum their serialized protobuf sizes.

    Performs a full scan: each entity returned by the query is fetched,
    converted with ``entity_to_protobuf`` and measured via
    ``SerializeToString()``.

    Args:
        client: Datastore client used to build the query.
        kind: Name of the kind to scan.
        namespace: Namespace to scan; a falsy value is passed on as ``None``.

    Returns:
        ``(count, total_size)``. Entities whose size could not be computed are
        still counted but contribute nothing to ``total_size``, so the size is
        a lower bound whenever a warning was logged.
    """
    query = client.query(kind=kind, namespace=namespace or None)
    total_size = 0
    count = 0
    unsized = 0
    for entity in query.fetch():
        count += 1
        try:
            raw_proto = entity_to_protobuf(entity)._pb
            total_size += len(raw_proto.SerializeToString())
        except Exception:
            # Deliberately best-effort: one unserializable entity must not
            # abort the scan. Record it instead of silently ignoring it.
            unsized += 1
            logger.debug(
                "Could not serialize an entity of kind=%s", kind, exc_info=True
            )
    if unsized:
        # A single summary line avoids flooding the log on large kinds.
        logger.warning(
            "Size unavailable for %d of %d entities of kind=%s; "
            "total size is a lower bound",
            unsized,
            count,
            kind,
        )
    return count, total_size
34-
35-
def analyze_kinds(config: AppConfig) -> List[Dict]:
19+
20+
def get_kind_stats(client, kind: str, namespace: Optional[str] = None) -> Tuple[Optional[int], Optional[int]]:
    """
    Return ``(count, bytes)`` for a kind from the built-in Datastore statistics.

    Args:
        client: Datastore client; only ``client.query(kind=..., namespace=...)``
            is used.
        kind: Name of the kind to look up.
        namespace: Namespace to report on. A falsy value (``None`` or ``""``)
            reads the ``__Stat_Kind__`` entities instead.

    Returns:
        ``(count, bytes)`` taken from the first matching statistic entity, or
        ``(None, None)`` when no statistic entity matches.
    """
    if namespace:
        # Per-namespace kind statistics live under the ``__Stat_Ns_Kind__``
        # kind and are stored inside the namespace they describe, so the
        # query is scoped by namespace rather than filtered on a property.
        query = client.query(kind="__Stat_Ns_Kind__", namespace=namespace)
    else:
        # NOTE(review): __Stat_Kind__ presumably aggregates over all
        # namespaces, not just the default one — confirm this is what callers
        # expect when they pass the default namespace.
        query = client.query(kind="__Stat_Kind__")
    query.add_filter("kind_name", "=", kind)

    stat = next(iter(query.fetch(limit=1)), None)
    if stat is None:
        return None, None
    return stat["count"], stat["bytes"]
39+
40+
41+
def estimate_entity_count_and_size(client, kind: str, namespace: Optional[str], sample_size: int = 100) -> Tuple[int, int]:
    """
    Scan-based estimate: exact count via keys-only query, bytes via sampling.

    Args:
        client: Datastore client used to build both queries.
        kind: Name of the kind to measure.
        namespace: Namespace to scan; a falsy value is passed on as ``None``.
        sample_size: Maximum number of entities fetched to estimate the
            average serialized size.

    Returns:
        ``(count, estimated_bytes)`` where ``estimated_bytes`` is the sampled
        byte total scaled up to ``count`` and rounded down. The sample is
        simply the first ``sample_size`` entities the query returns, so it is
        not a random sample.
    """
    ns = namespace or None

    # Keys-only keeps the counting pass cheap: no property data is returned.
    count_query = client.query(kind=kind, namespace=ns)
    count_query.keys_only()
    total_count = sum(1 for _ in count_query.fetch())
    if total_count == 0:
        # Nothing to sample; skip the second round trip entirely.
        return 0, 0

    sample_query = client.query(kind=kind, namespace=ns)
    sample_entities = list(sample_query.fetch(limit=sample_size))
    if not sample_entities:
        return total_count, 0

    sample_bytes = sum(
        len(entity_to_protobuf(e)._pb.SerializeToString()) for e in sample_entities
    )
    # Integer arithmetic avoids float rounding, which could truncate the
    # result one byte low even when the sample covers every entity.
    return total_count, sample_bytes * total_count // len(sample_entities)
59+
60+
61+
def analyze_kinds(config: AppConfig, method: Optional[str] = None) -> List[Dict]:
62+
"""
63+
Analyze kinds using either:
64+
- 'stats' (default) => fast built-in Datastore statistics
65+
- 'scan' => keys-only scan with sampling
66+
Falls back to 'scan' if stats are missing for a kind.
67+
"""
3668
client = build_client(config)
3769

70+
# Decide method priority: parameter > config > default
71+
method = method or getattr(config, "method", None) or "stats"
72+
3873
# Thanks to config.py normalisation, [] is the only “all” case
3974
namespaces = config.namespaces or list_namespaces(client)
4075

@@ -43,7 +78,16 @@ def analyze_kinds(config: AppConfig) -> List[Dict]:
4378
kinds = config.kinds or list_kinds(client, ns)
4479
logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds))
4580
for kind in kinds:
46-
count, total_bytes = estimate_entity_count_and_size(client, kind, ns)
81+
if method == "stats":
82+
count, total_bytes = get_kind_stats(client, kind, ns)
83+
if count is None:
84+
logger.warning("Stats not found for kind=%s, ns=%s — falling back to scan", kind, ns or "(default)")
85+
count, total_bytes = estimate_entity_count_and_size(client, kind, ns)
86+
elif method == "scan":
87+
count, total_bytes = estimate_entity_count_and_size(client, kind, ns)
88+
else:
89+
raise ValueError(f"Unknown method: {method}")
90+
4791
results.append(
4892
{
4993
"namespace": ns,
@@ -55,6 +99,7 @@ def analyze_kinds(config: AppConfig) -> List[Dict]:
5599
)
56100
return results
57101

102+
58103
def print_summary_table(rows: List[Dict]) -> None:
59104
# Plain stdout table for wide compatibility
60105
print("namespace,kind,count,size,bytes")

0 commit comments

Comments
 (0)