From 18be2a2dc2563a4ffc75df79d6ecb42dbb5fa0db Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 15:03:58 +0000 Subject: [PATCH 01/12] Initial project setup for local Datastore/Firestore management utilities Co-authored-by: isisosirishorus --- .github/workflows/build.yml | 46 +++++++++ CONTRIBUTING.md | 21 ++++ README.md | 24 ++++- cli.py | 128 ++++++++++++++++++++++++ config.yaml | 23 +++++ gcd_tools/__init__.py | 18 ++++ gcd_tools/analyze_entity_fields.py | 135 +++++++++++++++++++++++++ gcd_tools/analyze_kinds.py | 73 ++++++++++++++ gcd_tools/cleanup_expired.py | 81 +++++++++++++++ gcd_tools/config.py | 153 +++++++++++++++++++++++++++++ pyproject.toml | 47 +++++++++ requirements.txt | 3 + tests/test_import.py | 6 ++ 13 files changed, 757 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build.yml create mode 100644 CONTRIBUTING.md create mode 100644 cli.py create mode 100644 config.yaml create mode 100644 gcd_tools/__init__.py create mode 100644 gcd_tools/analyze_entity_fields.py create mode 100644 gcd_tools/analyze_kinds.py create mode 100644 gcd_tools/cleanup_expired.py create mode 100644 gcd_tools/config.py create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 tests/test_import.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..91cf7cc --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,46 @@ +name: build + +on: + push: + branches: [ main ] + tags: [ "v*" ] + pull_request: + +jobs: + ci: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: | + python -m pip install -U pip + python -m pip install . + python -m pip install pytest + - name: Test + run: | + pytest -q + + publish: + needs: ci + if: startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Build + run: | + python -m pip install -U pip build + python -m build + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..6f4b308 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,21 @@ +# Contributing + +Thanks for your interest in contributing! + +- Open an issue to discuss substantial changes. +- Fork and create feature branches from `main`. +- Run formatting and tests before submitting a PR. + +## Dev setup + +```bash +python -m venv .venv && source .venv/bin/activate +pip install -U pip +pip install -e . +``` + +## Testing + +```bash +python -m pytest -q +``` \ No newline at end of file diff --git a/README.md b/README.md index 5d34160..ce7ed6f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,24 @@ # local-storage-utils -Set of scripts and tools for managing GCP Datastore data in local + +Utilities for analyzing and managing local Datastore/Firestore (Datastore mode) data. Works with both the Datastore Emulator and GCP using Application Default Credentials. + +## Install + +```bash +pip install -e . 
+``` + +## CLI + +```bash +# Kind-level counts and size estimates +lsu analyze-kinds --project my-project + +# Field contribution analysis for a kind +lsu analyze-fields --kind SourceCollectionStateEntity --namespace "" --group-by batchId + +# TTL cleanup across kinds/namespaces (dry-run) +lsu cleanup --ttl-field expireAt --dry-run +``` + +Use `--help` on any command for full options. Config can be provided via `config.yaml` or flags. diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..9e6cb26 --- /dev/null +++ b/cli.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import json +from typing import List, Optional + +import typer + +from gcd_tools.config import AppConfig, load_config, format_size +from gcd_tools.analyze_kinds import analyze_kinds, print_summary_table +from gcd_tools.analyze_entity_fields import analyze_field_contributions, print_field_summary +from gcd_tools.cleanup_expired import cleanup_expired + +app = typer.Typer(help="Utilities for analyzing and managing local Datastore/Firestore (Datastore mode)") + + +def _load_cfg( + config_path: Optional[str], + project: Optional[str], + emulator_host: Optional[str], + log_level: Optional[str], +) -> AppConfig: + overrides = {} + if project: + overrides["project_id"] = project + if emulator_host: + overrides["emulator_host"] = emulator_host + if log_level: + overrides["log_level"] = log_level + return load_config(config_path, overrides) + + +@app.command("analyze-kinds") +def cmd_analyze_kinds( + config: Optional[str] = typer.Option(None, help="Path to config.yaml"), + project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), + emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. localhost:8010"), + log_level: Optional[str] = typer.Option(None, help="Logging level"), + namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to include"), + exclude_namespace: Optional[List[str]] = typer.Option(None, "--exclude-namespace", help="Namespaces to exclude"), + kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to include"), + exclude_kind: Optional[List[str]] = typer.Option(None, "--exclude-kind", help="Kinds to exclude"), + output: Optional[str] = typer.Option(None, help="Output CSV file path"), +): + cfg = _load_cfg(config, project, emulator_host, log_level) + if namespace: + cfg.namespace_include = list(namespace) + if exclude_namespace: + cfg.namespace_exclude = list(exclude_namespace) + if kind: + cfg.kinds_include = list(kind) + if exclude_kind: + cfg.kinds_exclude = list(exclude_kind) + + rows = analyze_kinds(cfg) + if output: + with open(output, "w", encoding="utf-8") as fh: + fh.write("namespace,kind,count,size,bytes\n") + for r in rows: + ns = r.get("namespace") or "" + fh.write(f"{ns},{r['kind']},{r['count']},{r['size']},{r['bytes']}\n") + typer.echo(f"Wrote {len(rows)} rows to {output}") + else: + print_summary_table(rows) + + +@app.command("analyze-fields") +def cmd_analyze_fields( + kind: str = typer.Option(..., "--kind", "-k", help="Kind to analyze"), + namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Namespace to query"), + group_by: Optional[str] = typer.Option(None, help="Group results by this field value"), + only_field: Optional[List[str]] = typer.Option(None, "--only-field", help="Only consider these fields"), + config: Optional[str] = typer.Option(None, help="Path to config.yaml"), + project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), + 
emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. localhost:8010"), + log_level: Optional[str] = typer.Option(None, help="Logging level"), + output_json: Optional[str] = typer.Option(None, help="Write raw JSON results to file"), +): + cfg = _load_cfg(config, project, emulator_host, log_level) + result = analyze_field_contributions( + cfg, kind=kind, namespace=namespace, group_by_field=group_by, only_fields=list(only_field) if only_field else None + ) + + if output_json: + with open(output_json, "w", encoding="utf-8") as fh: + json.dump(result, fh, indent=2) + typer.echo(f"Wrote JSON results to {output_json}") + else: + print_field_summary(result) + + +@app.command("cleanup") +def cmd_cleanup( + config: Optional[str] = typer.Option(None, help="Path to config.yaml"), + project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), + emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. localhost:8010"), + log_level: Optional[str] = typer.Option(None, help="Logging level"), + namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to include"), + exclude_namespace: Optional[List[str]] = typer.Option(None, "--exclude-namespace", help="Namespaces to exclude"), + kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to include"), + exclude_kind: Optional[List[str]] = typer.Option(None, "--exclude-kind", help="Kinds to exclude"), + ttl_field: Optional[str] = typer.Option(None, help="TTL field name"), + delete_missing_ttl: bool = typer.Option(True, help="Delete when TTL field is missing"), + batch_size: Optional[int] = typer.Option(None, help="Delete batch size"), + dry_run: bool = typer.Option(False, help="Only report counts; do not delete"), +): + cfg = _load_cfg(config, project, emulator_host, log_level) + + if namespace: + cfg.namespace_include = list(namespace) + if exclude_namespace: + cfg.namespace_exclude = list(exclude_namespace) + if kind: + cfg.kinds_include = list(kind) + if exclude_kind: + cfg.kinds_exclude = list(exclude_kind) + if ttl_field: + cfg.ttl_field = ttl_field + cfg.delete_missing_ttl = delete_missing_ttl + if batch_size: + cfg.batch_size = batch_size + + totals = cleanup_expired(cfg, dry_run=dry_run) + deleted_sum = sum(totals.values()) + typer.echo(f"Total entities {'to delete' if dry_run else 'deleted'}: {deleted_sum}") + + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..7537311 --- /dev/null +++ b/config.yaml @@ -0,0 +1,23 @@ +# Default configuration for local-storage-utils +# Values here can be overridden via CLI flags or environment variables. + +# Core connection +project_id: null # e.g. "my-project"; if omitted, ADC or env will be used +emulator_host: null # e.g. "localhost:8010"; if set, emulator mode is enabled + +# Filters +namespace_include: [] # e.g. ["", "tenant-a"]; empty string means default namespace +namespace_exclude: [] +kinds_include: [] +kinds_exclude: [] + +# Cleanup +ttl_field: "expireAt" +delete_missing_ttl: true +batch_size: 500 + +# Analysis +group_by_field: null + +# Logging +log_level: "INFO" \ No newline at end of file diff --git a/gcd_tools/__init__.py b/gcd_tools/__init__.py new file mode 100644 index 0000000..a1c8e85 --- /dev/null +++ b/gcd_tools/__init__.py @@ -0,0 +1,18 @@ +from .config import AppConfig, load_config, build_client, list_namespaces, list_kinds, format_size +from . import analyze_kinds as analyze_kinds +from . 
import analyze_entity_fields as analyze_entity_fields +from . import cleanup_expired as cleanup_expired +from . import config as config + +__all__ = [ + "AppConfig", + "load_config", + "build_client", + "list_namespaces", + "list_kinds", + "format_size", + "analyze_kinds", + "analyze_entity_fields", + "cleanup_expired", + "config", +] \ No newline at end of file diff --git a/gcd_tools/analyze_entity_fields.py b/gcd_tools/analyze_entity_fields.py new file mode 100644 index 0000000..e4bddcd --- /dev/null +++ b/gcd_tools/analyze_entity_fields.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +import logging +from collections import defaultdict +from typing import DefaultDict, Dict, Iterable, List, Optional, Tuple + +from google.cloud import datastore +from google.cloud.datastore.helpers import entity_to_protobuf + +from .config import AppConfig, build_client, format_size + +logger = logging.getLogger(__name__) + + +def _clone_without_field(entity: datastore.Entity, exclude_field: str) -> datastore.Entity: + new_entity = datastore.Entity(key=entity.key) + for k, v in entity.items(): + if k != exclude_field: + new_entity[k] = v + return new_entity + + +def _estimate_field_contributions( + entities: Iterable[datastore.Entity], + target_fields: Optional[List[str]] = None, +) -> Tuple[Dict[str, int], int, int]: + field_totals: DefaultDict[str, int] = defaultdict(int) + total_size = 0 + entity_count = 0 + + for entity in entities: + entity_count += 1 + proto = entity_to_protobuf(entity)._pb + full_size = len(proto.SerializeToString()) + total_size += full_size + + for field in (target_fields or list(entity.keys())): + if field not in entity: + continue + reduced_entity = _clone_without_field(entity, field) + reduced_size = len(entity_to_protobuf(reduced_entity)._pb.SerializeToString()) + field_totals[field] += max(0, full_size - reduced_size) + + return dict(field_totals), total_size, entity_count + + +def analyze_field_contributions( + config: AppConfig, + kind: str, + namespace: Optional[str] = None, + group_by_field: Optional[str] = None, + only_fields: Optional[List[str]] = None, +) -> Dict: + client = build_client(config) + + query = client.query(kind=kind, namespace=namespace or None) + + if group_by_field: + logger.info( + "Analyzing field contributions for kind=%s, namespace=%s grouped by %s", + kind, + namespace or "(default)", + group_by_field, + ) + grouped_entities: DefaultDict[str, List[datastore.Entity]] = defaultdict(list) + for entity in query.fetch(): + group_val = entity.get(group_by_field) + key = str(group_val) if group_val is not None else "" + grouped_entities[key].append(entity) + + results: Dict[str, Dict] = {} + for group_key, ents in grouped_entities.items(): + field_totals, total_size, entity_count = _estimate_field_contributions( + ents, target_fields=only_fields + ) + results[group_key] = { + "namespace": namespace, + "kind": kind, + "group": group_key, + "entity_count": entity_count, + "total_bytes": total_size, + "total_size": format_size(total_size), + "fields": { + f: { + "bytes": b, + "avg_per_entity": (b / entity_count) if entity_count else 0.0, + "human": format_size(b), + } + for f, b in sorted(field_totals.items(), key=lambda x: x[1], reverse=True) + }, + } + return {"grouped": results} + + # Ungrouped path + logger.info( + "Analyzing field contributions for kind=%s, namespace=%s", + kind, + namespace or "(default)", + ) + field_totals, total_size, entity_count = _estimate_field_contributions( + query.fetch(), target_fields=only_fields + ) + return { + 
"namespace": namespace, + "kind": kind, + "entity_count": entity_count, + "total_bytes": total_size, + "total_size": format_size(total_size), + "fields": { + f: { + "bytes": b, + "avg_per_entity": (b / entity_count) if entity_count else 0.0, + "human": format_size(b), + } + for f, b in sorted(field_totals.items(), key=lambda x: x[1], reverse=True) + }, + } + + +def print_field_summary(result: Dict) -> None: + if "grouped" in result: + for group_key, data in result["grouped"].items(): + ns = data.get("namespace") or "" + print(f"\n[group={group_key}] ns={ns} kind={data['kind']} entities={data['entity_count']} total={data['total_size']}") + for field, stats in data["fields"].items(): + avg = stats["avg_per_entity"] + print(f" {field:30} {stats['human']:>12} ({avg:.1f} bytes avg)") + else: + ns = result.get("namespace") or "" + print( + f"ns={ns} kind={result['kind']} entities={result['entity_count']} total={result['total_size']}" + ) + for field, stats in result["fields"].items(): + avg = stats["avg_per_entity"] + print(f" {field:30} {stats['human']:>12} ({avg:.1f} bytes avg)") \ No newline at end of file diff --git a/gcd_tools/analyze_kinds.py b/gcd_tools/analyze_kinds.py new file mode 100644 index 0000000..323f318 --- /dev/null +++ b/gcd_tools/analyze_kinds.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import logging +from typing import Dict, List, Optional, Tuple + +from google.cloud import datastore +from google.cloud.datastore.helpers import entity_to_protobuf + +from .config import ( + AppConfig, + build_client, + list_namespaces, + list_kinds, + apply_kind_filters, + apply_namespace_filters, + format_size, +) + + +logger = logging.getLogger(__name__) + + +def estimate_entity_count_and_size( + client: datastore.Client, kind: str, namespace: Optional[str] +) -> Tuple[int, int]: + query = client.query(kind=kind, namespace=namespace or None) + total_size = 0 + count = 0 + for entity in query.fetch(): + try: + raw_proto = entity_to_protobuf(entity)._pb + total_size += len(raw_proto.SerializeToString()) + except Exception: + # Fallback: count only + pass + count += 1 + return count, total_size + + +def analyze_kinds(config: AppConfig) -> List[Dict]: + client = build_client(config) + + all_namespaces = list_namespaces(client) + namespaces = apply_namespace_filters( + all_namespaces, config.namespace_include, config.namespace_exclude + ) + + results: List[Dict] = [] + for ns in namespaces: + kinds = list_kinds(client, ns) + kinds = apply_kind_filters(kinds, config.kinds_include, config.kinds_exclude) + + logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds)) + for kind in kinds: + count, total_bytes = estimate_entity_count_and_size(client, kind, ns) + results.append( + { + "namespace": ns, + "kind": kind, + "count": count, + "bytes": total_bytes, + "size": format_size(total_bytes), + } + ) + return results + + +def print_summary_table(rows: List[Dict]) -> None: + # Plain stdout table for wide compatibility + print("namespace,kind,count,size,bytes") + for r in rows: + ns = r.get("namespace") or "" + print(f"{ns},{r['kind']},{r['count']},{r['size']},{r['bytes']}") \ No newline at end of file diff --git a/gcd_tools/cleanup_expired.py b/gcd_tools/cleanup_expired.py new file mode 100644 index 0000000..0062dac --- /dev/null +++ b/gcd_tools/cleanup_expired.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import logging +from datetime import datetime, timezone +from typing import Dict, Iterable, List, Optional + +from google.cloud import datastore + 
+from .config import ( + AppConfig, + build_client, + list_namespaces, + list_kinds, + apply_kind_filters, + apply_namespace_filters, + chunked, +) + +logger = logging.getLogger(__name__) + + +def _delete_in_batches(client: datastore.Client, keys: List[datastore.Key], batch_size: int) -> int: + deleted = 0 + for batch in chunked(keys, batch_size): + client.delete_multi(batch) # type: ignore[arg-type] + deleted += len(batch) + return deleted + + +def cleanup_expired( + config: AppConfig, + dry_run: bool = False, +) -> Dict[str, int]: + client = build_client(config) + + all_namespaces = list_namespaces(client) + namespaces = apply_namespace_filters( + all_namespaces, config.namespace_include, config.namespace_exclude + ) + + totals: Dict[str, int] = {} + now = datetime.now(timezone.utc) + + for ns in namespaces: + kinds = list_kinds(client, ns) + kinds = apply_kind_filters(kinds, config.kinds_include, config.kinds_exclude) + + for kind in kinds: + query = client.query(kind=kind, namespace=ns or None) + to_delete: List[datastore.Key] = [] + for entity in query.fetch(): + expire_at = entity.get(config.ttl_field) + expired = expire_at is None if config.delete_missing_ttl else False + if not expired and expire_at is not None: + try: + expired = expire_at < now + except Exception: + # If unparsable or timezone-less, skip + expired = False + if expired: + to_delete.append(entity.key) + + if dry_run: + logger.info( + "[DRY-RUN] ns=%s kind=%s would delete %d entities", + ns or "(default)", + kind, + len(to_delete), + ) + totals[f"{ns}:{kind}"] = len(to_delete) + else: + deleted = _delete_in_batches(client, to_delete, config.batch_size) if to_delete else 0 + logger.info( + "ns=%s kind=%s deleted %d expired entities", + ns or "(default)", + kind, + deleted, + ) + totals[f"{ns}:{kind}"] = deleted + + return totals \ No newline at end of file diff --git a/gcd_tools/config.py b/gcd_tools/config.py new file mode 100644 index 0000000..d1c3d81 --- /dev/null +++ b/gcd_tools/config.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import os +import logging +from dataclasses import dataclass, field +from typing import Dict, Iterable, List, Optional, Sequence, Tuple + +import yaml +from google.cloud import datastore + + +@dataclass +class AppConfig: + project_id: Optional[str] = None + emulator_host: Optional[str] = None + + # Filters + namespace_include: List[str] = field(default_factory=list) + namespace_exclude: List[str] = field(default_factory=list) + kinds_include: List[str] = field(default_factory=list) + kinds_exclude: List[str] = field(default_factory=list) + + # Cleanup settings + ttl_field: str = "expireAt" + delete_missing_ttl: bool = True + batch_size: int = 500 + + # Analysis settings + group_by_field: Optional[str] = None + + # Logging + log_level: str = "INFO" + + +def _as_list(value: Optional[Iterable[str]]) -> List[str]: + if value is None: + return [] + if isinstance(value, (list, tuple)): + return [str(v) for v in value] + return [str(value)] + + +def load_config(path: Optional[str] = None, overrides: Optional[Dict] = None) -> AppConfig: + config = AppConfig() + + # Load YAML if provided or if default exists + data: Dict = {} + candidate = path or os.getenv("LSU_CONFIG") + if not candidate and os.path.exists("config.yaml"): + candidate = "config.yaml" + + if candidate and os.path.exists(candidate): + with open(candidate, "r", encoding="utf-8") as fh: + data = yaml.safe_load(fh) or {} + + overrides = overrides or {} + merged = {**data, **overrides} + + config.project_id = 
merged.get("project_id") or os.getenv("DATASTORE_PROJECT_ID") + config.emulator_host = merged.get("emulator_host") or os.getenv("DATASTORE_EMULATOR_HOST") + + config.namespace_include = _as_list(merged.get("namespace_include")) + config.namespace_exclude = _as_list(merged.get("namespace_exclude")) + config.kinds_include = _as_list(merged.get("kinds_include")) + config.kinds_exclude = _as_list(merged.get("kinds_exclude")) + + config.ttl_field = merged.get("ttl_field", config.ttl_field) + config.delete_missing_ttl = bool(merged.get("delete_missing_ttl", config.delete_missing_ttl)) + config.batch_size = int(merged.get("batch_size", config.batch_size)) + + config.group_by_field = merged.get("group_by_field", config.group_by_field) + + config.log_level = str(merged.get("log_level", config.log_level)).upper() + + _configure_logging(config.log_level) + return config + + +def _configure_logging(level: str) -> None: + level_value = getattr(logging, level.upper(), logging.INFO) + logging.basicConfig(level=level_value, format="%(asctime)s | %(levelname)s | %(message)s") + + +def build_client(config: AppConfig) -> datastore.Client: + # Prefer explicit emulator_host if provided, otherwise env decides + if config.emulator_host: + os.environ["DATASTORE_EMULATOR_HOST"] = config.emulator_host + # Project id is required in emulator; optional on GCP (ADC will detect) + if config.project_id: + os.environ.setdefault("DATASTORE_PROJECT_ID", config.project_id) + + if os.getenv("DATASTORE_EMULATOR_HOST"): + # When using emulator, ensure a project ID is present + project_id = os.getenv("DATASTORE_PROJECT_ID") or config.project_id or "local-dev" + os.environ["DATASTORE_PROJECT_ID"] = project_id + return datastore.Client(project=project_id) + + # GCP path, relies on ADC if project not provided + return datastore.Client(project=config.project_id) + + +def list_namespaces(client: datastore.Client) -> List[str]: + # Include default namespace as "" first + namespaces: List[str] = [""] + query = client.query(kind="__namespace__") + query.keys_only() + for entity in query.fetch(): + name = entity.key.name or "" + if name != "": + namespaces.append(name) + return namespaces + + +def list_kinds(client: datastore.Client, namespace: Optional[str]) -> List[str]: + query = client.query(kind="__kind__", namespace=namespace or None) + query.keys_only() + return [e.key.name for e in query.fetch()] + + +def apply_namespace_filters(all_namespaces: Sequence[str], include: Sequence[str], exclude: Sequence[str]) -> List[str]: + selected = list(all_namespaces) + if include: + include_set = set(include) + selected = [ns for ns in selected if ns in include_set] + if exclude: + exclude_set = set(exclude) + selected = [ns for ns in selected if ns not in exclude_set] + return selected + + +def apply_kind_filters(all_kinds: Sequence[str], include: Sequence[str], exclude: Sequence[str]) -> List[str]: + selected = list(all_kinds) + if include: + include_set = set(include) + selected = [k for k in selected if k in include_set] + if exclude: + exclude_set = set(exclude) + selected = [k for k in selected if k not in exclude_set] + return selected + + +def chunked(iterable: Sequence, chunk_size: int) -> Iterable[Sequence]: + for i in range(0, len(iterable), max(1, chunk_size)): + yield iterable[i : i + chunk_size] + + +def format_size(bytes_size: int) -> str: + size = float(bytes_size) + for unit in ["B", "KB", "MB", "GB", "TB"]: + if size < 1024: + return f"{size:.2f} {unit}" + size /= 1024 + return f"{size:.2f} PB" \ No newline at end of file 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b9fcc3a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,47 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "local-storage-utils" +version = "0.1.0" +description = "Utilities for analyzing and managing local Datastore/Firestore (Datastore mode) data" +authors = [ + { name = "Your Name", email = "you@example.com" }, +] +readme = "README.md" +requires-python = ">=3.9" +license = { file = "LICENSE" } +keywords = ["google-cloud-datastore", "firestore", "emulator", "cleanup", "analysis"] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", +] +dependencies = [ + "google-cloud-datastore>=2.19.0", + "PyYAML>=6.0.1", + "typer>=0.12.3", +] + +[project.optional-dependencies] +rich = ["rich>=13.7.0"] + +[project.scripts] +lsu = "cli:app" +local-storage-utils = "cli:app" + +[tool.setuptools.packages.find] +where = ["."] +include = ["gcd_tools*"] + +[tool.black] +line-length = 100 + +[tool.ruff] +line-length = 100 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e8f4d09 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +google-cloud-datastore>=2.19.0 +PyYAML>=6.0.1 +typer>=0.12.3 \ No newline at end of file diff --git a/tests/test_import.py b/tests/test_import.py new file mode 100644 index 0000000..b7f63dd --- /dev/null +++ b/tests/test_import.py @@ -0,0 +1,6 @@ +def test_imports(): + import gcd_tools + from gcd_tools import analyze_kinds, analyze_entity_fields, cleanup_expired, config + + assert gcd_tools is not None + assert hasattr(config, "AppConfig") \ No newline at end of file From 0619748b6ffa8b21bbf83134cfdedda23e2b800e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 20:11:47 +0000 Subject: [PATCH 02/12] Refactor config handling, improve CLI flexibility, and update documentation Co-authored-by: isisosirishorus --- .github/workflows/build.yml | 3 + README.md | 89 ++++++++++++++++++++++++++++-- cli.py | 60 ++++++++++---------- config.yaml | 14 +++-- gcd_tools/analyze_entity_fields.py | 44 ++++++++++++--- gcd_tools/analyze_kinds.py | 12 ++-- gcd_tools/cleanup_expired.py | 14 ++--- gcd_tools/config.py | 49 ++++++---------- 8 files changed, 187 insertions(+), 98 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 91cf7cc..cd842b7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,6 +30,9 @@ jobs: needs: ci if: startsWith(github.ref, 'refs/tags/v') runs-on: ubuntu-latest + permissions: + id-token: write + contents: read steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/README.md b/README.md index ce7ed6f..59b25c2 100644 --- a/README.md +++ b/README.md @@ -2,23 +2,104 @@ Utilities for analyzing and managing local Datastore/Firestore (Datastore mode) data. Works with both the Datastore Emulator and GCP using Application Default Credentials. -## Install +## Install (PyPI) ```bash +pip install local-storage-utils +``` + +This installs the `lsu` CLI. 
+ +## Install (from source) + +```bash +git clone +cd local-storage-utils +python -m venv .venv +source .venv/bin/activate +pip install -U pip pip install -e . ``` -## CLI +## Configuration + +- By default, the CLI loads `config.yaml` from the current directory if present. +- Any CLI flag overrides values from `config.yaml`. +- If neither config nor flags provide a value, the tool falls back to environment variables (for emulator detection) or sensible defaults. + +Key settings in `config.yaml`: + +```yaml +project_id: "my-project" # If omitted, ADC/env will be used +emulator_host: "localhost:8010" # If set, uses Datastore Emulator + +# Explicit filters (empty means all) +namespaces: [""] # Empty -> iterate all namespaces (including default "") +kinds: [] # Empty -> iterate all kinds per namespace + +# Optional defaults +kind: "SourceCollectionStateEntity" # Default for analyze-fields +namespace: "" # Default namespace for analyze-fields + +# Cleanup +ttl_field: "expireAt" +delete_missing_ttl: true +batch_size: 500 + +# Analysis +group_by_field: null + +# Logging +log_level: "INFO" +``` + +## CLI usage ```bash # Kind-level counts and size estimates lsu analyze-kinds --project my-project -# Field contribution analysis for a kind +# Use all namespaces/kinds by default, or restrict explicitly +lsu analyze-kinds --namespace "" --namespace tenant-a --kind SourceCollectionStateEntity + +# Field contribution analysis (falls back to config.kind/config.namespace if not provided) lsu analyze-fields --kind SourceCollectionStateEntity --namespace "" --group-by batchId -# TTL cleanup across kinds/namespaces (dry-run) +# TTL cleanup across namespaces/kinds (dry-run) lsu cleanup --ttl-field expireAt --dry-run + +# TTL cleanup restricted to specific namespaces/kinds +lsu cleanup --namespace "" --namespace tenant-a --kind pipeline-job ``` Use `--help` on any command for full options. Config can be provided via `config.yaml` or flags. + +## Development + +- Create a virtual environment and install in editable mode as shown above +- Run tests: + +```bash +python -m pip install pytest +pytest -q +``` + +- Lint/format (optional if you use pre-commit/CI): +```bash +python -m pip install ruff black +ruff check . +black . +``` + +## Publishing + +- CI is configured to publish to PyPI on tags `v*`. +- Create a PyPI token and add it to repository secrets as `PYPI_API_TOKEN`. +- Tag and push: + +```bash +git tag v0.1.0 +git push origin v0.1.0 +``` + +The GitHub Actions workflow will build and upload the package to PyPI. diff --git a/cli.py b/cli.py index 9e6cb26..e5c7c16 100644 --- a/cli.py +++ b/cli.py @@ -5,7 +5,7 @@ import typer -from gcd_tools.config import AppConfig, load_config, format_size +from gcd_tools.config import AppConfig, load_config from gcd_tools.analyze_kinds import analyze_kinds, print_summary_table from gcd_tools.analyze_entity_fields import analyze_field_contributions, print_field_summary from gcd_tools.cleanup_expired import cleanup_expired @@ -35,21 +35,16 @@ def cmd_analyze_kinds( project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. 
localhost:8010"), log_level: Optional[str] = typer.Option(None, help="Logging level"), - namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to include"), - exclude_namespace: Optional[List[str]] = typer.Option(None, "--exclude-namespace", help="Namespaces to exclude"), - kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to include"), - exclude_kind: Optional[List[str]] = typer.Option(None, "--exclude-kind", help="Kinds to exclude"), + namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to process (omit to process all)"), + kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to process (omit to process all in each namespace)"), output: Optional[str] = typer.Option(None, help="Output CSV file path"), ): cfg = _load_cfg(config, project, emulator_host, log_level) + if namespace: - cfg.namespace_include = list(namespace) - if exclude_namespace: - cfg.namespace_exclude = list(exclude_namespace) + cfg.namespaces = list(namespace) if kind: - cfg.kinds_include = list(kind) - if exclude_kind: - cfg.kinds_exclude = list(exclude_kind) + cfg.kinds = list(kind) rows = analyze_kinds(cfg) if output: @@ -65,9 +60,9 @@ def cmd_analyze_kinds( @app.command("analyze-fields") def cmd_analyze_fields( - kind: str = typer.Option(..., "--kind", "-k", help="Kind to analyze"), - namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Namespace to query"), - group_by: Optional[str] = typer.Option(None, help="Group results by this field value"), + kind: Optional[str] = typer.Option(None, "--kind", "-k", help="Kind to analyze (falls back to config.kind)"), + namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Namespace to query (falls back to config.namespace; omit to use all)"), + group_by: Optional[str] = typer.Option(None, help="Group results by this field value (falls back to config.group_by_field)"), only_field: Optional[List[str]] = typer.Option(None, "--only-field", help="Only consider these fields"), config: Optional[str] = typer.Option(None, help="Path to config.yaml"), project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), @@ -76,8 +71,16 @@ def cmd_analyze_fields( output_json: Optional[str] = typer.Option(None, help="Write raw JSON results to file"), ): cfg = _load_cfg(config, project, emulator_host, log_level) + + target_kind = kind or cfg.kind + target_namespace = namespace if namespace is not None else cfg.namespace + group_by_field = group_by if group_by is not None else cfg.group_by_field + + if not target_kind: + raise typer.BadParameter("--kind is required (either via flag or config.kind)") + result = analyze_field_contributions( - cfg, kind=kind, namespace=namespace, group_by_field=group_by, only_fields=list(only_field) if only_field else None + cfg, kind=target_kind, namespace=target_namespace, group_by_field=group_by_field, only_fields=list(only_field) if only_field else None ) if output_json: @@ -94,29 +97,24 @@ def cmd_cleanup( project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. 
localhost:8010"), log_level: Optional[str] = typer.Option(None, help="Logging level"), - namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to include"), - exclude_namespace: Optional[List[str]] = typer.Option(None, "--exclude-namespace", help="Namespaces to exclude"), - kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to include"), - exclude_kind: Optional[List[str]] = typer.Option(None, "--exclude-kind", help="Kinds to exclude"), - ttl_field: Optional[str] = typer.Option(None, help="TTL field name"), - delete_missing_ttl: bool = typer.Option(True, help="Delete when TTL field is missing"), - batch_size: Optional[int] = typer.Option(None, help="Delete batch size"), + namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to process (omit to process all)"), + kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to process (omit to process all in each namespace)"), + ttl_field: Optional[str] = typer.Option(None, help="TTL field name (falls back to config.ttl_field)"), + delete_missing_ttl: Optional[bool] = typer.Option(None, help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)"), + batch_size: Optional[int] = typer.Option(None, help="Delete batch size (falls back to config.batch_size)"), dry_run: bool = typer.Option(False, help="Only report counts; do not delete"), ): cfg = _load_cfg(config, project, emulator_host, log_level) if namespace: - cfg.namespace_include = list(namespace) - if exclude_namespace: - cfg.namespace_exclude = list(exclude_namespace) + cfg.namespaces = list(namespace) if kind: - cfg.kinds_include = list(kind) - if exclude_kind: - cfg.kinds_exclude = list(exclude_kind) - if ttl_field: + cfg.kinds = list(kind) + if ttl_field is not None: cfg.ttl_field = ttl_field - cfg.delete_missing_ttl = delete_missing_ttl - if batch_size: + if delete_missing_ttl is not None: + cfg.delete_missing_ttl = delete_missing_ttl + if batch_size is not None: cfg.batch_size = batch_size totals = cleanup_expired(cfg, dry_run=dry_run) diff --git a/config.yaml b/config.yaml index 7537311..721dff4 100644 --- a/config.yaml +++ b/config.yaml @@ -5,11 +5,15 @@ project_id: null # e.g. "my-project"; if omitted, ADC or env will be used emulator_host: null # e.g. "localhost:8010"; if set, emulator mode is enabled -# Filters -namespace_include: [] # e.g. ["", "tenant-a"]; empty string means default namespace -namespace_exclude: [] -kinds_include: [] -kinds_exclude: [] +# Explicit filters (empty == all) +# Empty list of namespaces means: iterate over all namespaces (including default "") +namespaces: [] # e.g. ["", "tenant-a"] +# Empty list of kinds means: iterate over all kinds within each namespace +kinds: [] # e.g. 
["SourceCollectionStateEntity"] + +# Optional defaults used by some commands +kind: null # default kind for analyze-fields +namespace: null # default namespace for analyze-fields # Cleanup ttl_field: "expireAt" diff --git a/gcd_tools/analyze_entity_fields.py b/gcd_tools/analyze_entity_fields.py index e4bddcd..6de862d 100644 --- a/gcd_tools/analyze_entity_fields.py +++ b/gcd_tools/analyze_entity_fields.py @@ -7,7 +7,7 @@ from google.cloud import datastore from google.cloud.datastore.helpers import entity_to_protobuf -from .config import AppConfig, build_client, format_size +from .config import AppConfig, build_client, format_size, list_namespaces logger = logging.getLogger(__name__) @@ -44,15 +44,13 @@ def _estimate_field_contributions( return dict(field_totals), total_size, entity_count -def analyze_field_contributions( - config: AppConfig, +def _analyze_single_namespace( + client: datastore.Client, kind: str, - namespace: Optional[str] = None, - group_by_field: Optional[str] = None, - only_fields: Optional[List[str]] = None, + namespace: Optional[str], + group_by_field: Optional[str], + only_fields: Optional[List[str]], ) -> Dict: - client = build_client(config) - query = client.query(kind=kind, namespace=namespace or None) if group_by_field: @@ -117,7 +115,37 @@ def analyze_field_contributions( } +def analyze_field_contributions( + config: AppConfig, + kind: str, + namespace: Optional[str] = None, + group_by_field: Optional[str] = None, + only_fields: Optional[List[str]] = None, +) -> Dict: + client = build_client(config) + + # If no namespace provided, iterate across all namespaces + if namespace is None: + results: Dict[str, Dict] = {} + for ns in list_namespaces(client): + results[ns or ""] = _analyze_single_namespace( + client, kind=kind, namespace=ns, group_by_field=group_by_field, only_fields=only_fields + ) + return {"by_namespace": results} + + # Single namespace + return _analyze_single_namespace( + client, kind=kind, namespace=namespace, group_by_field=group_by_field, only_fields=only_fields + ) + + def print_field_summary(result: Dict) -> None: + if "by_namespace" in result: + for ns, data in result["by_namespace"].items(): + print(f"\n=== namespace: {ns or '(default)'} ===") + print_field_summary(data) + return + if "grouped" in result: for group_key, data in result["grouped"].items(): ns = data.get("namespace") or "" diff --git a/gcd_tools/analyze_kinds.py b/gcd_tools/analyze_kinds.py index 323f318..31aac1a 100644 --- a/gcd_tools/analyze_kinds.py +++ b/gcd_tools/analyze_kinds.py @@ -11,8 +11,6 @@ build_client, list_namespaces, list_kinds, - apply_kind_filters, - apply_namespace_filters, format_size, ) @@ -40,15 +38,13 @@ def estimate_entity_count_and_size( def analyze_kinds(config: AppConfig) -> List[Dict]: client = build_client(config) - all_namespaces = list_namespaces(client) - namespaces = apply_namespace_filters( - all_namespaces, config.namespace_include, config.namespace_exclude - ) + # Determine namespaces: explicit list, or all + namespaces = config.namespaces if config.namespaces else list_namespaces(client) results: List[Dict] = [] for ns in namespaces: - kinds = list_kinds(client, ns) - kinds = apply_kind_filters(kinds, config.kinds_include, config.kinds_exclude) + # Determine kinds: explicit list, or all in namespace + kinds = config.kinds if config.kinds else list_kinds(client, ns) logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds)) for kind in kinds: diff --git a/gcd_tools/cleanup_expired.py b/gcd_tools/cleanup_expired.py index 
0062dac..7ef36ef 100644 --- a/gcd_tools/cleanup_expired.py +++ b/gcd_tools/cleanup_expired.py @@ -2,7 +2,7 @@ import logging from datetime import datetime, timezone -from typing import Dict, Iterable, List, Optional +from typing import Dict, List, Optional from google.cloud import datastore @@ -11,8 +11,6 @@ build_client, list_namespaces, list_kinds, - apply_kind_filters, - apply_namespace_filters, chunked, ) @@ -33,17 +31,15 @@ def cleanup_expired( ) -> Dict[str, int]: client = build_client(config) - all_namespaces = list_namespaces(client) - namespaces = apply_namespace_filters( - all_namespaces, config.namespace_include, config.namespace_exclude - ) + # Determine namespaces: explicit list, or all + namespaces = config.namespaces if config.namespaces else list_namespaces(client) totals: Dict[str, int] = {} now = datetime.now(timezone.utc) for ns in namespaces: - kinds = list_kinds(client, ns) - kinds = apply_kind_filters(kinds, config.kinds_include, config.kinds_exclude) + # Determine kinds: explicit list, or all in namespace + kinds = config.kinds if config.kinds else list_kinds(client, ns) for kind in kinds: query = client.query(kind=kind, namespace=ns or None) diff --git a/gcd_tools/config.py b/gcd_tools/config.py index d1c3d81..568fd0e 100644 --- a/gcd_tools/config.py +++ b/gcd_tools/config.py @@ -3,7 +3,7 @@ import os import logging from dataclasses import dataclass, field -from typing import Dict, Iterable, List, Optional, Sequence, Tuple +from typing import Dict, Iterable, List, Optional, Sequence import yaml from google.cloud import datastore @@ -14,11 +14,13 @@ class AppConfig: project_id: Optional[str] = None emulator_host: Optional[str] = None - # Filters - namespace_include: List[str] = field(default_factory=list) - namespace_exclude: List[str] = field(default_factory=list) - kinds_include: List[str] = field(default_factory=list) - kinds_exclude: List[str] = field(default_factory=list) + # Explicit filters (when empty -> use all) + namespaces: List[str] = field(default_factory=list) + kinds: List[str] = field(default_factory=list) + + # Optional defaults for commands that need them (e.g., analyze-fields) + kind: Optional[str] = None + namespace: Optional[str] = None # Cleanup settings ttl_field: str = "expireAt" @@ -59,10 +61,13 @@ def load_config(path: Optional[str] = None, overrides: Optional[Dict] = None) -> config.project_id = merged.get("project_id") or os.getenv("DATASTORE_PROJECT_ID") config.emulator_host = merged.get("emulator_host") or os.getenv("DATASTORE_EMULATOR_HOST") - config.namespace_include = _as_list(merged.get("namespace_include")) - config.namespace_exclude = _as_list(merged.get("namespace_exclude")) - config.kinds_include = _as_list(merged.get("kinds_include")) - config.kinds_exclude = _as_list(merged.get("kinds_exclude")) + # Explicit lists (no include/exclude). 
Empty -> all + config.namespaces = _as_list(merged.get("namespaces")) + config.kinds = _as_list(merged.get("kinds")) + + # Optional defaults used by some commands + config.kind = merged.get("kind") + config.namespace = merged.get("namespace") config.ttl_field = merged.get("ttl_field", config.ttl_field) config.delete_missing_ttl = bool(merged.get("delete_missing_ttl", config.delete_missing_ttl)) @@ -117,29 +122,7 @@ def list_kinds(client: datastore.Client, namespace: Optional[str]) -> List[str]: return [e.key.name for e in query.fetch()] -def apply_namespace_filters(all_namespaces: Sequence[str], include: Sequence[str], exclude: Sequence[str]) -> List[str]: - selected = list(all_namespaces) - if include: - include_set = set(include) - selected = [ns for ns in selected if ns in include_set] - if exclude: - exclude_set = set(exclude) - selected = [ns for ns in selected if ns not in exclude_set] - return selected - - -def apply_kind_filters(all_kinds: Sequence[str], include: Sequence[str], exclude: Sequence[str]) -> List[str]: - selected = list(all_kinds) - if include: - include_set = set(include) - selected = [k for k in selected if k in include_set] - if exclude: - exclude_set = set(exclude) - selected = [k for k in selected if k not in exclude_set] - return selected - - -def chunked(iterable: Sequence, chunk_size: int) -> Iterable[Sequence]: +def chunked(iterable: Sequence, chunk_size: int): for i in range(0, len(iterable), max(1, chunk_size)): yield iterable[i : i + chunk_size] From 359add6eacd4abc9facbdd19281eccfe99391189 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 20:29:36 +0000 Subject: [PATCH 03/12] Refactor CLI options using Annotated for improved type hints and readability Co-authored-by: isisosirishorus --- cli.py | 84 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/cli.py b/cli.py index e5c7c16..329262b 100644 --- a/cli.py +++ b/cli.py @@ -2,6 +2,7 @@ import json from typing import List, Optional +from typing import Annotated import typer @@ -12,6 +13,30 @@ app = typer.Typer(help="Utilities for analyzing and managing local Datastore/Firestore (Datastore mode)") +# Reusable option aliases +ConfigOpt = Annotated[Optional[str], typer.Option(None, "--config", help="Path to config.yaml")] +ProjectOpt = Annotated[Optional[str], typer.Option(None, "--project", help="GCP/Emulator project id")] +EmulatorHostOpt = Annotated[ + Optional[str], typer.Option(None, "--emulator-host", help="Emulator host, e.g. 
localhost:8010") +] +LogLevelOpt = Annotated[Optional[str], typer.Option(None, "--log-level", help="Logging level")] +NamespacesOpt = Annotated[ + Optional[List[str]], + typer.Option(None, "--namespace", "-n", help="Namespaces to process (omit to process all)"), +] +KindsOpt = Annotated[ + Optional[List[str]], + typer.Option( + None, "--kind", "-k", help="Kinds to process (omit to process all in each namespace)" + ), +] +SingleNamespaceOpt = Annotated[ + Optional[str], typer.Option(None, "--namespace", "-n", help="Namespace to query (omit to use all)") +] +SingleKindOpt = Annotated[ + Optional[str], typer.Option(None, "--kind", "-k", help="Kind to analyze (falls back to config.kind)") +] + def _load_cfg( config_path: Optional[str], @@ -31,12 +56,12 @@ def _load_cfg( @app.command("analyze-kinds") def cmd_analyze_kinds( - config: Optional[str] = typer.Option(None, help="Path to config.yaml"), - project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), - emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. localhost:8010"), - log_level: Optional[str] = typer.Option(None, help="Logging level"), - namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to process (omit to process all)"), - kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to process (omit to process all in each namespace)"), + config: ConfigOpt, + project: ProjectOpt, + emulator_host: EmulatorHostOpt, + log_level: LogLevelOpt, + namespace: NamespacesOpt, + kind: KindsOpt, output: Optional[str] = typer.Option(None, help="Output CSV file path"), ): cfg = _load_cfg(config, project, emulator_host, log_level) @@ -60,15 +85,15 @@ def cmd_analyze_kinds( @app.command("analyze-fields") def cmd_analyze_fields( - kind: Optional[str] = typer.Option(None, "--kind", "-k", help="Kind to analyze (falls back to config.kind)"), - namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Namespace to query (falls back to config.namespace; omit to use all)"), - group_by: Optional[str] = typer.Option(None, help="Group results by this field value (falls back to config.group_by_field)"), - only_field: Optional[List[str]] = typer.Option(None, "--only-field", help="Only consider these fields"), - config: Optional[str] = typer.Option(None, help="Path to config.yaml"), - project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), - emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. 
localhost:8010"), - log_level: Optional[str] = typer.Option(None, help="Logging level"), - output_json: Optional[str] = typer.Option(None, help="Write raw JSON results to file"), + kind: SingleKindOpt, + namespace: SingleNamespaceOpt, + group_by: Annotated[Optional[str], typer.Option(None, help="Group results by this field value (falls back to config.group_by_field)")], + only_field: Annotated[Optional[List[str]], typer.Option(None, "--only-field", help="Only consider these fields")], + config: ConfigOpt, + project: ProjectOpt, + emulator_host: EmulatorHostOpt, + log_level: LogLevelOpt, + output_json: Annotated[Optional[str], typer.Option(None, help="Write raw JSON results to file")], ): cfg = _load_cfg(config, project, emulator_host, log_level) @@ -80,7 +105,11 @@ def cmd_analyze_fields( raise typer.BadParameter("--kind is required (either via flag or config.kind)") result = analyze_field_contributions( - cfg, kind=target_kind, namespace=target_namespace, group_by_field=group_by_field, only_fields=list(only_field) if only_field else None + cfg, + kind=target_kind, + namespace=target_namespace, + group_by_field=group_by_field, + only_fields=list(only_field) if only_field else None, ) if output_json: @@ -93,16 +122,19 @@ def cmd_analyze_fields( @app.command("cleanup") def cmd_cleanup( - config: Optional[str] = typer.Option(None, help="Path to config.yaml"), - project: Optional[str] = typer.Option(None, help="GCP/Emulator project id"), - emulator_host: Optional[str] = typer.Option(None, help="Emulator host, e.g. localhost:8010"), - log_level: Optional[str] = typer.Option(None, help="Logging level"), - namespace: Optional[List[str]] = typer.Option(None, "--namespace", "-n", help="Namespaces to process (omit to process all)"), - kind: Optional[List[str]] = typer.Option(None, "--kind", "-k", help="Kinds to process (omit to process all in each namespace)"), - ttl_field: Optional[str] = typer.Option(None, help="TTL field name (falls back to config.ttl_field)"), - delete_missing_ttl: Optional[bool] = typer.Option(None, help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)"), - batch_size: Optional[int] = typer.Option(None, help="Delete batch size (falls back to config.batch_size)"), - dry_run: bool = typer.Option(False, help="Only report counts; do not delete"), + config: ConfigOpt, + project: ProjectOpt, + emulator_host: EmulatorHostOpt, + log_level: LogLevelOpt, + namespace: NamespacesOpt, + kind: KindsOpt, + ttl_field: Annotated[Optional[str], typer.Option(None, help="TTL field name (falls back to config.ttl_field)")], + delete_missing_ttl: Annotated[ + Optional[bool], + typer.Option(None, help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)"), + ], + batch_size: Annotated[Optional[int], typer.Option(None, help="Delete batch size (falls back to config.batch_size)")], + dry_run: Annotated[bool, typer.Option(False, help="Only report counts; do not delete")], ): cfg = _load_cfg(config, project, emulator_host, log_level) From 953df966b10dbb17f0c5e2171070b40f48409e95 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 23:36:17 +0000 Subject: [PATCH 04/12] Checkpoint before follow-up message Co-authored-by: isisosirishorus --- .github/workflows/pr.yml | 23 +++++++++++++++++++++++ .github/workflows/release.yml | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 .github/workflows/pr.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/pr.yml 
b/.github/workflows/pr.yml new file mode 100644 index 0000000..2ba4f57 --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,23 @@ +name: pr + +on: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: | + python -m pip install -U pip + python -m pip install . + python -m pip install pytest + - name: Test + run: pytest -q \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..9d7dbde --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +name: release + +on: + push: + branches: [ main ] + +jobs: + release: + runs-on: ubuntu-latest + permissions: + contents: write # to push tags and release notes + id-token: write # for PyPI trusted publishing (optional) + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # full history for semantic-release + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install build and release tooling + run: | + python -m pip install -U pip + python -m pip install . + python -m pip install build python-semantic-release + - name: Run tests + run: | + python -m pip install pytest + pytest -q + - name: Semantic Release + env: + PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + semantic-release publish \ No newline at end of file From fbf5abb01796ca6965407355e8b46929c152d07d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 23:38:39 +0000 Subject: [PATCH 05/12] Enhance CI/CD workflows with linting, security checks, and release configuration Co-authored-by: isisosirishorus --- .github/workflows/pr.yml | 16 ++++++++++++++-- .github/workflows/release.yml | 31 ++++++++++++++++++++----------- .gitignore | 7 +++++++ pyproject.toml | 15 ++++++++++++++- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 2ba4f57..8bd86ea 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,10 +14,22 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: 'pip' - name: Install run: | python -m pip install -U pip python -m pip install . - python -m pip install pytest + python -m pip install pytest ruff black build pip-audit + - name: Lint + run: | + ruff check . + black --check . - name: Test - run: pytest -q \ No newline at end of file + run: pytest -q + - name: Build and verify + run: | + python -m build + twine check dist/* || true + - name: Security audit + run: | + pip-audit -r requirements.txt || true \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9d7dbde..4208745 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,27 +8,36 @@ jobs: release: runs-on: ubuntu-latest permissions: - contents: write # to push tags and release notes - id-token: write # for PyPI trusted publishing (optional) + contents: write + id-token: write steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 # full history for semantic-release + fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Install build and release tooling + cache: 'pip' + - name: Install run: | python -m pip install -U pip python -m pip install . 
- python -m pip install build python-semantic-release - - name: Run tests + python -m pip install pytest ruff black build python-semantic-release pip-audit + - name: Lint run: | - python -m pip install pytest - pytest -q - - name: Semantic Release + ruff check . + black --check . + - name: Test + run: pytest -q + - name: Build and verify + run: | + python -m build + twine check dist/* || true + - name: Security audit + run: | + pip-audit -r requirements.txt || true + - name: Semantic Release (version, tag, GitHub release, PyPI) env: PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - semantic-release publish \ No newline at end of file + run: semantic-release publish \ No newline at end of file diff --git a/.gitignore b/.gitignore index b7faf40..e54dbfc 100644 --- a/.gitignore +++ b/.gitignore @@ -205,3 +205,10 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ + +# Local configuration +config.yaml + +# Editor/OS +.DS_Store +Thumbs.db diff --git a/pyproject.toml b/pyproject.toml index b9fcc3a..862abd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,4 +44,17 @@ include = ["gcd_tools*"] line-length = 100 [tool.ruff] -line-length = 100 \ No newline at end of file +line-length = 100 + +[tool.semantic_release] +version_variable = "pyproject.toml:version" +branch = "main" +upload_to_pypi = true +dist_path = "dist" +build_command = "python -m build" +commit_message = "chore(release): {version} [skip ci]" +changelog_sections = "feature,fix,perf,refactor,docs,style,build,ci,chore" + +[tool.semantic_release.remote] +name = "origin" +repo_url = "https://github.com/your-org/local-storage-utils" \ No newline at end of file From 0198bad33930355ebbfec95b056bc68b6b7e6150 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 23:44:49 +0000 Subject: [PATCH 06/12] Update README with config guidance and publishing workflow details Co-authored-by: isisosirishorus --- README.md | 18 +++++++----------- config.yaml | 27 --------------------------- 2 files changed, 7 insertions(+), 38 deletions(-) delete mode 100644 config.yaml diff --git a/README.md b/README.md index 59b25c2..cc76d7f 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,11 @@ pip install -e . ## Configuration -- By default, the CLI loads `config.yaml` from the current directory if present. +- Create a local `config.yaml` in your working directory. It is gitignored and not included in the repo. - Any CLI flag overrides values from `config.yaml`. - If neither config nor flags provide a value, the tool falls back to environment variables (for emulator detection) or sensible defaults. -Key settings in `config.yaml`: +Example `config.yaml`: ```yaml project_id: "my-project" # If omitted, ADC/env will be used @@ -93,13 +93,9 @@ black . ## Publishing -- CI is configured to publish to PyPI on tags `v*`. -- Create a PyPI token and add it to repository secrets as `PYPI_API_TOKEN`. -- Tag and push: +- Automated: pushing to `main` triggers versioning, tagging, GitHub release, and PyPI publish via semantic-release. +- Prerequisites: + - Add a PyPI token to repo secrets as `PYPI_API_TOKEN`. + - Use conventional commits for proper versioning. -```bash -git tag v0.1.0 -git push origin v0.1.0 -``` - -The GitHub Actions workflow will build and upload the package to PyPI. +Main branch should be protected (require PRs, disallow direct pushes) in repository settings. 
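Conventional commit messages are what drive the version bump in this flow; a few illustrative examples (the messages themselves are hypothetical, not actual commits in this repo):

```bash
# Illustrative conventional commits and the release each one triggers
git commit -m "fix: skip entities whose TTL value cannot be compared"        # patch release
git commit -m "feat: add --output-json to analyze-fields"                    # minor release
git commit -m "feat!: replace include/exclude filters with explicit lists"   # major release
```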
diff --git a/config.yaml b/config.yaml deleted file mode 100644 index 721dff4..0000000 --- a/config.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Default configuration for local-storage-utils -# Values here can be overridden via CLI flags or environment variables. - -# Core connection -project_id: null # e.g. "my-project"; if omitted, ADC or env will be used -emulator_host: null # e.g. "localhost:8010"; if set, emulator mode is enabled - -# Explicit filters (empty == all) -# Empty list of namespaces means: iterate over all namespaces (including default "") -namespaces: [] # e.g. ["", "tenant-a"] -# Empty list of kinds means: iterate over all kinds within each namespace -kinds: [] # e.g. ["SourceCollectionStateEntity"] - -# Optional defaults used by some commands -kind: null # default kind for analyze-fields -namespace: null # default namespace for analyze-fields - -# Cleanup -ttl_field: "expireAt" -delete_missing_ttl: true -batch_size: 500 - -# Analysis -group_by_field: null - -# Logging -log_level: "INFO" \ No newline at end of file From 6c88a552fdcbd238fd2a793d5b982228239c77f9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 12 Aug 2025 23:53:09 +0000 Subject: [PATCH 07/12] Update README with Python installation and troubleshooting guidance Co-authored-by: isisosirishorus --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cc76d7f..f472b72 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,22 @@ This installs the `lsu` CLI. ```bash git clone cd local-storage-utils -python -m venv .venv +python3 -m venv .venv source .venv/bin/activate -pip install -U pip +python -m pip install -U pip pip install -e . ``` +### Troubleshooting local installs +- If you see "Command 'python' not found", use `python3 -m venv .venv` (above). Inside the venv, `python` will point to Python 3. +- If you see "externally-managed-environment", you are attempting a system-wide install. Always install into a virtual environment: + - Create a venv: `python3 -m venv .venv && source .venv/bin/activate` + - Then use the venv pip: `python -m pip install -U pip && pip install -e .` +- If venv creation fails with "ensurepip is not available", install venv tooling on Debian/Ubuntu and retry: + ```bash + sudo apt-get update && sudo apt-get install -y python3-venv + ``` + ## Configuration - Create a local `config.yaml` in your working directory. It is gitignored and not included in the repo. From 23cf174ea23ff8d3ee9da3262422889af84d2547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20P=C3=A9rez?= Date: Tue, 26 Aug 2025 00:01:29 +0200 Subject: [PATCH 08/12] Fix cli dependencies --- cli.py | 86 +++++++++++++++++++++++++--------------------------------- 1 file changed, 37 insertions(+), 49 deletions(-) diff --git a/cli.py b/cli.py index 329262b..17d84bd 100644 --- a/cli.py +++ b/cli.py @@ -1,8 +1,7 @@ from __future__ import annotations import json -from typing import List, Optional -from typing import Annotated +from typing import List, Optional, Annotated import typer @@ -13,29 +12,21 @@ app = typer.Typer(help="Utilities for analyzing and managing local Datastore/Firestore (Datastore mode)") -# Reusable option aliases -ConfigOpt = Annotated[Optional[str], typer.Option(None, "--config", help="Path to config.yaml")] -ProjectOpt = Annotated[Optional[str], typer.Option(None, "--project", help="GCP/Emulator project id")] -EmulatorHostOpt = Annotated[ - Optional[str], typer.Option(None, "--emulator-host", help="Emulator host, e.g. 
localhost:8010") -] -LogLevelOpt = Annotated[Optional[str], typer.Option(None, "--log-level", help="Logging level")] +# Aliases with flags only — no defaults here +ConfigOpt = Annotated[Optional[str], typer.Option("--config", help="Path to config.yaml")] +ProjectOpt = Annotated[Optional[str], typer.Option("--project", help="GCP/Emulator project id")] +EmulatorHostOpt = Annotated[Optional[str], typer.Option("--emulator-host", help="Emulator host, e.g. localhost:8010")] +LogLevelOpt = Annotated[Optional[str], typer.Option("--log-level", help="Logging level")] NamespacesOpt = Annotated[ Optional[List[str]], - typer.Option(None, "--namespace", "-n", help="Namespaces to process (omit to process all)"), + typer.Option("--namespace", "-n", help="Namespaces to process (omit to process all)") ] KindsOpt = Annotated[ Optional[List[str]], - typer.Option( - None, "--kind", "-k", help="Kinds to process (omit to process all in each namespace)" - ), -] -SingleNamespaceOpt = Annotated[ - Optional[str], typer.Option(None, "--namespace", "-n", help="Namespace to query (omit to use all)") -] -SingleKindOpt = Annotated[ - Optional[str], typer.Option(None, "--kind", "-k", help="Kind to analyze (falls back to config.kind)") + typer.Option("--kind", "-k", help="Kinds to process (omit to process all in each namespace)") ] +SingleNamespaceOpt = Annotated[Optional[str], typer.Option("--namespace", "-n", help="Namespace to query (omit to use all)")] +SingleKindOpt = Annotated[Optional[str], typer.Option("--kind", "-k", help="Kind to analyze (falls back to config.kind)")] def _load_cfg( @@ -56,13 +47,13 @@ def _load_cfg( @app.command("analyze-kinds") def cmd_analyze_kinds( - config: ConfigOpt, - project: ProjectOpt, - emulator_host: EmulatorHostOpt, - log_level: LogLevelOpt, - namespace: NamespacesOpt, - kind: KindsOpt, - output: Optional[str] = typer.Option(None, help="Output CSV file path"), + config: ConfigOpt = None, + project: ProjectOpt = None, + emulator_host: EmulatorHostOpt = None, + log_level: LogLevelOpt = None, + namespace: NamespacesOpt = None, + kind: KindsOpt = None, + output: Annotated[Optional[str], typer.Option("--output", help="Output CSV file path")] = None, ): cfg = _load_cfg(config, project, emulator_host, log_level) @@ -85,15 +76,15 @@ def cmd_analyze_kinds( @app.command("analyze-fields") def cmd_analyze_fields( - kind: SingleKindOpt, - namespace: SingleNamespaceOpt, - group_by: Annotated[Optional[str], typer.Option(None, help="Group results by this field value (falls back to config.group_by_field)")], - only_field: Annotated[Optional[List[str]], typer.Option(None, "--only-field", help="Only consider these fields")], - config: ConfigOpt, - project: ProjectOpt, - emulator_host: EmulatorHostOpt, - log_level: LogLevelOpt, - output_json: Annotated[Optional[str], typer.Option(None, help="Write raw JSON results to file")], + kind: SingleKindOpt = None, + namespace: SingleNamespaceOpt = None, + group_by: Annotated[Optional[str], typer.Option("--group-by", help="Group results by this field value (falls back to config.group_by_field)")] = None, + only_field: Annotated[Optional[List[str]], typer.Option("--only-field", help="Only consider these fields")] = None, + config: ConfigOpt = None, + project: ProjectOpt = None, + emulator_host: EmulatorHostOpt = None, + log_level: LogLevelOpt = None, + output_json: Annotated[Optional[str], typer.Option("--output-json", help="Write raw JSON results to file")] = None, ): cfg = _load_cfg(config, project, emulator_host, log_level) @@ -122,19 +113,16 @@ def 
cmd_analyze_fields( @app.command("cleanup") def cmd_cleanup( - config: ConfigOpt, - project: ProjectOpt, - emulator_host: EmulatorHostOpt, - log_level: LogLevelOpt, - namespace: NamespacesOpt, - kind: KindsOpt, - ttl_field: Annotated[Optional[str], typer.Option(None, help="TTL field name (falls back to config.ttl_field)")], - delete_missing_ttl: Annotated[ - Optional[bool], - typer.Option(None, help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)"), - ], - batch_size: Annotated[Optional[int], typer.Option(None, help="Delete batch size (falls back to config.batch_size)")], - dry_run: Annotated[bool, typer.Option(False, help="Only report counts; do not delete")], + config: ConfigOpt = None, + project: ProjectOpt = None, + emulator_host: EmulatorHostOpt = None, + log_level: LogLevelOpt = None, + namespace: NamespacesOpt = None, + kind: KindsOpt = None, + ttl_field: Annotated[Optional[str], typer.Option("--ttl-field", help="TTL field name (falls back to config.ttl_field)")] = None, + delete_missing_ttl: Annotated[Optional[bool], typer.Option("--delete-missing-ttl", help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)")] = None, + batch_size: Annotated[Optional[int], typer.Option("--batch-size", help="Delete batch size (falls back to config.batch_size)")] = None, + dry_run: Annotated[bool, typer.Option("--dry-run", help="Only report counts; do not delete")] = False, ): cfg = _load_cfg(config, project, emulator_host, log_level) @@ -155,4 +143,4 @@ def cmd_cleanup( if __name__ == "__main__": - app() \ No newline at end of file + app() From 378ff3cb1b9e1f30cdb53ce1d3e6dbcdf9a9b8c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20P=C3=A9rez?= Date: Tue, 26 Aug 2025 00:47:19 +0200 Subject: [PATCH 09/12] Using list of namespaces --- README.md | 1 - cli.py | 33 +++++++++------------------------ gcd_tools/analyze_kinds.py | 14 ++++---------- gcd_tools/config.py | 19 +++++++++++++++++-- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index f472b72..19ad010 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,6 @@ kinds: [] # Empty -> iterate all kinds per namespace # Optional defaults kind: "SourceCollectionStateEntity" # Default for analyze-fields -namespace: "" # Default namespace for analyze-fields # Cleanup ttl_field: "expireAt" diff --git a/cli.py b/cli.py index 17d84bd..243d6b4 100644 --- a/cli.py +++ b/cli.py @@ -17,18 +17,12 @@ ProjectOpt = Annotated[Optional[str], typer.Option("--project", help="GCP/Emulator project id")] EmulatorHostOpt = Annotated[Optional[str], typer.Option("--emulator-host", help="Emulator host, e.g. 
localhost:8010")] LogLevelOpt = Annotated[Optional[str], typer.Option("--log-level", help="Logging level")] -NamespacesOpt = Annotated[ - Optional[List[str]], - typer.Option("--namespace", "-n", help="Namespaces to process (omit to process all)") -] KindsOpt = Annotated[ Optional[List[str]], - typer.Option("--kind", "-k", help="Kinds to process (omit to process all in each namespace)") + typer.Option("--kind", "-k", help="Kinds to process (omit or empty to process all in each namespace)") ] -SingleNamespaceOpt = Annotated[Optional[str], typer.Option("--namespace", "-n", help="Namespace to query (omit to use all)")] SingleKindOpt = Annotated[Optional[str], typer.Option("--kind", "-k", help="Kind to analyze (falls back to config.kind)")] - def _load_cfg( config_path: Optional[str], project: Optional[str], @@ -44,25 +38,22 @@ def _load_cfg( overrides["log_level"] = log_level return load_config(config_path, overrides) - @app.command("analyze-kinds") def cmd_analyze_kinds( config: ConfigOpt = None, project: ProjectOpt = None, emulator_host: EmulatorHostOpt = None, log_level: LogLevelOpt = None, - namespace: NamespacesOpt = None, kind: KindsOpt = None, output: Annotated[Optional[str], typer.Option("--output", help="Output CSV file path")] = None, ): cfg = _load_cfg(config, project, emulator_host, log_level) - if namespace: - cfg.namespaces = list(namespace) - if kind: - cfg.kinds = list(kind) - + if kind is not None: + # Normalise: treat [""] as empty (all kinds) + cfg.kinds = [k for k in kind if k] # drop empty strings rows = analyze_kinds(cfg) + if output: with open(output, "w", encoding="utf-8") as fh: fh.write("namespace,kind,count,size,bytes\n") @@ -73,11 +64,10 @@ def cmd_analyze_kinds( else: print_summary_table(rows) - @app.command("analyze-fields") def cmd_analyze_fields( kind: SingleKindOpt = None, - namespace: SingleNamespaceOpt = None, + namespace: Annotated[Optional[str], typer.Option("--namespace", "-n", help="Namespace to query (omit to use all)")] = None, group_by: Annotated[Optional[str], typer.Option("--group-by", help="Group results by this field value (falls back to config.group_by_field)")] = None, only_field: Annotated[Optional[List[str]], typer.Option("--only-field", help="Only consider these fields")] = None, config: ConfigOpt = None, @@ -100,7 +90,7 @@ def cmd_analyze_fields( kind=target_kind, namespace=target_namespace, group_by_field=group_by_field, - only_fields=list(only_field) if only_field else None, + only_fields=[f for f in only_field] if only_field else None, ) if output_json: @@ -110,14 +100,12 @@ def cmd_analyze_fields( else: print_field_summary(result) - @app.command("cleanup") def cmd_cleanup( config: ConfigOpt = None, project: ProjectOpt = None, emulator_host: EmulatorHostOpt = None, log_level: LogLevelOpt = None, - namespace: NamespacesOpt = None, kind: KindsOpt = None, ttl_field: Annotated[Optional[str], typer.Option("--ttl-field", help="TTL field name (falls back to config.ttl_field)")] = None, delete_missing_ttl: Annotated[Optional[bool], typer.Option("--delete-missing-ttl", help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)")] = None, @@ -126,10 +114,8 @@ def cmd_cleanup( ): cfg = _load_cfg(config, project, emulator_host, log_level) - if namespace: - cfg.namespaces = list(namespace) - if kind: - cfg.kinds = list(kind) + if kind is not None: + cfg.kinds = [k for k in kind if k] if ttl_field is not None: cfg.ttl_field = ttl_field if delete_missing_ttl is not None: @@ -141,6 +127,5 @@ def cmd_cleanup( deleted_sum = 
sum(totals.values()) typer.echo(f"Total entities {'to delete' if dry_run else 'deleted'}: {deleted_sum}") - if __name__ == "__main__": app() diff --git a/gcd_tools/analyze_kinds.py b/gcd_tools/analyze_kinds.py index 31aac1a..7f3e621 100644 --- a/gcd_tools/analyze_kinds.py +++ b/gcd_tools/analyze_kinds.py @@ -14,10 +14,8 @@ format_size, ) - logger = logging.getLogger(__name__) - def estimate_entity_count_and_size( client: datastore.Client, kind: str, namespace: Optional[str] ) -> Tuple[int, int]: @@ -34,18 +32,15 @@ def estimate_entity_count_and_size( count += 1 return count, total_size - def analyze_kinds(config: AppConfig) -> List[Dict]: client = build_client(config) - # Determine namespaces: explicit list, or all - namespaces = config.namespaces if config.namespaces else list_namespaces(client) + # Thanks to config.py normalisation, [] is the only “all” case + namespaces = config.namespaces or list_namespaces(client) results: List[Dict] = [] for ns in namespaces: - # Determine kinds: explicit list, or all in namespace - kinds = config.kinds if config.kinds else list_kinds(client, ns) - + kinds = config.kinds or list_kinds(client, ns) logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds)) for kind in kinds: count, total_bytes = estimate_entity_count_and_size(client, kind, ns) @@ -60,10 +55,9 @@ def analyze_kinds(config: AppConfig) -> List[Dict]: ) return results - def print_summary_table(rows: List[Dict]) -> None: # Plain stdout table for wide compatibility print("namespace,kind,count,size,bytes") for r in rows: ns = r.get("namespace") or "" - print(f"{ns},{r['kind']},{r['count']},{r['size']},{r['bytes']}") \ No newline at end of file + print(f"{ns},{r['kind']},{r['count']},{r['size']},{r['bytes']}") diff --git a/gcd_tools/config.py b/gcd_tools/config.py index 568fd0e..071ce9e 100644 --- a/gcd_tools/config.py +++ b/gcd_tools/config.py @@ -65,6 +65,12 @@ def load_config(path: Optional[str] = None, overrides: Optional[Dict] = None) -> config.namespaces = _as_list(merged.get("namespaces")) config.kinds = _as_list(merged.get("kinds")) + # 🛠 Normalise: treat [""] as empty + if config.namespaces == [""] or config.namespaces is None: + config.namespaces = [] + if config.kinds == [""] or config.kinds is None: + config.kinds = [] + # Optional defaults used by some commands config.kind = merged.get("kind") config.namespace = merged.get("namespace") @@ -81,6 +87,7 @@ def load_config(path: Optional[str] = None, overrides: Optional[Dict] = None) -> return config + def _configure_logging(level: str) -> None: level_value = getattr(logging, level.upper(), logging.INFO) logging.basicConfig(level=level_value, format="%(asctime)s | %(levelname)s | %(message)s") @@ -105,14 +112,22 @@ def build_client(config: AppConfig) -> datastore.Client: def list_namespaces(client: datastore.Client) -> List[str]: - # Include default namespace as "" first + """ + Return all namespaces in the datastore, including the default (""). + Always queries __namespace__ in the root context so it works in emulator/GCP. 
+ """ + # Include default namespace "" first namespaces: List[str] = [""] - query = client.query(kind="__namespace__") + + # Force namespace=None to query the metadata root + query = client.query(kind="__namespace__", namespace=None) query.keys_only() + for entity in query.fetch(): name = entity.key.name or "" if name != "": namespaces.append(name) + return namespaces From 14bfe22e06fe4962d9999b53ef864365b901c663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20P=C3=A9rez?= Date: Tue, 26 Aug 2025 21:25:58 +0200 Subject: [PATCH 10/12] using stats --- gcd_tools/analyze_kinds.py | 81 +++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/gcd_tools/analyze_kinds.py b/gcd_tools/analyze_kinds.py index 7f3e621..5b31f87 100644 --- a/gcd_tools/analyze_kinds.py +++ b/gcd_tools/analyze_kinds.py @@ -16,25 +16,60 @@ logger = logging.getLogger(__name__) -def estimate_entity_count_and_size( - client: datastore.Client, kind: str, namespace: Optional[str] -) -> Tuple[int, int]: - query = client.query(kind=kind, namespace=namespace or None) - total_size = 0 - count = 0 - for entity in query.fetch(): - try: - raw_proto = entity_to_protobuf(entity)._pb - total_size += len(raw_proto.SerializeToString()) - except Exception: - # Fallback: count only - pass - count += 1 - return count, total_size - -def analyze_kinds(config: AppConfig) -> List[Dict]: + +def get_kind_stats(client, kind: str, namespace: Optional[str] = None) -> Tuple[Optional[int], Optional[int]]: + """ + Returns (count, bytes) for the given kind/namespace using Datastore statistics. + Falls back to None if not found. + """ + if namespace: + stats_kind = "__Stat_Kind_Ns__" + query = client.query(kind=stats_kind) + query.add_filter("kind_name", "=", kind) + query.add_filter("namespace_name", "=", namespace) + else: + stats_kind = "__Stat_Kind__" + query = client.query(kind=stats_kind) + query.add_filter("kind_name", "=", kind) + + results = list(query.fetch(limit=1)) + if results: + return results[0]["count"], results[0]["bytes"] + return None, None + + +def estimate_entity_count_and_size(client, kind: str, namespace: Optional[str], sample_size: int = 100) -> Tuple[int, int]: + """ + Original keys-only method: exact count, approximate bytes via sampling. + """ + # Count with keys-only + count_query = client.query(kind=kind, namespace=namespace or None) + count_query.keys_only() + total_count = sum(1 for _ in count_query.fetch()) + + # Sample for size + sample_query = client.query(kind=kind, namespace=namespace or None) + sample_entities = list(sample_query.fetch(limit=sample_size)) + if sample_entities: + avg_size = sum(len(entity_to_protobuf(e)._pb.SerializeToString()) for e in sample_entities) / len(sample_entities) + else: + avg_size = 0 + + return total_count, int(avg_size * total_count) + + +def analyze_kinds(config: AppConfig, method: Optional[str] = None) -> List[Dict]: + """ + Analyze kinds using either: + - 'stats' (default) => fast built-in Datastore statistics + - 'scan' => keys-only scan with sampling + Falls back to 'scan' if stats are missing for a kind. 
+ """ client = build_client(config) + # Decide method priority: parameter > config > default + method = method or getattr(config, "method", None) or "stats" + # Thanks to config.py normalisation, [] is the only “all” case namespaces = config.namespaces or list_namespaces(client) @@ -43,7 +78,16 @@ def analyze_kinds(config: AppConfig) -> List[Dict]: kinds = config.kinds or list_kinds(client, ns) logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds)) for kind in kinds: - count, total_bytes = estimate_entity_count_and_size(client, kind, ns) + if method == "stats": + count, total_bytes = get_kind_stats(client, kind, ns) + if count is None: + logger.warning("Stats not found for kind=%s, ns=%s — falling back to scan", kind, ns or "(default)") + count, total_bytes = estimate_entity_count_and_size(client, kind, ns) + elif method == "scan": + count, total_bytes = estimate_entity_count_and_size(client, kind, ns) + else: + raise ValueError(f"Unknown method: {method}") + results.append( { "namespace": ns, @@ -55,6 +99,7 @@ def analyze_kinds(config: AppConfig) -> List[Dict]: ) return results + def print_summary_table(rows: List[Dict]) -> None: # Plain stdout table for wide compatibility print("namespace,kind,count,size,bytes") From f7635b5e7e33028cdcedc55f7399c01383ad98ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20P=C3=A9rez?= Date: Fri, 17 Oct 2025 01:08:23 +0200 Subject: [PATCH 11/12] First working version --- README.md | 3 -- gcd_tools/analyze_entity_fields.py | 3 +- gcd_tools/analyze_kinds.py | 3 +- gcd_tools/cleanup_expired.py | 25 +++++++++++------ tests/test_commands.py | 45 ++++++++++++++++++++++++++++++ tests/test_import.py | 4 +++ 6 files changed, 69 insertions(+), 14 deletions(-) create mode 100644 tests/test_commands.py diff --git a/README.md b/README.md index 19ad010..23df618 100644 --- a/README.md +++ b/README.md @@ -12,21 +12,18 @@ This installs the `lsu` CLI. ## Install (from source) -```bash git clone cd local-storage-utils python3 -m venv .venv source .venv/bin/activate python -m pip install -U pip pip install -e . -``` ### Troubleshooting local installs - If you see "Command 'python' not found", use `python3 -m venv .venv` (above). Inside the venv, `python` will point to Python 3. - If you see "externally-managed-environment", you are attempting a system-wide install. 
Always install into a virtual environment: - Create a venv: `python3 -m venv .venv && source .venv/bin/activate` - Then use the venv pip: `python -m pip install -U pip && pip install -e .` -- If venv creation fails with "ensurepip is not available", install venv tooling on Debian/Ubuntu and retry: ```bash sudo apt-get update && sudo apt-get install -y python3-venv ``` diff --git a/gcd_tools/analyze_entity_fields.py b/gcd_tools/analyze_entity_fields.py index 6de862d..d23c5e0 100644 --- a/gcd_tools/analyze_entity_fields.py +++ b/gcd_tools/analyze_entity_fields.py @@ -28,7 +28,8 @@ def _estimate_field_contributions( total_size = 0 entity_count = 0 - for entity in entities: + from tqdm import tqdm + for entity in tqdm(list(entities), desc="Analyzing field contributions", unit="entity"): entity_count += 1 proto = entity_to_protobuf(entity)._pb full_size = len(proto.SerializeToString()) diff --git a/gcd_tools/analyze_kinds.py b/gcd_tools/analyze_kinds.py index 5b31f87..a679f5c 100644 --- a/gcd_tools/analyze_kinds.py +++ b/gcd_tools/analyze_kinds.py @@ -73,11 +73,12 @@ def analyze_kinds(config: AppConfig, method: Optional[str] = None) -> List[Dict] # Thanks to config.py normalisation, [] is the only “all” case namespaces = config.namespaces or list_namespaces(client) + from tqdm import tqdm results: List[Dict] = [] for ns in namespaces: kinds = config.kinds or list_kinds(client, ns) logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds)) - for kind in kinds: + for kind in tqdm(kinds, desc=f"Analyzing kinds in ns={ns or '(default)'}", unit="kind"): if method == "stats": count, total_bytes = get_kind_stats(client, kind, ns) if count is None: diff --git a/gcd_tools/cleanup_expired.py b/gcd_tools/cleanup_expired.py index 7ef36ef..0f522b9 100644 --- a/gcd_tools/cleanup_expired.py +++ b/gcd_tools/cleanup_expired.py @@ -6,6 +6,8 @@ from google.cloud import datastore +from tqdm import tqdm + from .config import ( AppConfig, build_client, @@ -44,7 +46,8 @@ def cleanup_expired( for kind in kinds: query = client.query(kind=kind, namespace=ns or None) to_delete: List[datastore.Key] = [] - for entity in query.fetch(): + entities = list(query.fetch()) + for entity in tqdm(entities, desc=f"Scanning {kind} in ns={ns or '(default)'}", unit="entity"): expire_at = entity.get(config.ttl_field) expired = expire_at is None if config.delete_missing_ttl else False if not expired and expire_at is not None: @@ -65,13 +68,17 @@ def cleanup_expired( ) totals[f"{ns}:{kind}"] = len(to_delete) else: - deleted = _delete_in_batches(client, to_delete, config.batch_size) if to_delete else 0 - logger.info( - "ns=%s kind=%s deleted %d expired entities", - ns or "(default)", - kind, - deleted, - ) - totals[f"{ns}:{kind}"] = deleted + deleted = 0 + if to_delete: + for batch in tqdm(list(chunked(to_delete, config.batch_size)), desc=f"Deleting {kind} in ns={ns or '(default)'}", unit="batch"): + client.delete_multi(batch) + deleted += len(batch) + logger.info( + "ns=%s kind=%s deleted %d expired entities", + ns or "(default)", + kind, + deleted, + ) + totals[f"{ns}:{kind}"] = deleted return totals \ No newline at end of file diff --git a/tests/test_commands.py b/tests/test_commands.py new file mode 100644 index 0000000..0742187 --- /dev/null +++ b/tests/test_commands.py @@ -0,0 +1,45 @@ + +import sys +import os +import pytest +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +from gcd_tools import analyze_kinds, analyze_entity_fields, cleanup_expired, config +from gcd_tools.config 
import AppConfig + +# Dummy config for testing (adjust as needed for emulator) +def make_dummy_config(): + return AppConfig( + project_id="dummy-project", + emulator_host="localhost:8080", + namespaces=[""], + kinds=["TestKind"], + ttl_field="expireAt", + delete_missing_ttl=True, + batch_size=10, + group_by_field=None, + log_level="INFO", + ) + +def test_analyze_kinds_runs(): + cfg = make_dummy_config() + try: + result = analyze_kinds(cfg) + assert isinstance(result, list) + except Exception as e: + pytest.skip(f"analyze_kinds requires emulator: {e}") + +def test_analyze_fields_runs(): + cfg = make_dummy_config() + try: + result = analyze_entity_fields.analyze_field_contributions(cfg, kind="TestKind") + assert isinstance(result, dict) + except Exception as e: + pytest.skip(f"analyze_fields requires emulator: {e}") + +def test_cleanup_expired_runs(): + cfg = make_dummy_config() + try: + result = cleanup_expired.cleanup_expired(cfg, dry_run=True) + assert isinstance(result, dict) + except Exception as e: + pytest.skip(f"cleanup_expired requires emulator: {e}") diff --git a/tests/test_import.py b/tests/test_import.py index b7f63dd..e87bc50 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,3 +1,7 @@ +import sys +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + def test_imports(): import gcd_tools from gcd_tools import analyze_kinds, analyze_entity_fields, cleanup_expired, config From 8ca20feb918504cb4d9975b8365a31a40efbd096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20P=C3=A9rez?= Date: Fri, 17 Oct 2025 02:02:28 +0200 Subject: [PATCH 12/12] Move to command folder --- cli.py | 8 ++--- {gcd_tools => commands}/__init__.py | 22 ++++++------ .../analyze_entity_fields.py | 10 ++++-- {gcd_tools => commands}/analyze_kinds.py | 9 +++-- {gcd_tools => commands}/cleanup_expired.py | 36 ++++++++++--------- {gcd_tools => commands}/config.py | 4 +-- pyproject.toml | 2 +- tests/test_commands.py | 13 +++++-- tests/test_import.py | 6 ++-- 9 files changed, 63 insertions(+), 47 deletions(-) rename {gcd_tools => commands}/__init__.py (59%) rename {gcd_tools => commands}/analyze_entity_fields.py (94%) rename {gcd_tools => commands}/analyze_kinds.py (92%) rename {gcd_tools => commands}/cleanup_expired.py (71%) rename {gcd_tools => commands}/config.py (99%) diff --git a/cli.py b/cli.py index 243d6b4..8d97b20 100644 --- a/cli.py +++ b/cli.py @@ -5,10 +5,10 @@ import typer -from gcd_tools.config import AppConfig, load_config -from gcd_tools.analyze_kinds import analyze_kinds, print_summary_table -from gcd_tools.analyze_entity_fields import analyze_field_contributions, print_field_summary -from gcd_tools.cleanup_expired import cleanup_expired +from commands.config import AppConfig, load_config +from commands.analyze_kinds import analyze_kinds, print_summary_table +from commands.analyze_entity_fields import analyze_field_contributions, print_field_summary +from commands.cleanup_expired import cleanup_expired app = typer.Typer(help="Utilities for analyzing and managing local Datastore/Firestore (Datastore mode)") diff --git a/gcd_tools/__init__.py b/commands/__init__.py similarity index 59% rename from gcd_tools/__init__.py rename to commands/__init__.py index a1c8e85..7493223 100644 --- a/gcd_tools/__init__.py +++ b/commands/__init__.py @@ -5,14 +5,14 @@ from . 
import config as config __all__ = [ - "AppConfig", - "load_config", - "build_client", - "list_namespaces", - "list_kinds", - "format_size", - "analyze_kinds", - "analyze_entity_fields", - "cleanup_expired", - "config", -] \ No newline at end of file + "AppConfig", + "load_config", + "build_client", + "list_namespaces", + "list_kinds", + "format_size", + "analyze_kinds", + "analyze_entity_fields", + "cleanup_expired", + "config", +] diff --git a/gcd_tools/analyze_entity_fields.py b/commands/analyze_entity_fields.py similarity index 94% rename from gcd_tools/analyze_entity_fields.py rename to commands/analyze_entity_fields.py index d23c5e0..fa70fc6 100644 --- a/gcd_tools/analyze_entity_fields.py +++ b/commands/analyze_entity_fields.py @@ -125,10 +125,14 @@ def analyze_field_contributions( ) -> Dict: client = build_client(config) - # If no namespace provided, iterate across all namespaces + # If no namespace provided, or config.namespaces is None/empty, iterate all namespaces if namespace is None: + if hasattr(config, "namespaces") and (not config.namespaces): + ns_list = list_namespaces(client) + else: + ns_list = [namespace] if namespace else list_namespaces(client) results: Dict[str, Dict] = {} - for ns in list_namespaces(client): + for ns in ns_list: results[ns or ""] = _analyze_single_namespace( client, kind=kind, namespace=ns, group_by_field=group_by_field, only_fields=only_fields ) @@ -161,4 +165,4 @@ def print_field_summary(result: Dict) -> None: ) for field, stats in result["fields"].items(): avg = stats["avg_per_entity"] - print(f" {field:30} {stats['human']:>12} ({avg:.1f} bytes avg)") \ No newline at end of file + print(f" {field:30} {stats['human']:>12} ({avg:.1f} bytes avg)") diff --git a/gcd_tools/analyze_kinds.py b/commands/analyze_kinds.py similarity index 92% rename from gcd_tools/analyze_kinds.py rename to commands/analyze_kinds.py index a679f5c..9807532 100644 --- a/gcd_tools/analyze_kinds.py +++ b/commands/analyze_kinds.py @@ -70,13 +70,18 @@ def analyze_kinds(config: AppConfig, method: Optional[str] = None) -> List[Dict] # Decide method priority: parameter > config > default method = method or getattr(config, "method", None) or "stats" - # Thanks to config.py normalisation, [] is the only “all” case - namespaces = config.namespaces or list_namespaces(client) + # If namespaces is None or empty, iterate all available namespaces + if not config.namespaces: + namespaces = list_namespaces(client) + else: + namespaces = config.namespaces + print(f"Found namespaces: {namespaces}") from tqdm import tqdm results: List[Dict] = [] for ns in namespaces: kinds = config.kinds or list_kinds(client, ns) + print(f"Namespace '{ns}': found kinds: {kinds}") logger.info("Analyzing namespace=%s, %d kinds", ns or "(default)", len(kinds)) for kind in tqdm(kinds, desc=f"Analyzing kinds in ns={ns or '(default)'}", unit="kind"): if method == "stats": diff --git a/gcd_tools/cleanup_expired.py b/commands/cleanup_expired.py similarity index 71% rename from gcd_tools/cleanup_expired.py rename to commands/cleanup_expired.py index 0f522b9..4d8b9d4 100644 --- a/gcd_tools/cleanup_expired.py +++ b/commands/cleanup_expired.py @@ -6,8 +6,6 @@ from google.cloud import datastore -from tqdm import tqdm - from .config import ( AppConfig, build_client, @@ -33,8 +31,11 @@ def cleanup_expired( ) -> Dict[str, int]: client = build_client(config) - # Determine namespaces: explicit list, or all - namespaces = config.namespaces if config.namespaces else list_namespaces(client) + # If namespaces is None or empty, iterate 
all available namespaces + if not config.namespaces: + namespaces = list_namespaces(client) + else: + namespaces = config.namespaces totals: Dict[str, int] = {} now = datetime.now(timezone.utc) @@ -47,6 +48,7 @@ def cleanup_expired( query = client.query(kind=kind, namespace=ns or None) to_delete: List[datastore.Key] = [] entities = list(query.fetch()) + from tqdm import tqdm for entity in tqdm(entities, desc=f"Scanning {kind} in ns={ns or '(default)'}", unit="entity"): expire_at = entity.get(config.ttl_field) expired = expire_at is None if config.delete_missing_ttl else False @@ -68,17 +70,17 @@ def cleanup_expired( ) totals[f"{ns}:{kind}"] = len(to_delete) else: - deleted = 0 - if to_delete: - for batch in tqdm(list(chunked(to_delete, config.batch_size)), desc=f"Deleting {kind} in ns={ns or '(default)'}", unit="batch"): - client.delete_multi(batch) - deleted += len(batch) - logger.info( - "ns=%s kind=%s deleted %d expired entities", - ns or "(default)", - kind, - deleted, - ) - totals[f"{ns}:{kind}"] = deleted + deleted = 0 + if to_delete: + for batch in tqdm(list(chunked(to_delete, config.batch_size)), desc=f"Deleting {kind} in ns={ns or '(default)'}", unit="batch"): + client.delete_multi(batch) + deleted += len(batch) + logger.info( + "ns=%s kind=%s deleted %d expired entities", + ns or "(default)", + kind, + deleted, + ) + totals[f"{ns}:{kind}"] = deleted - return totals \ No newline at end of file + return totals diff --git a/gcd_tools/config.py b/commands/config.py similarity index 99% rename from gcd_tools/config.py rename to commands/config.py index 071ce9e..420c993 100644 --- a/gcd_tools/config.py +++ b/commands/config.py @@ -8,7 +8,6 @@ import yaml from google.cloud import datastore - @dataclass class AppConfig: project_id: Optional[str] = None @@ -87,7 +86,6 @@ def load_config(path: Optional[str] = None, overrides: Optional[Dict] = None) -> return config - def _configure_logging(level: str) -> None: level_value = getattr(logging, level.upper(), logging.INFO) logging.basicConfig(level=level_value, format="%(asctime)s | %(levelname)s | %(message)s") @@ -148,4 +146,4 @@ def format_size(bytes_size: int) -> str: if size < 1024: return f"{size:.2f} {unit}" size /= 1024 - return f"{size:.2f} PB" \ No newline at end of file + return f"{size:.2f} PB" diff --git a/pyproject.toml b/pyproject.toml index 862abd0..0ff44d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ local-storage-utils = "cli:app" [tool.setuptools.packages.find] where = ["."] -include = ["gcd_tools*"] +include = ["commands*"] [tool.black] line-length = 100 diff --git a/tests/test_commands.py b/tests/test_commands.py index 0742187..70c05a3 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,10 +1,9 @@ - import sys import os import pytest sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from gcd_tools import analyze_kinds, analyze_entity_fields, cleanup_expired, config -from gcd_tools.config import AppConfig +from commands import analyze_kinds, analyze_entity_fields, cleanup_expired, config +from commands.config import AppConfig, build_client, list_namespaces # Dummy config for testing (adjust as needed for emulator) def make_dummy_config(): @@ -43,3 +42,11 @@ def test_cleanup_expired_runs(): assert isinstance(result, dict) except Exception as e: pytest.skip(f"cleanup_expired requires emulator: {e}") + +def test_list_namespaces_returns_default_and_any_custom(): + cfg = AppConfig(project_id="dummy-project", emulator_host="localhost:8010") + client = 
build_client(cfg)
+    try:
+        namespaces = list_namespaces(client)
+    except Exception as e:
+        pytest.skip(f"list_namespaces requires emulator: {e}")
+    assert "" in namespaces  # default namespace always present
+    # Add more asserts if your emulator is known to contain custom namespaces
diff --git a/tests/test_import.py b/tests/test_import.py
index e87bc50..c30178d 100644
--- a/tests/test_import.py
+++ b/tests/test_import.py
@@ -3,8 +3,8 @@
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
 def test_imports():
-    import gcd_tools
-    from gcd_tools import analyze_kinds, analyze_entity_fields, cleanup_expired, config
+    import commands
+    from commands import analyze_kinds, analyze_entity_fields, cleanup_expired, config
 
-    assert gcd_tools is not None
+    assert commands is not None
     assert hasattr(config, "AppConfig")
\ No newline at end of file
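The emulator-dependent tests above skip themselves when no Datastore emulator is reachable, so they only exercise the real query and cleanup paths once an emulator is running (for example via `gcloud beta emulators datastore start --project=dummy-project --host-port=localhost:8010`) and holds some data. A minimal seeding sketch, assuming that emulator address and reusing the `TestKind`/`expireAt` names from the dummy test config; it talks to the public google-cloud-datastore client directly rather than any helper from this repository:

```python
# Seed one already-expired TestKind entity into the local emulator so that
# analyze-kinds and cleanup have something to find.
import os
from datetime import datetime, timedelta, timezone

from google.cloud import datastore

os.environ["DATASTORE_EMULATOR_HOST"] = "localhost:8010"  # must match the dummy config
client = datastore.Client(project="dummy-project")

entity = datastore.Entity(key=client.key("TestKind"))  # incomplete key; the emulator assigns an id
entity.update({
    "expireAt": datetime.now(timezone.utc) - timedelta(days=1),  # in the past, so cleanup treats it as expired
    "batchId": "seed-1",
})
client.put(entity)
print("seeded:", entity.key)
```

With the emulator running and at least one entity seeded, `python -m pytest -q` runs the command tests against real data instead of skipping.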