Skip to content

Commit 152a926

Browse files
Copilot and khnumdev committed
feat: Add drive-sync feature with push and pull commands
Co-authored-by: khnumdev <[email protected]>
1 parent feab56b commit 152a926

File tree

7 files changed

+547
-27
lines changed

7 files changed

+547
-27
lines changed

cli.py

Lines changed: 93 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,30 @@
99
from commands.analyze_kinds import analyze_kinds, print_summary_table
1010
from commands.analyze_entity_fields import analyze_field_contributions, print_field_summary
1111
from commands.cleanup_expired import cleanup_expired
12+
from commands.drive_sync import push_to_drive, pull_from_drive
1213

13-
app = typer.Typer(help="Utilities for analyzing and managing local Datastore/Firestore (Datastore mode)", no_args_is_help=True)
14+
app = typer.Typer(
15+
help="Utilities for analyzing and managing local Datastore/Firestore (Datastore mode)",
16+
no_args_is_help=True,
17+
)
1418

1519
# Aliases with flags only — no defaults here
1620
ConfigOpt = Annotated[Optional[str], typer.Option("--config", help="Path to config.yaml")]
1721
ProjectOpt = Annotated[Optional[str], typer.Option("--project", help="GCP/Emulator project id")]
18-
EmulatorHostOpt = Annotated[Optional[str], typer.Option("--emulator-host", help="Emulator host, e.g. localhost:8010")]
22+
EmulatorHostOpt = Annotated[
23+
Optional[str], typer.Option("--emulator-host", help="Emulator host, e.g. localhost:8010")
24+
]
1925
LogLevelOpt = Annotated[Optional[str], typer.Option("--log-level", help="Logging level")]
2026
KindsOpt = Annotated[
2127
Optional[List[str]],
22-
typer.Option("--kind", "-k", help="Kinds to process (omit or empty to process all in each namespace)")
28+
typer.Option(
29+
"--kind", "-k", help="Kinds to process (omit or empty to process all in each namespace)"
30+
),
31+
]
32+
SingleKindOpt = Annotated[
33+
Optional[str], typer.Option("--kind", "-k", help="Kind to analyze (falls back to config.kind)")
2334
]
24-
SingleKindOpt = Annotated[Optional[str], typer.Option("--kind", "-k", help="Kind to analyze (falls back to config.kind)")]
35+
2536

2637
def _load_cfg(
2738
config_path: Optional[str],
@@ -38,6 +49,7 @@ def _load_cfg(
3849
overrides["log_level"] = log_level
3950
return load_config(config_path, overrides)
4051

52+
4153
@app.command("analyze-kinds")
4254
def cmd_analyze_kinds(
4355
config: ConfigOpt = None,
@@ -64,17 +76,31 @@ def cmd_analyze_kinds(
6476
else:
6577
print_summary_table(rows)
6678

79+
6780
@app.command("analyze-fields")
6881
def cmd_analyze_fields(
6982
kind: SingleKindOpt = None,
70-
namespace: Annotated[Optional[str], typer.Option("--namespace", "-n", help="Namespace to query (omit to use all)")] = None,
71-
group_by: Annotated[Optional[str], typer.Option("--group-by", help="Group results by this field value (falls back to config.group_by_field)")] = None,
72-
only_field: Annotated[Optional[List[str]], typer.Option("--only-field", help="Only consider these fields")] = None,
83+
namespace: Annotated[
84+
Optional[str],
85+
typer.Option("--namespace", "-n", help="Namespace to query (omit to use all)"),
86+
] = None,
87+
group_by: Annotated[
88+
Optional[str],
89+
typer.Option(
90+
"--group-by",
91+
help="Group results by this field value (falls back to config.group_by_field)",
92+
),
93+
] = None,
94+
only_field: Annotated[
95+
Optional[List[str]], typer.Option("--only-field", help="Only consider these fields")
96+
] = None,
7397
config: ConfigOpt = None,
7498
project: ProjectOpt = None,
7599
emulator_host: EmulatorHostOpt = None,
76100
log_level: LogLevelOpt = None,
77-
output_json: Annotated[Optional[str], typer.Option("--output-json", help="Write raw JSON results to file")] = None,
101+
output_json: Annotated[
102+
Optional[str], typer.Option("--output-json", help="Write raw JSON results to file")
103+
] = None,
78104
):
79105
cfg = _load_cfg(config, project, emulator_host, log_level)
80106

@@ -100,17 +126,32 @@ def cmd_analyze_fields(
100126
else:
101127
print_field_summary(result)
102128

129+
103130
@app.command("cleanup")
104131
def cmd_cleanup(
105132
config: ConfigOpt = None,
106133
project: ProjectOpt = None,
107134
emulator_host: EmulatorHostOpt = None,
108135
log_level: LogLevelOpt = None,
109136
kind: KindsOpt = None,
110-
ttl_field: Annotated[Optional[str], typer.Option("--ttl-field", help="TTL field name (falls back to config.ttl_field)")] = None,
111-
delete_missing_ttl: Annotated[Optional[bool], typer.Option("--delete-missing-ttl", help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)")] = None,
112-
batch_size: Annotated[Optional[int], typer.Option("--batch-size", help="Delete batch size (falls back to config.batch_size)")] = None,
113-
dry_run: Annotated[bool, typer.Option("--dry-run", help="Only report counts; do not delete")] = False,
137+
ttl_field: Annotated[
138+
Optional[str],
139+
typer.Option("--ttl-field", help="TTL field name (falls back to config.ttl_field)"),
140+
] = None,
141+
delete_missing_ttl: Annotated[
142+
Optional[bool],
143+
typer.Option(
144+
"--delete-missing-ttl",
145+
help="Delete when TTL field is missing (falls back to config.delete_missing_ttl)",
146+
),
147+
] = None,
148+
batch_size: Annotated[
149+
Optional[int],
150+
typer.Option("--batch-size", help="Delete batch size (falls back to config.batch_size)"),
151+
] = None,
152+
dry_run: Annotated[
153+
bool, typer.Option("--dry-run", help="Only report counts; do not delete")
154+
] = False,
114155
):
115156
cfg = _load_cfg(config, project, emulator_host, log_level)
116157

@@ -127,6 +168,46 @@ def cmd_cleanup(
127168
deleted_sum = sum(totals.values())
128169
typer.echo(f"Total entities {'to delete' if dry_run else 'deleted'}: {deleted_sum}")
129170

171+
172+
@app.command("push")
def cmd_push(
    version: Annotated[
        Optional[str],
        typer.Argument(help="Version name (defaults to today's date YYYY-mm-DD)"),
    ] = None,
    overwrite: Annotated[
        bool,
        typer.Option("-o", "--overwrite", help="Overwrite existing file with same name"),
    ] = False,
    local_db: Annotated[
        Optional[str],
        typer.Option(
            "--local-db",
            help="Path to local-db binary (falls back to config.local_db_path)",
        ),
    ] = None,
    config: ConfigOpt = None,
    log_level: LogLevelOpt = None,
):
    # CLI entry point for `push`: load the config, then delegate to push_to_drive.
    # This command exposes no --project / --emulator-host options, so those two
    # _load_cfg arguments are passed as None.
    settings = _load_cfg(config, None, None, log_level)
    push_to_drive(
        config=settings,
        version=version,
        overwrite=overwrite,
        local_db=local_db,
    )
191+
192+
193+
@app.command("pull")
def cmd_pull(
    version: Annotated[
        Optional[str],
        typer.Argument(help="Version name (omit to download latest)"),
    ] = None,
    local_db: Annotated[
        Optional[str],
        typer.Option(
            "--local-db",
            help="Path to local-db binary (falls back to config.local_db_path)",
        ),
    ] = None,
    config: ConfigOpt = None,
    log_level: LogLevelOpt = None,
):
    # CLI entry point for `pull`: load the config, then delegate to pull_from_drive.
    # This command exposes no --project / --emulator-host options, so those two
    # _load_cfg arguments are passed as None.
    settings = _load_cfg(config, None, None, log_level)
    pull_from_drive(config=settings, version=version, local_db=local_db)
209+
210+
130211
if __name__ == "__main__":
131212
import sys
132213

commands/__init__.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,23 @@
22
from .analyze_kinds import analyze_kinds, get_kind_stats, estimate_entity_count_and_size
33
from .analyze_entity_fields import analyze_field_contributions, print_field_summary
44
from .cleanup_expired import cleanup_expired
5+
from .drive_sync import push_to_drive, pull_from_drive
56
from . import config as config
67

78
__all__ = [
8-
"AppConfig",
9-
"load_config",
10-
"build_client",
11-
"list_namespaces",
12-
"list_kinds",
13-
"format_size",
14-
"analyze_kinds",
15-
"get_kind_stats",
16-
"estimate_entity_count_and_size",
17-
"analyze_field_contributions",
18-
"print_field_summary",
19-
"cleanup_expired",
20-
"config",
9+
"AppConfig",
10+
"load_config",
11+
"build_client",
12+
"list_namespaces",
13+
"list_kinds",
14+
"format_size",
15+
"analyze_kinds",
16+
"get_kind_stats",
17+
"estimate_entity_count_and_size",
18+
"analyze_field_contributions",
19+
"print_field_summary",
20+
"cleanup_expired",
21+
"push_to_drive",
22+
"pull_from_drive",
23+
"config",
2124
]

commands/config.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ class AppConfig:
3737
# Logging
3838
log_level: str = "INFO"
3939

40+
# Drive sync settings
41+
local_db_path: Optional[str] = None
42+
4043

4144
def _as_list(value: Optional[Iterable[str]]) -> List[str]:
4245
if value is None:
@@ -90,6 +93,8 @@ def load_config(path: Optional[str] = None, overrides: Optional[Dict] = None) ->
9093

9194
config.log_level = str(merged.get("log_level", config.log_level)).upper()
9295

96+
config.local_db_path = merged.get("local_db_path", config.local_db_path)
97+
9398
_configure_logging(config.log_level)
9499
return config
95100

@@ -155,4 +160,3 @@ def format_size(bytes_size: int) -> str:
155160
return f"{size:.2f} {unit}"
156161
size /= 1024
157162
return f"{size:.2f} PB"
158-

commands/drive_sync.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import os
5+
import subprocess
6+
from datetime import datetime
7+
from typing import Optional
8+
9+
from pydrive2.auth import GoogleAuth
10+
from pydrive2.drive import GoogleDrive
11+
12+
from .config import AppConfig
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
def _authenticate_drive() -> GoogleDrive:
    """Return a Google Drive client bound to an authorised PyDrive2 session.

    Authorisation is performed through ``GoogleAuth.LocalWebserverAuth()``.
    """
    auth_session = GoogleAuth()
    auth_session.LocalWebserverAuth()
    drive_client = GoogleDrive(auth_session)
    return drive_client
21+
22+
23+
def _get_local_db_path(config: AppConfig, local_db_override: Optional[str]) -> str:
    """Resolve and validate the path of the ``local-db`` binary.

    Args:
        config: Loaded application config; ``config.local_db_path`` is the
            fallback when no override is given.
        local_db_override: Path supplied on the command line; takes
            precedence over the config value when non-empty.

    Returns:
        The path to the binary, with a leading ``~`` expanded.

    Raises:
        ValueError: Neither the override nor the config supplies a path.
        FileNotFoundError: The resolved path does not exist or is not a
            regular file.
    """
    local_db = local_db_override or config.local_db_path
    if not local_db:
        raise ValueError("local-db path must be provided via --local-db or config.local_db_path")

    # A path read from the config file never passes through a shell, so "~"
    # has to be expanded here to be usable.
    local_db = os.path.expanduser(local_db)

    # isfile rather than exists: a directory exists but can never be run as
    # the binary, and would only fail later inside subprocess.
    if not os.path.isfile(local_db):
        raise FileNotFoundError(f"local-db binary not found at: {local_db}")
    return local_db
30+
31+
32+
def _get_or_create_datastore_folder(drive: GoogleDrive) -> str:
    """Return the id of the Drive folder titled ``datastore``, creating it if absent.

    Args:
        drive: Authenticated PyDrive2 client.

    Returns:
        The ``id`` of the first matching non-trashed folder, or of the folder
        that was just created when no match exists.
    """
    folder_title = "datastore"
    folder_mime = "application/vnd.google-apps.folder"

    # NOTE(review): the query has no "in parents" clause, so a folder titled
    # "datastore" at any level may match, not only one at the Drive root.
    # Confirm this is intended.
    lookup = {
        "q": f"title='{folder_title}' and mimeType='{folder_mime}' and trashed=false"
    }
    matches = drive.ListFile(lookup).GetList()

    if not matches:
        new_folder = drive.CreateFile({"title": folder_title, "mimeType": folder_mime})
        new_folder.Upload()
        logger.info("Created /datastore folder in Google Drive")
        return new_folder["id"]

    # Several folders may share the title; the first one returned is used.
    return matches[0]["id"]
46+
47+
48+
def _run_local_db_command(local_db_path: str, args: list[str]) -> None:
    """Run the ``local-db`` binary with ``args`` and raise on a non-zero exit.

    Args:
        local_db_path: Path of the executable to run.
        args: Arguments appended after the executable, e.g. ``["stash", "v1"]``.

    Raises:
        RuntimeError: The process exited with a non-zero status. The message
            carries the exit code and the captured error output.
    """
    cmd = [local_db_path, *args]
    logger.info("Running: %s", " ".join(cmd))

    # Argument list, no shell: user-supplied version names are passed verbatim.
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # Some tools report failures on stdout only; fall back to it so the
        # error is never an empty "Command failed: ".
        details = (
            (result.stderr or "").strip()
            or (result.stdout or "").strip()
            or "<no output>"
        )
        raise RuntimeError(f"Command failed (exit code {result.returncode}): {details}")

    if result.stdout:
        logger.info("%s", result.stdout)
56+
57+
58+
def push_to_drive(
    config: AppConfig, version: Optional[str], overwrite: bool, local_db: Optional[str]
) -> None:
    """Stash the local database and upload the stash file to Google Drive.

    Runs ``<local-db> stash <version>``, expects the file
    ``local-db-<version>.bin`` to then exist relative to the current working
    directory, and uploads it into the Drive folder titled ``datastore``.

    Args:
        config: Loaded application config (fallback source of the binary path).
        version: Version name; when empty, today's local date as ``YYYY-MM-DD``.
        overwrite: Replace an existing Drive file with the same title instead
            of failing.
        local_db: Optional override for the ``local-db`` binary path.

    Raises:
        ValueError: No ``local-db`` path is configured.
        FileNotFoundError: The binary is missing, or the stash command did not
            produce the expected file.
        RuntimeError: The stash command exited with a non-zero status.
        FileExistsError: A file with the same title already exists in the
            folder and ``overwrite`` is false.
    """
    local_db_path = _get_local_db_path(config, local_db)

    if not version:
        version = datetime.now().strftime("%Y-%m-%d")

    backup_file = f"local-db-{version}.bin"

    _run_local_db_command(local_db_path, ["stash", version])

    if not os.path.exists(backup_file):
        raise FileNotFoundError(f"Stash did not produce expected file: {backup_file}")

    drive = _authenticate_drive()
    folder_id = _get_or_create_datastore_folder(drive)

    # The title sits inside a single-quoted Drive query literal. Escape
    # backslashes first, then quotes, so a version such as "bob's" cannot
    # terminate the literal early and break or alter the query.
    quoted_title = backup_file.replace("\\", "\\\\").replace("'", "\\'")
    existing = drive.ListFile(
        {"q": f"title='{quoted_title}' and '{folder_id}' in parents and trashed=false"}
    ).GetList()

    if existing and not overwrite:
        raise FileExistsError(
            f"File {backup_file} already exists in /datastore. Use -o to overwrite."
        )

    if existing:
        logger.info("Overwriting existing file: %s", backup_file)
        # Reuse the existing Drive file object so its content is replaced
        # rather than a second file with the same title being created.
        file_to_upload = existing[0]
    else:
        file_to_upload = drive.CreateFile({"title": backup_file, "parents": [{"id": folder_id}]})

    file_to_upload.SetContentFile(backup_file)
    file_to_upload.Upload()
    logger.info("Successfully uploaded %s to Google Drive /datastore", backup_file)
93+
94+
95+
def pull_from_drive(config: AppConfig, version: Optional[str], local_db: Optional[str]) -> None:
    """Download a backup from Google Drive and restore it with ``local-db``.

    With a ``version``, the file titled ``local-db-<version>.bin`` in the
    Drive folder titled ``datastore`` is fetched. Without one, the most
    recently modified matching file is fetched. The file is written relative
    to the current working directory under its Drive title, then
    ``<local-db> restore <version>`` is run.

    Args:
        config: Loaded application config (fallback source of the binary path).
        version: Version name to restore; empty selects the latest backup.
        local_db: Optional override for the ``local-db`` binary path.

    Raises:
        ValueError: No ``local-db`` path is configured.
        FileNotFoundError: The binary is missing, or no matching backup exists.
        RuntimeError: The restore command exited with a non-zero status.
    """
    local_db_path = _get_local_db_path(config, local_db)

    drive = _authenticate_drive()
    folder_id = _get_or_create_datastore_folder(drive)

    prefix = "local-db-"
    suffix = ".bin"

    if version:
        backup_file = f"{prefix}{version}{suffix}"
        # Escape backslashes, then quotes, so the title cannot terminate the
        # single-quoted Drive query literal early.
        quoted_title = backup_file.replace("\\", "\\\\").replace("'", "\\'")
        files = drive.ListFile(
            {"q": f"title='{quoted_title}' and '{folder_id}' in parents and trashed=false"}
        ).GetList()
        if not files:
            raise FileNotFoundError(f"No backup found with version: {version}")
        file_to_download = files[0]
        # The caller already supplied the version; re-deriving it from the
        # file name would corrupt versions that contain "local-db-" or ".bin".
        version_to_restore = version
    else:
        files = drive.ListFile(
            {
                "q": f"'{folder_id}' in parents and trashed=false and title contains 'local-db-' and title contains '.bin'",
                "orderBy": "modifiedDate desc",
                "maxResults": 1,
            }
        ).GetList()
        if not files:
            raise FileNotFoundError("No backups found in /datastore folder")
        file_to_download = files[0]
        backup_file = file_to_download["title"]

        # Strip the prefix and suffix only at the ends of the title.
        # str.replace would remove every occurrence, turning
        # "local-db-v1.binary.bin" into "v1ary".
        version_to_restore = backup_file
        if version_to_restore.startswith(prefix):
            version_to_restore = version_to_restore[len(prefix):]
        if version_to_restore.endswith(suffix):
            version_to_restore = version_to_restore[: -len(suffix)]

    logger.info("Downloading %s from Google Drive", backup_file)
    file_to_download.GetContentFile(backup_file)

    _run_local_db_command(local_db_path, ["restore", version_to_restore])
    logger.info("Successfully restored backup: %s", version_to_restore)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ dependencies = [
2727
"google-cloud-datastore>=2.19.0",
2828
"PyYAML>=6.0.1",
2929
"typer>=0.12.3",
30+
"pydrive2>=1.20.0",
3031
]
3132

3233
[project.optional-dependencies]

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
google-cloud-datastore>=2.19.0
22
PyYAML>=6.0.1
3-
typer>=0.12.3
3+
typer>=0.12.3
4+
pydrive2>=1.20.0

0 commit comments

Comments
 (0)