Skip to content

Commit d399530

Browse files
committed
gate check and download
1 parent 13b8bdd commit d399530

File tree

1 file changed

+19
-25
lines changed

1 file changed

+19
-25
lines changed

src/lerobot/scripts/download.py

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,6 @@ def _ensure_gate_dataset(
211211
gate_repo_id = f"{namespace}/{gate_name}"
212212
gate_path = out_dir / namespace / gate_name
213213

214-
LOGGER.error("============================================================")
215-
LOGGER.error(" CHECKING GATE DATASET ACCESS — ROBOCOIN CONSENT REQUIRED")
216-
LOGGER.error("============================================================")
217214

218215
# Check if gate dataset already exists
219216
if gate_path.exists() and any(gate_path.rglob("*")):
@@ -234,7 +231,7 @@ def _ensure_gate_dataset(
234231
enable_retry=False,
235232
)
236233
LOGGER.error("============================================================")
237-
LOGGER.error(" GATE CHECK PASSED — THANK YOU FOR SUPPORTING ROBOCOIN")
234+
LOGGER.error(" GATE CHECK PASSED — THANK YOU FOR SUPPORTING ROBOCOIN")
238235
LOGGER.error("============================================================")
239236
LOGGER.error("Gate dataset is ready at: %s", gate_path)
240237
LOGGER.error("Your consent keeps RoboCOIN sustainable and region-aware.")
@@ -247,7 +244,7 @@ def _ensure_gate_dataset(
247244
gate_url = f"https://modelscope.cn/datasets/{gate_repo_id}"
248245

249246
LOGGER.error("============================================================")
250-
LOGGER.error(" GATE DATASET ACCESS REQUIRED — PLEASE COMPLETE STATISTICS FORM")
247+
LOGGER.error(" GATE DATASET ACCESS REQUIRED — PLEASE COMPLETE STATISTICS FORM")
251248
LOGGER.error("============================================================")
252249
LOGGER.error("To improve RoboCOIN’s regional coverage and understand how the data")
253250
LOGGER.error("is used, we need a one-time, lightweight consent submission before")
@@ -278,8 +275,7 @@ def _download_from_hf(repo_id: str, target_dir: Path, token: str | None, max_wor
278275

279276
def _run() -> Path:
280277
try:
281-
# Create directory only after confirming download will succeed
282-
target_dir.mkdir(parents=True, exist_ok=True)
278+
# Let huggingface_hub create / manage the directory itself, so that no empty directory is left behind when the repository does not exist
283279
download_kwargs = {
284280
"repo_id": repo_id,
285281
"repo_type": "dataset",
@@ -343,9 +339,6 @@ def _run() -> Path:
343339
LOGGER.info("Logging in to ModelScope with provided token")
344340
HubApi().login(token)
345341

346-
# Create directory only after confirming download will succeed
347-
target_dir.mkdir(parents=True, exist_ok=True)
348-
349342
# Use dataset_snapshot_download for downloading dataset files
350343
# This downloads all raw files from the dataset repository
351344
LOGGER.info("Downloading dataset using dataset_snapshot_download...")
@@ -538,23 +531,24 @@ def main(argv: Sequence[str] | None = None) -> int:
538531
LOGGER.info(" Token: %s", "provided" if args.token else "not provided")
539532
return 0
540533

541-
# Perform gate check before actual download
534+
# Perform gate check before actual download (HuggingFace only)
542535
resolved_namespace = _resolve_namespace(args.namespace)
543536
resolved_token = _resolve_token(args.hub, args.token)
544-
try:
545-
_ensure_gate_dataset(
546-
hub=args.hub,
547-
namespace=resolved_namespace,
548-
out_dir=output_dir,
549-
token=resolved_token,
550-
max_workers=max(1, args.max_workers),
551-
max_retries=int(args.max_retry_time),
552-
)
553-
LOGGER.error("Gate check completed successfully. Proceeding with dataset downloads...")
554-
except RuntimeError as exc:
555-
# Gate dataset failure – abort cleanly before downloading other datasets
556-
LOGGER.error("Download aborted due to gate check failure: %s", exc)
557-
return 1
537+
if args.hub == "huggingface":
538+
try:
539+
_ensure_gate_dataset(
540+
hub=args.hub,
541+
namespace=resolved_namespace,
542+
out_dir=output_dir,
543+
token=resolved_token,
544+
max_workers=max(1, args.max_workers),
545+
max_retries=int(args.max_retry_time),
546+
)
547+
LOGGER.error("Gate check completed successfully. Proceeding with dataset downloads...")
548+
except RuntimeError as exc:
549+
# Gate dataset failure – abort cleanly before downloading other datasets
550+
LOGGER.error("Download aborted due to gate check failure: %s", exc)
551+
return 1
558552

559553
try:
560554
failures = download_datasets(

0 commit comments

Comments
 (0)