Skip to content

Commit cb8c3c4

Browse files
Add list_buckets tool to enumerate buckets at endpoints
Allows users to ask the bot what buckets are available at a given backend, making it easier to discover data sources before transfers. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b9e76d7 commit cb8c3c4

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

src/xfer/slackbot/claude_agent.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
get_source_stats,
2323
get_transfer_progress,
2424
get_transfer_progress_by_job,
25+
list_buckets,
2526
submit_transfer,
2627
)
2728

@@ -108,6 +109,28 @@
108109
"required": [],
109110
},
110111
},
112+
{
113+
"name": "list_buckets",
114+
"description": """List buckets available at a specific backend/endpoint.
115+
116+
Use this when users want to:
117+
- See what buckets exist at a storage endpoint
118+
- Browse available data sources before starting a transfer
119+
- Discover bucket names they can use in transfer paths
120+
121+
This runs 'rclone lsd' to list top-level directories (buckets) at the remote.
122+
Note: The backend must be in the allowed list, and credentials must have ListBuckets permission.""",
123+
"input_schema": {
124+
"type": "object",
125+
"properties": {
126+
"backend": {
127+
"type": "string",
128+
"description": "The backend name to list buckets for (e.g., 's3src', 'gcs')",
129+
},
130+
},
131+
"required": ["backend"],
132+
},
133+
},
111134
{
112135
"name": "cancel_job",
113136
"description": """Cancel a running transfer job.
@@ -248,6 +271,7 @@
248271
- Submit new data transfer jobs (source -> destination)
249272
- Check status of running/completed transfers
250273
- List available storage backends
274+
- List buckets available at each backend
251275
- Cancel jobs if requested
252276
- Request access to new backends on behalf of users
253277
- Scan source paths to get file statistics and transfer estimates
@@ -353,6 +377,16 @@ def execute_tool(
353377
backends = get_allowed_backends(self.config)
354378
return json.dumps({"allowed_backends": backends})
355379

380+
elif tool_name == "list_buckets":
381+
backend = tool_input["backend"]
382+
result = list_buckets(backend, self.config)
383+
return json.dumps({
384+
"backend": result.backend,
385+
"buckets": result.buckets,
386+
"bucket_count": len(result.buckets),
387+
"error": result.error,
388+
})
389+
356390
elif tool_name == "cancel_job":
357391
success, message = cancel_job(
358392
tool_input["job_id"],

src/xfer/slackbot/slurm_tools.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,15 @@ class PathCheckResult:
807807
details: Optional[str] = None
808808

809809

810+
@dataclass
class BucketListResult:
    """Outcome of enumerating the buckets exposed by a single backend."""

    # Backend name the listing was requested for.
    backend: str
    # Bucket (top-level directory) names that were found.
    buckets: list[str]
    # Human-readable failure description; stays None when no error is recorded.
    error: Optional[str] = None
817+
818+
810819
def check_path_exists(path: str, config: BotConfig) -> PathCheckResult:
811820
"""
812821
Check if a bucket/path exists at a remote endpoint.
@@ -893,6 +902,97 @@ def check_path_exists(path: str, config: BotConfig) -> PathCheckResult:
893902
)
894903

895904

905+
def _parse_lsd_output(stdout: str) -> list[str]:
    """Extract directory names from the text printed by ``rclone lsd``."""
    names = []
    for line in stdout.splitlines():
        # Each line looks like:
        #   "          -1 2024-01-15 10:30:00        -1 bucket-name"
        # i.e. four whitespace-separated columns followed by the name.
        # Capping the split at four keeps a name containing spaces intact
        # instead of returning only its last word.
        fields = line.split(maxsplit=4)
        if fields:
            names.append(fields[-1])
    return names


def _describe_lsd_failure(exc: subprocess.CalledProcessError) -> str:
    """Turn a failed ``rclone lsd`` invocation into a user-facing message."""
    stderr = exc.stderr
    if isinstance(stderr, bytes):
        # Tolerate a runner that captured output without text decoding.
        stderr = stderr.decode("utf-8", errors="replace")
    error_output = stderr or str(exc)

    # Check for common error patterns
    if "AccessDenied" in error_output or "access denied" in error_output.lower():
        return "Access denied - credentials may not have ListBuckets permission"
    if "InvalidAccessKeyId" in error_output:
        return "Invalid access key - check credentials configuration"
    return f"Failed to list buckets: {error_output[:200]}"


def list_buckets(backend: str, config: BotConfig) -> BucketListResult:
    """
    List buckets/top-level directories at a remote endpoint.

    Runs ``rclone lsd <backend>:`` inside the configured container through
    ``srun`` and parses the directory names out of its output.

    Args:
        backend: The backend name (e.g., "s3src" or "gcs").
            Can include a trailing colon (e.g., "s3src:").
        config: Bot configuration; supplies the rclone config path, the
            in-container config path and the container image.

    Returns:
        BucketListResult with the sorted bucket names, or with an empty
        bucket list and ``error`` set when backend validation fails or the
        rclone command exits with an error.
    """
    # Normalize backend name - remove trailing colon if present
    backend = backend.rstrip(":")

    # Validate backend first so nothing is executed for a rejected name
    valid, msg = validate_backend(backend, config)
    if not valid:
        return BucketListResult(backend=backend, buckets=[], error=msg)

    # lsd lists directories; at the root of a remote these are the buckets
    rclone_cmd = [
        "rclone",
        "lsd",
        f"{backend}:",
        "--config",
        config.rclone.container_conf_path,
    ]

    # Mount the host rclone config read-only at the path rclone is told to use
    mounts = f"{config.rclone.config_path}:{config.rclone.container_conf_path}:ro"

    srun_cmd = [
        "srun",
        "-n",
        "1",
        "-c",
        "2",
        "--container-image",
        config.rclone.image,
        "--container-mounts",
        mounts,
        "--no-container-remap-root",
    ] + rclone_cmd

    # Keep the try body to the one call that can raise CalledProcessError.
    try:
        result = run_cmd(srun_cmd, capture=True, check=True)
    except subprocess.CalledProcessError as e:
        return BucketListResult(
            backend=backend,
            buckets=[],
            error=_describe_lsd_failure(e),
        )

    # An absent stdout is treated as an empty listing.
    buckets = _parse_lsd_output(result.stdout or "")
    return BucketListResult(
        backend=backend,
        buckets=sorted(buckets),
    )
994+
995+
896996
@dataclass
897997
class JobLogs:
898998
"""Logs and analysis data for a job."""

0 commit comments

Comments
 (0)