Skip to content

Commit 1c01ddb

Browse files
Add check_path_exists tool and fix uv environment setup
New features:
- Add check_path_exists tool to verify bucket/path accessibility
- If path doesn't exist, automatically notifies support channel
- Detects common errors: NoSuchBucket, AccessDenied, NoSuchKey

Bug fixes:
- Add uv sync to prepare script to ensure environment is up to date
- Use 'uv run xfer' instead of bare 'xfer' commands
- Add XFER_INSTALL_DIR config option for xfer repo location

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 55446de commit 1c01ddb

File tree

3 files changed

+216
-5
lines changed

3 files changed

+216
-5
lines changed

src/xfer/slackbot/claude_agent.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .config import BotConfig
1515
from .slurm_tools import (
1616
cancel_job,
17+
check_path_exists,
1718
get_allowed_backends,
1819
get_job_status,
1920
get_jobs_by_thread,
@@ -179,6 +180,29 @@
179180
"required": ["source"],
180181
},
181182
},
183+
{
184+
"name": "check_path_exists",
185+
"description": """Check if a bucket or path exists at a remote endpoint.
186+
187+
Use this to verify that a source or destination path is accessible before starting a transfer.
188+
If the path doesn't exist, this will notify the support team so they can help resolve the issue.
189+
190+
Common reasons for paths not existing:
191+
- Bucket name is misspelled
192+
- Bucket hasn't been created yet
193+
- Credentials don't have access to the bucket
194+
- Wrong endpoint/region configured""",
195+
"input_schema": {
196+
"type": "object",
197+
"properties": {
198+
"path": {
199+
"type": "string",
200+
"description": "Path to check in rclone format (remote:bucket/path)",
201+
},
202+
},
203+
"required": ["path"],
204+
},
205+
},
182206
]
183207

184208
SYSTEM_PROMPT = """You are a helpful data transfer assistant for an HPC cluster. You help researchers submit and monitor data transfer jobs via Slurm.
@@ -190,6 +214,7 @@
190214
- Cancel jobs if requested
191215
- Request access to new backends on behalf of users
192216
- Scan source paths to get file statistics and transfer estimates
217+
- Verify that buckets/paths exist before starting transfers
193218
194219
Guidelines:
195220
1. Always validate that backends are allowed before submitting transfers
@@ -404,6 +429,79 @@ def execute_tool(
404429
result["error"] = stats.error
405430
return json.dumps(result)
406431

432+
elif tool_name == "check_path_exists":
433+
path = tool_input["path"]
434+
check_result = check_path_exists(path, self.config)
435+
436+
# If path doesn't exist, notify support channel
437+
support_notified = False
438+
if not check_result.exists and self.slack_client and self.config.support_channel:
439+
try:
440+
self.slack_client.chat_postMessage(
441+
channel=self.config.support_channel,
442+
text=f"Path access issue: {path}",
443+
blocks=[
444+
{
445+
"type": "header",
446+
"text": {
447+
"type": "plain_text",
448+
"text": "Path Access Issue",
449+
},
450+
},
451+
{
452+
"type": "section",
453+
"fields": [
454+
{
455+
"type": "mrkdwn",
456+
"text": f"*Path:*\n`{path}`",
457+
},
458+
{
459+
"type": "mrkdwn",
460+
"text": f"*Error:*\n{check_result.error or 'Unknown'}",
461+
},
462+
],
463+
},
464+
{
465+
"type": "section",
466+
"text": {
467+
"type": "mrkdwn",
468+
"text": f"*Details:*\n```{check_result.details[:500] if check_result.details else 'No details'}```",
469+
},
470+
},
471+
{
472+
"type": "context",
473+
"elements": [
474+
{
475+
"type": "mrkdwn",
476+
"text": f"<https://slack.com/archives/{channel_id}/p{thread_ts.replace('.', '')}|View original thread>",
477+
}
478+
],
479+
},
480+
],
481+
)
482+
support_notified = True
483+
except Exception as e:
484+
import logging
485+
486+
logging.getLogger(__name__).error(
487+
f"Failed to post to support channel: {e}"
488+
)
489+
490+
return json.dumps(
491+
{
492+
"path": check_result.path,
493+
"exists": check_result.exists,
494+
"error": check_result.error,
495+
"support_notified": support_notified,
496+
"message": (
497+
f"Path '{path}' is accessible."
498+
if check_result.exists
499+
else f"Path '{path}' is not accessible: {check_result.error}. "
500+
+ ("Support team has been notified." if support_notified else "")
501+
),
502+
}
503+
)
504+
407505
else:
408506
return json.dumps({"error": f"Unknown tool: {tool_name}"})
409507

src/xfer/slackbot/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ class BotConfig:
6666
allowed_backends_file: Optional[Path] = (
6767
None # Path to YAML/JSON listing allowed backends
6868
)
69+
xfer_install_dir: Optional[Path] = field(
70+
default_factory=lambda: Path(__file__).parent.parent.parent.parent.resolve()
71+
) # Path to xfer git repo for uv sync
6972

7073
# Defaults
7174
slurm: SlurmDefaults = field(default_factory=SlurmDefaults)
@@ -100,6 +103,9 @@ def from_env(cls) -> "BotConfig":
100103
if rclone_config := os.environ.get("XFER_RCLONE_CONFIG"):
101104
config.rclone.config_path = Path(rclone_config)
102105

106+
if xfer_install_dir := os.environ.get("XFER_INSTALL_DIR"):
107+
config.xfer_install_dir = Path(xfer_install_dir)
108+
103109
return config
104110

105111

src/xfer/slackbot/slurm_tools.py

Lines changed: 112 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,9 @@ def _write_prepare_script(
195195
# User flags to append after analysis-suggested flags
196196
user_rclone_flags = rclone_flags or ""
197197

198+
# Get xfer install directory for uv
199+
xfer_dir = config.xfer_install_dir or Path(__file__).parent.parent.parent.parent.resolve()
200+
198201
script_content = f"""#!/usr/bin/env bash
199202
#SBATCH --job-name={job_name}-prepare
200203
#SBATCH --output={run_dir}/prepare-%j.out
@@ -209,13 +212,21 @@ def _write_prepare_script(
209212
210213
set -euo pipefail
211214
215+
# Setup uv environment
216+
XFER_DIR="{xfer_dir}"
217+
echo "=== Setting up uv environment at $(date -Is) ==="
218+
echo "XFER_DIR: $XFER_DIR"
219+
cd "$XFER_DIR"
220+
uv sync
221+
echo "=== uv sync complete ==="
222+
212223
echo "=== Starting manifest build at $(date -Is) ==="
213224
echo "Source: {source}"
214225
echo "Dest: {dest}"
215226
echo "Run dir: {run_dir}"
216227
217228
# Phase 1: Build manifest
218-
xfer manifest build \\
229+
uv run xfer manifest build \\
219230
--source {shlex.quote(source)} \\
220231
--dest {shlex.quote(dest)} \\
221232
--out {run_dir}/manifest.jsonl \\
@@ -226,7 +237,7 @@ def _write_prepare_script(
226237
227238
# Phase 2: Analyze manifest to determine optimal rclone flags
228239
echo "=== Analyzing file size distribution ==="
229-
xfer manifest analyze \\
240+
uv run xfer manifest analyze \\
230241
--in {run_dir}/manifest.jsonl \\
231242
--out {run_dir}/analysis.json
232243
@@ -247,15 +258,15 @@ def _write_prepare_script(
247258
echo "=== Analysis complete at $(date -Is) ==="
248259
249260
# Phase 3: Shard manifest
250-
xfer manifest shard \\
261+
uv run xfer manifest shard \\
251262
--in {run_dir}/manifest.jsonl \\
252263
--outdir {run_dir}/shards \\
253264
--num-shards {num_shards}
254265
255266
echo "=== Sharding complete at $(date -Is) ==="
256267
257268
# Phase 4: Render Slurm scripts
258-
xfer slurm render \\
269+
uv run xfer slurm render \\
259270
--run-dir {run_dir} \\
260271
--num-shards {num_shards} \\
261272
--array-concurrency {array_concurrency} \\
@@ -273,7 +284,7 @@ def _write_prepare_script(
273284
echo "=== Render complete at $(date -Is) ==="
274285
275286
# Phase 5: Submit transfer array job
276-
xfer slurm submit --run-dir {run_dir}
287+
uv run xfer slurm submit --run-dir {run_dir}
277288
278289
echo "=== Transfer job submitted at $(date -Is) ==="
279290
"""
@@ -779,3 +790,99 @@ def get_source_stats(source: str, config: BotConfig) -> SourceStats:
779790
histogram_text=histogram_text,
780791
error=None,
781792
)
793+
794+
795+
@dataclass
796+
class PathCheckResult:
797+
"""Result of checking if a path exists."""
798+
799+
path: str
800+
exists: bool
801+
error: Optional[str] = None
802+
details: Optional[str] = None
803+
804+
805+
def check_path_exists(path: str, config: BotConfig) -> PathCheckResult:
    """
    Check if a bucket/path exists at a remote endpoint.

    Runs ``rclone lsf <path> --max-depth 0`` inside the configured container
    via ``srun``. If the command completes without raising, the path is
    reported as accessible; a ``subprocess.CalledProcessError`` is turned into
    a failed result whose ``error`` is chosen from the captured error output.

    Args:
        path: Path to check in rclone format (remote:bucket/path).
        config: Bot configuration. Supplies the container image, the rclone
            config path to mount, and the path it is mounted at inside the
            container.

    Returns:
        A PathCheckResult with ``exists=True`` on success. On failure
        ``exists`` is False, ``error`` is a short summary and ``details``
        holds the explanation (the captured error output for command
        failures).
    """
    # Validate backend first
    valid, msg = validate_backend(path, config)
    if not valid:
        return PathCheckResult(
            path=path,
            exists=False,
            error=msg,
            details="Backend not in allowed list",
        )

    # Build rclone lsf command to check if path exists.
    # NOTE(review): only --max-depth 0 is passed (no --dirs-only); confirm
    # against the rclone docs that depth 0 still fails for a missing path.
    rclone_cmd = [
        "rclone",
        "lsf",
        path,
        "--max-depth",
        "0",
        "--config",
        config.rclone.container_conf_path,
    ]

    # Mount the rclone config read-only into the container.
    mounts = f"{config.rclone.config_path}:{config.rclone.container_conf_path}:ro"

    srun_cmd = [
        "srun",
        "-n",
        "1",
        "-c",
        "2",
        "--container-image",
        config.rclone.image,
        "--container-mounts",
        mounts,
        "--no-container-remap-root",
    ] + rclone_cmd

    # Ordered (needles, summary) pairs; the first pair with a needle present
    # in the lower-cased error output wins. Order matters: "not found" is the
    # broadest needle and must stay last.
    known_errors = (
        (
            ("nosuchbucket", "bucket does not exist"),
            "Bucket does not exist",
        ),
        (
            ("accessdenied", "access denied"),
            "Access denied - credentials may not have permission",
        ),
        (
            ("nosuchkey", "not found"),
            "Path does not exist within the bucket",
        ),
    )

    try:
        run_cmd(srun_cmd, capture=True, check=True)
    except subprocess.CalledProcessError as e:
        error_output = e.stderr or str(e)
        # stderr is bytes when the process was not run in text mode; decode
        # so the substring checks below cannot raise TypeError.
        if isinstance(error_output, bytes):
            error_output = error_output.decode("utf-8", errors="replace")

        lowered = error_output.lower()
        error = next(
            (
                summary
                for needles, summary in known_errors
                if any(needle in lowered for needle in needles)
            ),
            f"Failed to access path: {error_output[:200]}",
        )
        return PathCheckResult(
            path=path,
            exists=False,
            error=error,
            details=error_output,
        )

    # If command succeeded, path exists
    return PathCheckResult(
        path=path,
        exists=True,
        details="Path is accessible",
    )

0 commit comments

Comments
 (0)