Skip to content

Commit 8294298

Browse files
authored
Merge pull request #6 from DesignSafe-CI/dev
Fixes #4: issue with Tapis filenames that contain spaces
2 parents 59e9d55 + e8b589a commit 8294298

File tree

13 files changed

+769
-768
lines changed

13 files changed

+769
-768
lines changed

README.md

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,4 @@ poetry install
148148
poetry run mkdocs serve
149149
```
150150

151-
This will start a local server at `http://127.0.0.1:8000/dapi/` where you can view the documentation.
152-
153-
### API docs
154-
To generate API docs:
155-
156-
```
157-
pdoc --html --output-dir api-docs dapi --force
158-
```
151+
This will start a local server at `http://127.0.0.1:8000/dapi/` where you can view the documentation.

dapi/files.py

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,36 @@
1111
from typing import List
1212

1313

14+
def _safe_quote(path: str) -> str:
15+
"""Safely URL-encode a path, avoiding double encoding.
16+
17+
Args:
18+
path (str): The path to encode
19+
20+
Returns:
21+
str: URL-encoded path
22+
23+
Example:
24+
>>> _safe_quote("folder with spaces")
25+
'folder%20with%20spaces'
26+
>>> _safe_quote("folder%20with%20spaces") # Already encoded
27+
'folder%20with%20spaces'
28+
"""
29+
# Check if the path appears to be already URL-encoded
30+
# by trying to decode it and seeing if it changes
31+
try:
32+
decoded = urllib.parse.unquote(path)
33+
if decoded != path:
34+
# Path was URL-encoded, return as-is to avoid double encoding
35+
return path
36+
else:
37+
# Path was not URL-encoded, encode it
38+
return urllib.parse.quote(path)
39+
except Exception:
40+
# If there's any error in decoding, just encode the original path
41+
return urllib.parse.quote(path)
42+
43+
1444
# _parse_tapis_uri helper remains the same
1545
def _parse_tapis_uri(tapis_uri: str) -> (str, str):
1646
"""Parse a Tapis URI into system ID and path components.
@@ -19,7 +49,7 @@ def _parse_tapis_uri(tapis_uri: str) -> (str, str):
1949
tapis_uri (str): URI in the format 'tapis://system_id/path'.
2050
2151
Returns:
22-
tuple: A tuple containing (system_id, path) where path is URL-decoded.
52+
tuple: A tuple containing (system_id, path).
2353
2454
Raises:
2555
ValueError: If the URI format is invalid or missing required components.
@@ -190,8 +220,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
190220
)
191221
else:
192222
tapis_path = path_remainder
193-
encoded_path = urllib.parse.quote(tapis_path)
194-
input_uri = f"tapis://{storage_system_id}/{encoded_path}"
223+
input_uri = f"tapis://{storage_system_id}/{tapis_path}"
195224
print(f"Translated '{path}' to '{input_uri}' using t.username")
196225
break # Found match, exit loop
197226

@@ -206,8 +235,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
206235
if pattern in path:
207236
path_remainder = path.split(pattern, 1)[1].lstrip("/")
208237
tapis_path = path_remainder
209-
encoded_path = urllib.parse.quote(tapis_path)
210-
input_uri = f"tapis://{storage_system_id}/{encoded_path}"
238+
input_uri = f"tapis://{storage_system_id}/{tapis_path}"
211239
print(f"Translated '{path}' to '{input_uri}'")
212240
break # Found match, exit loop
213241

@@ -295,8 +323,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
295323
f"Could not resolve project ID '{project_id_part}' to a Tapis system ID."
296324
)
297325

298-
encoded_path_within_project = urllib.parse.quote(path_within_project)
299-
input_uri = f"tapis://{found_system_id}/{encoded_path_within_project}"
326+
input_uri = f"tapis://{found_system_id}/{path_within_project}"
300327
print(f"Translated '{path}' to '{input_uri}' using Tapis v3 lookup")
301328
break # Found match, exit loop
302329

@@ -316,26 +343,26 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
316343
print(f"Verifying existence of translated path: {input_uri}")
317344
try:
318345
system_id, remote_path = _parse_tapis_uri(input_uri)
319-
# Decode the path part for the listFiles call, as it expects unencoded paths
320-
decoded_remote_path = urllib.parse.unquote(remote_path)
321-
print(f"Checking system '{system_id}' for path '{decoded_remote_path}'...")
346+
# The Tapis API expects URL-encoded paths when they contain spaces or special characters
347+
encoded_remote_path = _safe_quote(remote_path)
348+
print(f"Checking system '{system_id}' for path '{remote_path}'...")
322349
# Use limit=1 for efficiency, we only care if it *exists*
323350
# Note: listFiles might return successfully for the *parent* directory
324351
# if the final component doesn't exist. A more robust check might
325352
# involve checking the result count or specific item name, but this
326353
# basic check catches non-existent parent directories.
327-
t.files.listFiles(systemId=system_id, path=decoded_remote_path, limit=1)
354+
t.files.listFiles(systemId=system_id, path=encoded_remote_path, limit=1)
328355
print(f"Verification successful: Path exists.")
329356
except BaseTapyException as e:
330357
# Specifically check for 404 on the listFiles call
331358
if hasattr(e, "response") and e.response and e.response.status_code == 404:
332359
raise FileOperationError(
333-
f"Verification failed: Path '{decoded_remote_path}' does not exist on system '{system_id}'. Translated URI: {input_uri}"
360+
f"Verification failed: Path '{remote_path}' does not exist on system '{system_id}'. Translated URI: {input_uri}"
334361
) from e
335362
else:
336363
# Re-raise other Tapis errors encountered during verification
337364
raise FileOperationError(
338-
f"Verification error for path '{decoded_remote_path}' on system '{system_id}': {e}"
365+
f"Verification error for path '{remote_path}' on system '{system_id}': {e}"
339366
) from e
340367
except (
341368
ValueError
@@ -379,8 +406,12 @@ def upload_file(t: Tapis, local_path: str, remote_uri: str):
379406
print(
380407
f"Uploading '{local_path}' to system '{system_id}' at path '{dest_path}'..."
381408
)
409+
# URL-encode the destination path for API call
410+
encoded_dest_path = _safe_quote(dest_path)
382411
t.upload(
383-
system_id=system_id, source_file_path=local_path, dest_file_path=dest_path
412+
system_id=system_id,
413+
source_file_path=local_path,
414+
dest_file_path=encoded_dest_path,
384415
)
385416
print("Upload complete.")
386417
except BaseTapyException as e:
@@ -424,8 +455,10 @@ def download_file(t: Tapis, remote_uri: str, local_path: str):
424455
os.makedirs(local_dir, exist_ok=True)
425456
# Use getContents which returns the raw bytes
426457
# Set stream=True for potentially large files
458+
# URL-encode the source path for API call
459+
encoded_source_path = _safe_quote(source_path)
427460
response = t.files.getContents(
428-
systemId=system_id, path=source_path, stream=True
461+
systemId=system_id, path=encoded_source_path, stream=True
429462
)
430463

431464
# Write the streamed content to the local file
@@ -477,8 +510,10 @@ def list_files(
477510
try:
478511
system_id, path = _parse_tapis_uri(remote_uri)
479512
print(f"Listing files in system '{system_id}' at path '{path}'...")
513+
# URL-encode the path for API call
514+
encoded_path = _safe_quote(path)
480515
results = t.files.listFiles(
481-
systemId=system_id, path=path, limit=limit, offset=offset
516+
systemId=system_id, path=encoded_path, limit=limit, offset=offset
482517
)
483518
print(f"Found {len(results)} items.")
484519
return results

dapi/jobs.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,9 +1007,7 @@ def archive_uri(self) -> Optional[str]:
10071007
details = self._get_details()
10081008
if details.archiveSystemId and details.archiveSystemDir:
10091009
archive_path = details.archiveSystemDir.lstrip("/")
1010-
return (
1011-
f"tapis://{details.archiveSystemId}/{urllib.parse.quote(archive_path)}"
1012-
)
1010+
return f"tapis://{details.archiveSystemId}/{archive_path}"
10131011
return None
10141012

10151013
def list_outputs(
@@ -1048,7 +1046,7 @@ def list_outputs(
10481046
full_archive_path = os.path.join(details.archiveSystemDir, path.lstrip("/"))
10491047
full_archive_path = os.path.normpath(full_archive_path).lstrip("/")
10501048
try:
1051-
archive_base_uri = f"tapis://{details.archiveSystemId}/{urllib.parse.quote(full_archive_path)}"
1049+
archive_base_uri = f"tapis://{details.archiveSystemId}/{full_archive_path}"
10521050
from .files import list_files
10531051

10541052
return list_files(self._tapis, archive_base_uri, limit=limit, offset=offset)
@@ -1084,9 +1082,7 @@ def download_output(self, remote_path: str, local_target: str):
10841082
details.archiveSystemDir, remote_path.lstrip("/")
10851083
)
10861084
full_archive_path = os.path.normpath(full_archive_path).lstrip("/")
1087-
remote_uri = (
1088-
f"tapis://{details.archiveSystemId}/{urllib.parse.quote(full_archive_path)}"
1089-
)
1085+
remote_uri = f"tapis://{details.archiveSystemId}/{full_archive_path}"
10901086
try:
10911087
from .files import download_file
10921088

0 commit comments

Comments
 (0)