Skip to content

Commit e8b589a

Browse files
committed
Make encoding consitent
1 parent 69d6e7d commit e8b589a

File tree

2 files changed

+139
-4
lines changed

2 files changed

+139
-4
lines changed

dapi/files.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,36 @@
1111
from typing import List
1212

1313

14+
def _safe_quote(path: str) -> str:
15+
"""Safely URL-encode a path, avoiding double encoding.
16+
17+
Args:
18+
path (str): The path to encode
19+
20+
Returns:
21+
str: URL-encoded path
22+
23+
Example:
24+
>>> _safe_quote("folder with spaces")
25+
'folder%20with%20spaces'
26+
>>> _safe_quote("folder%20with%20spaces") # Already encoded
27+
'folder%20with%20spaces'
28+
"""
29+
# Check if the path appears to be already URL-encoded
30+
# by trying to decode it and seeing if it changes
31+
try:
32+
decoded = urllib.parse.unquote(path)
33+
if decoded != path:
34+
# Path was URL-encoded, return as-is to avoid double encoding
35+
return path
36+
else:
37+
# Path was not URL-encoded, encode it
38+
return urllib.parse.quote(path)
39+
except Exception:
40+
# If there's any error in decoding, just encode the original path
41+
return urllib.parse.quote(path)
42+
43+
1444
# _parse_tapis_uri helper remains the same
1545
def _parse_tapis_uri(tapis_uri: str) -> (str, str):
1646
"""Parse a Tapis URI into system ID and path components.
@@ -314,7 +344,7 @@ def get_ds_path_uri(t: Tapis, path: str, verify_exists: bool = False) -> str:
314344
try:
315345
system_id, remote_path = _parse_tapis_uri(input_uri)
316346
# The Tapis API expects URL-encoded paths when they contain spaces or special characters
317-
encoded_remote_path = urllib.parse.quote(remote_path)
347+
encoded_remote_path = _safe_quote(remote_path)
318348
print(f"Checking system '{system_id}' for path '{remote_path}'...")
319349
# Use limit=1 for efficiency, we only care if it *exists*
320350
# Note: listFiles might return successfully for the *parent* directory
@@ -376,8 +406,12 @@ def upload_file(t: Tapis, local_path: str, remote_uri: str):
376406
print(
377407
f"Uploading '{local_path}' to system '{system_id}' at path '{dest_path}'..."
378408
)
409+
# URL-encode the destination path for API call
410+
encoded_dest_path = _safe_quote(dest_path)
379411
t.upload(
380-
system_id=system_id, source_file_path=local_path, dest_file_path=dest_path
412+
system_id=system_id,
413+
source_file_path=local_path,
414+
dest_file_path=encoded_dest_path,
381415
)
382416
print("Upload complete.")
383417
except BaseTapyException as e:
@@ -421,8 +455,10 @@ def download_file(t: Tapis, remote_uri: str, local_path: str):
421455
os.makedirs(local_dir, exist_ok=True)
422456
# Use getContents which returns the raw bytes
423457
# Set stream=True for potentially large files
458+
# URL-encode the source path for API call
459+
encoded_source_path = _safe_quote(source_path)
424460
response = t.files.getContents(
425-
systemId=system_id, path=source_path, stream=True
461+
systemId=system_id, path=encoded_source_path, stream=True
426462
)
427463

428464
# Write the streamed content to the local file
@@ -474,8 +510,10 @@ def list_files(
474510
try:
475511
system_id, path = _parse_tapis_uri(remote_uri)
476512
print(f"Listing files in system '{system_id}' at path '{path}'...")
513+
# URL-encode the path for API call
514+
encoded_path = _safe_quote(path)
477515
results = t.files.listFiles(
478-
systemId=system_id, path=path, limit=limit, offset=offset
516+
systemId=system_id, path=encoded_path, limit=limit, offset=offset
479517
)
480518
print(f"Found {len(results)} items.")
481519
return results
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import unittest
2+
from unittest.mock import MagicMock, Mock
3+
from dapi.files import _safe_quote, _parse_tapis_uri, get_ds_path_uri
4+
from tapipy.tapis import Tapis
5+
import urllib.parse
6+
7+
8+
class TestEncodingConsistency(unittest.TestCase):
9+
"""Test encoding consistency and double-encoding prevention."""
10+
11+
def test_safe_quote_prevents_double_encoding(self):
12+
"""Test that _safe_quote prevents double encoding."""
13+
# Test with unencoded path
14+
unencoded = "folder with spaces"
15+
encoded_once = _safe_quote(unencoded)
16+
encoded_twice = _safe_quote(encoded_once)
17+
18+
self.assertEqual(encoded_once, "folder%20with%20spaces")
19+
self.assertEqual(encoded_twice, encoded_once) # Should not double encode
20+
21+
def test_safe_quote_with_multiple_spaces(self):
22+
"""Test _safe_quote with multiple spaces."""
23+
test_cases = [
24+
("folder with spaces", "folder%20with%20spaces"),
25+
("folder%20with%20spaces", "folder%20with%20spaces"), # Already encoded
26+
("normal_folder", "normal_folder"),
27+
("path/with spaces/here", "path/with%20spaces/here"),
28+
(
29+
"path%2Fwith%20spaces%2Fhere",
30+
"path%2Fwith%20spaces%2Fhere",
31+
), # Already encoded
32+
]
33+
34+
for input_path, expected in test_cases:
35+
with self.subTest(input_path=input_path):
36+
result = _safe_quote(input_path)
37+
self.assertEqual(result, expected)
38+
39+
def test_uri_generation_uses_spaces(self):
40+
"""Test that URI generation creates URIs with spaces, not %20."""
41+
mock_tapis = MagicMock(spec=Tapis)
42+
mock_tapis.username = "testuser"
43+
44+
path = "jupyter/MyData/folder with spaces/file.txt"
45+
uri = get_ds_path_uri(mock_tapis, path)
46+
47+
# URI should contain actual spaces
48+
self.assertIn("folder with spaces", uri)
49+
self.assertNotIn("folder%20with%20spaces", uri)
50+
self.assertEqual(
51+
uri,
52+
"tapis://designsafe.storage.default/testuser/folder with spaces/file.txt",
53+
)
54+
55+
def test_uri_parsing_handles_spaces(self):
56+
"""Test that URI parsing correctly handles URIs with spaces."""
57+
uri_with_spaces = (
58+
"tapis://designsafe.storage.default/testuser/folder with spaces/file.txt"
59+
)
60+
system_id, path = _parse_tapis_uri(uri_with_spaces)
61+
62+
self.assertEqual(system_id, "designsafe.storage.default")
63+
self.assertEqual(path, "testuser/folder with spaces/file.txt")
64+
65+
def test_uri_parsing_handles_encoded_uris(self):
66+
"""Test that URI parsing correctly handles pre-encoded URIs."""
67+
uri_with_encoding = "tapis://designsafe.storage.default/testuser/folder%20with%20spaces/file.txt"
68+
system_id, path = _parse_tapis_uri(uri_with_encoding)
69+
70+
self.assertEqual(system_id, "designsafe.storage.default")
71+
# Should return the path as-is (with %20) since we no longer decode
72+
self.assertEqual(path, "testuser/folder%20with%20spaces/file.txt")
73+
74+
def test_round_trip_consistency(self):
75+
"""Test that URI generation and parsing are consistent."""
76+
mock_tapis = MagicMock(spec=Tapis)
77+
mock_tapis.username = "testuser"
78+
79+
original_path = "jupyter/MyData/folder with spaces/file.txt"
80+
81+
# Generate URI
82+
uri = get_ds_path_uri(mock_tapis, original_path)
83+
84+
# Parse URI back
85+
system_id, parsed_path = _parse_tapis_uri(uri)
86+
87+
# The parsed path should match the expected Tapis path
88+
expected_tapis_path = "testuser/folder with spaces/file.txt"
89+
self.assertEqual(parsed_path, expected_tapis_path)
90+
91+
# Safe quote should properly encode for API calls
92+
api_path = _safe_quote(parsed_path)
93+
self.assertEqual(api_path, "testuser/folder%20with%20spaces/file.txt")
94+
95+
96+
if __name__ == "__main__":
97+
unittest.main()

0 commit comments

Comments
 (0)