Skip to content

Commit 5b05526

Browse files
committed
✨ Multi-modal agent config
1 parent 4b4624a commit 5b05526

File tree

4 files changed: 117 additions (+117) and 53 deletions (-53).

sdk/nexent/multi_modal/load_save_object.py

Lines changed: 31 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import requests
77

88
from .utils import (
9+
UrlType,
910
is_url,
1011
generate_object_name,
1112
detect_content_type_from_bytes,
@@ -36,20 +37,28 @@ def _get_client(self) -> Any:
3637
raise ValueError("Storage client is not initialized.")
3738
return self._storage_client
3839

39-
def download_file_from_url(self, url: str, timeout: int = 30) -> Optional[bytes]:
40+
def download_file_from_url(
41+
self,
42+
url: str,
43+
url_type: UrlType,
44+
timeout: int = 30
45+
) -> Optional[bytes]:
4046
"""
4147
Download file content from S3 URL or HTTP/HTTPS URL as bytes.
4248
"""
4349
if not url:
4450
return None
4551

52+
if not url_type:
53+
raise ValueError("url_type must be provided for download_file_from_url")
54+
4655
try:
47-
if url.startswith(('http://', 'https://')):
56+
if url_type in ("http", "https"):
4857
response = requests.get(url, timeout=timeout)
4958
response.raise_for_status()
5059
return response.content
5160

52-
if url.startswith('s3://') or url.startswith('/'):
61+
if url_type == "s3":
5362
client = self._get_client()
5463
bucket, object_name = parse_s3_url(url)
5564

@@ -68,7 +77,7 @@ def download_file_from_url(self, url: str, timeout: int = 30) -> Optional[bytes]
6877
except Exception as exc:
6978
raise ValueError(f"Failed to read stream content: {exc}") from exc
7079

71-
raise ValueError(f"Unsupported URL format: {url[:50]}...")
80+
raise ValueError(f"Unsupported URL type: {url_type}")
7281

7382
except Exception as exc:
7483
logger.error(f"Failed to download file from URL: {exc}")
@@ -115,22 +124,24 @@ def decorator(func: Callable):
115124
def wrapper(*args, **kwargs):
116125
def _transform_single_value(param_name: str, value: Any,
117126
transformer: Optional[Callable[[bytes], Any]]) -> Any:
118-
if isinstance(value, str) and is_url(value):
119-
bytes_data = self.download_file_from_url(value)
120-
121-
if bytes_data is None:
122-
raise ValueError(f"Failed to download file from URL: {value}")
123-
124-
if transformer:
125-
transformed_data = transformer(bytes_data)
126-
logger.info(
127-
f"Downloaded {param_name} from URL and transformed "
128-
f"using {transformer.__name__}"
129-
)
130-
return transformed_data
131-
132-
logger.info(f"Downloaded {param_name} from URL as bytes (binary stream)")
133-
return bytes_data
127+
if isinstance(value, str):
128+
url_type = is_url(value)
129+
if url_type:
130+
bytes_data = self.download_file_from_url(value, url_type=url_type)
131+
132+
if bytes_data is None:
133+
raise ValueError(f"Failed to download file from URL: {value}")
134+
135+
if transformer:
136+
transformed_data = transformer(bytes_data)
137+
logger.info(
138+
f"Downloaded {param_name} from URL and transformed "
139+
f"using {transformer.__name__}"
140+
)
141+
return transformed_data
142+
143+
logger.info(f"Downloaded {param_name} from URL as bytes (binary stream)")
144+
return bytes_data
134145

135146
raise ValueError(
136147
f"Parameter '{param_name}' is not a URL string. "

sdk/nexent/multi_modal/utils.py

Lines changed: 29 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,40 +2,53 @@
22
import logging
33
from datetime import datetime
44
import uuid
5-
from typing import Tuple
5+
from typing import Literal, Optional, Tuple
66
import mimetypes
77
from pathlib import PurePosixPath
88

99

1010
logger = logging.getLogger("multi_modal")
1111

12+
UrlType = Literal["http", "https", "s3"]
1213

13-
def is_url(url: str) -> bool:
14+
15+
def is_url(url: str) -> Optional[UrlType]:
1416
"""
15-
Check if a string is a URL (S3 or HTTP/HTTPS)
17+
Classify a string URL as HTTP(S) or S3.
1618
1719
Args:
18-
url: String to check
20+
url: URL candidate
1921
2022
Returns:
21-
True if it is a URL, False otherwise
23+
'http', 'https', or 's3' when the input matches the respective
24+
scheme. Returns None when the input is not a supported URL.
2225
"""
2326
if not url or not isinstance(url, str):
24-
return False
27+
return None
2528

26-
# Check for HTTP/HTTPS URLs
27-
if url.startswith(("http://", "https://")):
28-
return True
29+
url = url.strip()
2930

30-
# Check for S3 URLs
31-
if url.startswith("s3://"):
32-
return True
31+
if url.startswith("http://"):
32+
return "http"
3333

34-
# Check for MinIO path format: /bucket/key
35-
if url.startswith("/") and len(url.split("/")) >= 3:
36-
return True
34+
if url.startswith("https://"):
35+
return "https"
3736

38-
return False
37+
if url.startswith("s3://"):
38+
bucket_path = url.replace("s3://", "", 1)
39+
bucket_object = bucket_path.split("/", 1)
40+
if len(bucket_object) == 2 and all(bucket_object):
41+
return "s3"
42+
return None
43+
44+
if url.startswith("/"):
45+
stripped = url.lstrip("/")
46+
parts = stripped.split("/", 1)
47+
if len(parts) == 2 and all(parts):
48+
return "s3"
49+
return None
50+
51+
return None
3952

4053

4154
def bytes_to_base64(bytes_data: bytes, content_type: str = "application/octet-stream") -> str:

test/sdk/multi_modal/test_load_save_object.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,10 @@ def raise_for_status(self):
3737
return None
3838

3939
monkeypatch.setattr(lso.requests, "get", lambda url, timeout: _Response())
40-
data = manager.download_file_from_url("https://example.com/file.png")
40+
data = manager.download_file_from_url(
41+
"https://example.com/file.png",
42+
url_type="https",
43+
)
4144
assert data == b"binary"
4245

4346

@@ -49,7 +52,7 @@ def get_file_stream(self, object_name: str, bucket: str) -> Tuple[bool, Any]:
4952
return True, io.BytesIO(b"payload")
5053

5154
manager = make_manager(_FakeClient())
52-
data = manager.download_file_from_url("s3://bucket/path/to/object")
55+
data = manager.download_file_from_url("s3://bucket/path/to/object", url_type="s3")
5356
assert data == b"payload"
5457

5558

@@ -59,25 +62,26 @@ def get_file_stream(self, object_name: str, bucket: str):
5962
return False, "boom"
6063

6164
manager = make_manager(_FailingClient())
62-
assert manager.download_file_from_url("s3://bucket/object") is None
65+
assert manager.download_file_from_url("s3://bucket/object", url_type="s3") is None
6366

6467

6568
def test_download_file_from_s3_missing_method_returns_none():
6669
class _InvalidClient:
6770
pass
6871

6972
manager = make_manager(_InvalidClient())
70-
assert manager.download_file_from_url("s3://bucket/object") is None
73+
assert manager.download_file_from_url("s3://bucket/object", url_type="s3") is None
7174

7275

73-
def test_download_file_with_unsupported_scheme_returns_none():
76+
def test_download_file_requires_url_type():
7477
manager = make_manager()
75-
assert manager.download_file_from_url("ftp://unsupported/path") is None
78+
with pytest.raises(ValueError):
79+
manager.download_file_from_url("https://example.com/file.png", url_type=None) # type: ignore[arg-type]
7680

7781

7882
def test_download_file_empty_url_returns_none():
7983
manager = make_manager()
80-
assert manager.download_file_from_url("") is None
84+
assert manager.download_file_from_url("", url_type="https") is None
8185

8286

8387
def test_download_file_stream_read_failure(monkeypatch):
@@ -93,7 +97,7 @@ def get_file_stream(self, object_name: str, bucket: str):
9397
return True, _FailingStream()
9498

9599
manager = make_manager(_Client())
96-
assert manager.download_file_from_url("s3://bucket/object") is None
100+
assert manager.download_file_from_url("s3://bucket/object", url_type="s3") is None
97101

98102

99103
def test_upload_bytes_to_minio_generates_object_name(monkeypatch):
@@ -195,7 +199,7 @@ def handler(image):
195199

196200
result = handler("https://example.com/img.png")
197201

198-
download_mock.assert_called_once_with("https://example.com/img.png")
202+
download_mock.assert_called_once_with("https://example.com/img.png", url_type="https")
199203
assert result == b"file-bytes"
200204

201205

@@ -242,7 +246,7 @@ def handler(image, other=None):
242246
return image, other
243247

244248
result = handler("https://example.com/a.png")
245-
download_mock.assert_called_once_with("https://example.com/a.png")
249+
download_mock.assert_called_once_with("https://example.com/a.png", url_type="https")
246250
assert result == (b"bytes", None)
247251

248252

test/sdk/multi_modal/test_utils.py

Lines changed: 43 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,18 @@
66

77

88
def test_is_url_variants():
9-
assert utils.is_url("https://example.com/image.png")
10-
assert utils.is_url("s3://bucket/key")
11-
assert utils.is_url("/bucket/key")
12-
assert not utils.is_url("not-a-url")
13-
assert not utils.is_url(123) # type: ignore[arg-type]
9+
assert utils.is_url("https://example.com/image.png") == "https"
10+
assert utils.is_url("http://example.com/image.png") == "http"
11+
assert utils.is_url("s3://bucket/key") == "s3"
12+
assert utils.is_url("/bucket/key") == "s3"
13+
assert utils.is_url("not-a-url") is None
14+
assert utils.is_url(123) is None # type: ignore[arg-type]
1415

1516

1617
def test_is_url_requires_bucket_and_key():
17-
assert not utils.is_url("/bucket")
18-
assert not utils.is_url("")
18+
assert utils.is_url("/bucket") is None
19+
assert utils.is_url("s3://bucket/") is None
20+
assert utils.is_url("") is None
1921

2022

2123
def test_bytes_to_base64_and_back():
@@ -167,3 +169,37 @@ def test_parse_s3_url_requires_object_name():
167169
utils.parse_s3_url("/bucket")
168170

169171

172+
def test_base64_to_bytes_header_without_base64_flag():
173+
payload = base64.b64encode(b"json-bytes").decode("utf-8")
174+
decoded, content_type = utils.base64_to_bytes(
175+
f"data:application/json,{payload}"
176+
)
177+
assert decoded == b"json-bytes"
178+
assert content_type == "application/json"
179+
180+
181+
@pytest.mark.parametrize(
182+
("payload", "expected"),
183+
[
184+
(b"\x00\x00\x00 qt " + b"\x00" * 6, "video/quicktime"),
185+
(b"OggS" + b"\x00" * 8, "audio/ogg"),
186+
(b"fLaC" + b"\x00" * 8, "audio/flac"),
187+
(b"\x1a\x45\xdf\xa3" + b"\x00" * 8, "video/webm"),
188+
(b"RIFF" + b"\x00" * 4 + b"AVI ", "video/x-msvideo"),
189+
],
190+
)
191+
def test_detect_content_type_expanded_signatures(payload: bytes, expected: str):
192+
assert utils.detect_content_type_from_bytes(payload) == expected
193+
194+
195+
def test_detect_content_type_mp3_frame_sync():
196+
payload = b"\xff\xfb" + b"\x00" * 4
197+
assert utils.detect_content_type_from_bytes(payload) == "audio/mpeg"
198+
199+
200+
@pytest.mark.parametrize("value", ["", None])
201+
def test_parse_s3_url_rejects_empty(value):
202+
with pytest.raises(ValueError):
203+
utils.parse_s3_url(value) # type: ignore[arg-type]
204+
205+

0 commit comments

Comments
 (0)