Skip to content

Commit fad7183

Browse files
authored
Open URLs with safe open (#143)
1 parent bcc4cc3 commit fad7183

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

src/pystow/utils/safe_open.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import zipfile
1212
from collections.abc import Generator, Mapping
1313
from pathlib import Path
14-
from typing import Any, BinaryIO, Literal, TextIO, cast, overload
14+
from typing import Any, BinaryIO, Literal, TextIO, TypeGuard, cast, overload
1515

1616
from .io_typing import (
1717
_MODE_TO_SIMPLE,
@@ -87,7 +87,7 @@ def safe_open(
8787

8888

8989
@contextlib.contextmanager
90-
def safe_open(
90+
def safe_open( # noqa:C901
9191
path: str | Path | typing.TextIO | typing.BinaryIO,
9292
*,
9393
operation: Operation = "read",
@@ -102,16 +102,26 @@ def safe_open(
102102
raise InvalidRepresentationError(representation)
103103

104104
if isinstance(path, (str, Path)):
105-
mode = MODE_MAP[operation, representation]
106105
encoding = ensure_sensible_default_encoding(encoding, representation=representation)
107106
newline = ensure_sensible_newline(newline, representation=representation)
108-
path = Path(path).expanduser().resolve()
109-
if path.suffix.endswith(".gz"):
110-
with gzip.open(path, mode=mode, encoding=encoding, newline=newline) as file:
111-
yield file # type:ignore
107+
108+
if is_url(path):
109+
if operation != "read":
110+
raise ValueError('can only use operation="read" with URLs')
111+
with open_url(
112+
path, representation=representation, encoding=encoding, newline=newline
113+
) as file:
114+
yield file
112115
else:
113-
with open(path, mode=mode, encoding=encoding, newline=newline) as file:
114-
yield file # type:ignore
116+
mode = MODE_MAP[operation, representation]
117+
path = Path(path).expanduser().resolve()
118+
if path.suffix.endswith(".gz"):
119+
with gzip.open(path, mode=mode, encoding=encoding, newline=newline) as file:
120+
yield file # type:ignore
121+
else:
122+
with open(path, mode=mode, encoding=encoding, newline=newline) as file:
123+
yield file # type:ignore
124+
115125
elif isinstance(path, typing.TextIO | io.TextIOWrapper | io.TextIOBase):
116126
if representation != "text":
117127
raise ValueError(
@@ -200,7 +210,7 @@ def safe_open_dict_reader(
200210
yield csv.DictReader(file, delimiter=delimiter, **kwargs)
201211

202212

203-
def is_url(s: str | Path | TextIO | Any) -> bool:
213+
def is_url(s: str | Path | TextIO | Any) -> TypeGuard[str]:
204214
"""Check if the object is a URL."""
205215
if isinstance(s, str) and (s.startswith("http://") or s.startswith("https://")):
206216
return True
@@ -211,26 +221,38 @@ def is_url(s: str | Path | TextIO | Any) -> bool:
211221
@overload
212222
@contextlib.contextmanager
213223
def open_url(
214-
url: str, *, representation: Literal["text"] = ...
224+
url: str,
225+
*,
226+
representation: Literal["text"] = ...,
227+
encoding: str | None = ...,
228+
newline: str | None = ...,
215229
) -> Generator[TextIO, None, None]: ...
216230

217231

218232
# docstr-coverage:excused `overload`
219233
@overload
220234
@contextlib.contextmanager
221235
def open_url(
222-
url: str, *, representation: Literal["binary"] = ...
236+
url: str,
237+
*,
238+
representation: Literal["binary"] = ...,
239+
encoding: str | None = ...,
240+
newline: str | None = ...,
223241
) -> Generator[BinaryIO, None, None]: ...
224242

225243

226244
@contextlib.contextmanager
227245
def open_url(
228-
url: str, *, representation: Representation = "text"
246+
url: str,
247+
*,
248+
representation: Representation = "text",
249+
encoding: str | None = None,
250+
newline: str | None = None,
229251
) -> Generator[TextIO, None, None] | Generator[BinaryIO, None, None]:
230252
"""Get a file-like object from a URL."""
231253
with urllib.request.urlopen(url) as response: # noqa:S310
232254
match representation:
233255
case "text":
234-
yield io.TextIOWrapper(response, encoding="utf-8")
256+
yield io.TextIOWrapper(response, encoding=encoding, newline=newline)
235257
case "binary":
236258
yield io.BufferedReader(response)

tests/test_utils/test_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,11 @@ def test_safe_open_exceptions(self) -> None:
391391
with safe_open(passthrough, representation="binary") as _file:
392392
pass
393393

394+
url = "https://zenodo.org/records/15504009/files/startup.sh"
395+
with self.assertRaises(ValueError):
396+
with safe_open(url, operation="write") as _file:
397+
pass
398+
394399
def test_safe_open_binary(self) -> None:
395400
"""Test safe open in binary mode."""
396401
for path in [TEST_TXT, TEST_TXT_GZ]:
@@ -410,6 +415,13 @@ def test_safe_open_binary(self) -> None:
410415
msg=f"failed to read bytes from {path} in a passthrough scenario",
411416
)
412417

418+
def test_safe_open_url_binary(self) -> None:
    """Check that ``safe_open`` can read a remote file as bytes."""
    url = "https://zenodo.org/records/15504009/files/startup.sh"
    with safe_open(url, representation="binary") as file:
        # The binary handle yields bytes, so decode before searching the text.
        content = file.read().decode("utf-8")
    self.assertIn("sleep 5", content)
424+
413425
def test_safe_open_text(self) -> None:
414426
"""Test safe open in text mode."""
415427
for path, encoding, newline in itt.product(
@@ -433,6 +445,13 @@ def test_safe_open_text(self) -> None:
433445
msg=f"failed to read text from {path} in a passthrough scenario",
434446
)
435447

448+
def test_safe_open_url_text(self) -> None:
    """Check that ``safe_open`` can read a remote file as text."""
    url = "https://zenodo.org/records/15504009/files/startup.sh"
    with safe_open(url, representation="text") as file:
        content = file.read()
    self.assertIn("sleep 5", content)
454+
436455
def test_encodings(self) -> None:
437456
"""Test I/O in different encodings."""
438457
for encoding in ["ascii", "utf-16-be", "CP1252"]:

0 commit comments

Comments
 (0)