Skip to content

Commit fdb4c49

Browse files
authored
Add URL opening utilities (#142)
1 parent c012b7f commit fdb4c49

File tree

3 files changed

+66
-2
lines changed

3 files changed

+66
-2
lines changed

src/pystow/utils/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
stream_write_pydantic_jsonl,
8989
write_pydantic_jsonl,
9090
)
91-
from .safe_open import open_inner_zipfile, safe_open, safe_open_dict_reader
91+
from .safe_open import is_url, open_inner_zipfile, open_url, safe_open, safe_open_dict_reader
9292
from ..constants import README_TEXT, TimeoutHint
9393

9494
if TYPE_CHECKING:
@@ -134,6 +134,7 @@
134134
"getenv_path",
135135
"gunzip",
136136
"gzip_compress",
137+
"is_url",
137138
"iter_pydantic_jsonl",
138139
"iter_pydantic_tsv",
139140
"iter_tarred_csvs",
@@ -148,6 +149,7 @@
148149
"name_from_url",
149150
"open_inner_zipfile",
150151
"open_tarfile",
152+
"open_url",
151153
"open_zip_reader",
152154
"open_zip_writer",
153155
"open_zipfile",

src/pystow/utils/safe_open.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
import gzip
88
import io
99
import typing
10+
import urllib.request
1011
import zipfile
1112
from collections.abc import Generator, Mapping
1213
from pathlib import Path
13-
from typing import Any, Literal, TextIO, cast
14+
from typing import Any, BinaryIO, Literal, TextIO, cast, overload
1415

1516
from .io_typing import (
1617
_MODE_TO_SIMPLE,
@@ -26,7 +27,9 @@
2627
)
2728

2829
__all__ = [
30+
"is_url",
2931
"open_inner_zipfile",
32+
"open_url",
3033
"safe_open",
3134
"safe_open_dict_reader",
3235
]
@@ -195,3 +198,39 @@ def safe_open_dict_reader(
195198
"""
196199
with safe_open(f, operation="read", representation="text") as file:
197200
yield csv.DictReader(file, delimiter=delimiter, **kwargs)
201+
202+
203+
def is_url(s: str | Path | TextIO | Any) -> bool:
204+
"""Check if the object is a URL."""
205+
if isinstance(s, str) and (s.startswith("http://") or s.startswith("https://")):
206+
return True
207+
return False
208+
209+
210+
# docstr-coverage:excused `overload`
@overload
@contextlib.contextmanager
def open_url(
    url: str, *, representation: Literal["text"] = ...
) -> Generator[TextIO, None, None]: ...


# docstr-coverage:excused `overload`
@overload
@contextlib.contextmanager
def open_url(
    url: str, *, representation: Literal["binary"]
) -> Generator[BinaryIO, None, None]: ...


@contextlib.contextmanager
def open_url(
    url: str, *, representation: Representation = "text"
) -> Generator[TextIO, None, None] | Generator[BinaryIO, None, None]:
    """Get a file-like object from a URL.

    :param url: The URL to open. It is passed unchanged to
        :func:`urllib.request.urlopen`.
    :param representation: ``"text"`` to yield a UTF-8 decoding text stream,
        or ``"binary"`` to yield a buffered byte stream.
    :yields: A readable file-like object over the response body. The
        underlying response is closed when the context exits.
    :raises ValueError: If ``representation`` is neither ``"text"`` nor
        ``"binary"``.

    .. note::

        No scheme check is performed here (hence the ``S310`` suppression
        below); callers handling untrusted input should validate the URL
        first, e.g., with :func:`is_url`.
    """
    # Validate before opening the connection: otherwise an unsupported value
    # would fall through without yielding and ``contextlib`` would raise an
    # opaque ``RuntimeError("generator didn't yield")`` after the request had
    # already been made.
    if representation not in ("text", "binary"):
        raise ValueError(f"invalid representation: {representation!r}")

    with urllib.request.urlopen(url) as response:  # noqa:S310
        if representation == "text":
            yield io.TextIOWrapper(response, encoding="utf-8")
        else:
            yield io.BufferedReader(response)

tests/test_utils/test_utils.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@
2828
getenv_path,
2929
gunzip,
3030
gzip_compress,
31+
is_url,
3132
iter_tarred_csvs,
3233
iter_zipped_csvs,
3334
mkdir,
3435
mock_envvar,
3536
n,
3637
name_from_url,
3738
open_tarfile,
39+
open_url,
3840
open_zip_reader,
3941
open_zip_writer,
4042
open_zipfile,
@@ -475,6 +477,27 @@ def test_gzip(self) -> None:
475477
self.assertTrue(path_new.is_file())
476478
self.assertEqual(TEST_TXT_CONTENT, path_new.read_text(encoding="utf-8"))
477479

480+
def test_is_url(self) -> None:
    """Check that only strings with an HTTP(S) scheme count as URLs."""
    # Inputs that must be recognized as URLs.
    accepted = (
        "https://zenodo.org/records/15504009/files/startup.sh?download=1",
        "https://zenodo.org/records/15504009/files/startup.sh",
        "http://zenodo.org/records/15504009/files/startup.sh",
    )
    # Inputs that must be rejected: another scheme, a bare word, a non-string.
    rejected = (
        "ftp://zenodo.org/records/15504009/files/startup.sh",
        "nope",
        Path().cwd(),
    )
    for candidate in accepted:
        self.assertTrue(is_url(candidate))
    for candidate in rejected:
        self.assertFalse(is_url(candidate))
488+
489+
def test_open_url(self) -> None:
    """Check that a remote file can be read as text and as bytes."""
    # The same remote resource is fetched once per representation.
    url = "https://zenodo.org/records/15504009/files/startup.sh"

    with open_url(url, representation="text") as text_file:
        text = text_file.read()
        self.assertIn("sleep 5", text)

    with open_url(url, representation="binary") as binary_file:
        # The binary stream yields bytes, so decode before comparing.
        decoded = binary_file.read().decode("utf-8")
        self.assertIn("sleep 5", decoded)
500+
478501

479502
class TestDownload(unittest.TestCase):
480503
"""Tests for downloading."""

0 commit comments

Comments
 (0)