Skip to content

Commit a751042

Browse files
author
Thomas Gilgenast
committed
make github _open() return HTTPFile when getting content from download_url
1 parent c085e2d commit a751042

File tree

3 files changed

+29
-9
lines changed

3 files changed

+29
-9
lines changed

fsspec/implementations/github.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import requests
44

5+
from ..asyn import get_loop, sync
56
from ..spec import AbstractFileSystem
67
from ..utils import infer_storage_options
8+
from .http import HTTPFile, HTTPFileSystem
79
from .memory import MemoryFile
810

911
# TODO: add GIST backend, would be very similar
@@ -64,6 +66,11 @@ def __init__(
6466
self.root = sha
6567
self.ls("")
6668

69+
# prepare elements needed to return HTTPFile
70+
self.http_fs = HTTPFileSystem(**kwargs)
71+
self.loop = get_loop()
72+
self.session = sync(self.loop, self.http_fs.set_session)
73+
6774
@property
6875
def kw(self):
6976
if self.username:
@@ -245,9 +252,16 @@ def _open(
245252

246253
# we land here if the content was not present in the first response
247254
# (regular file over 1MB or git-lfs tracked file)
248-
# in this case, we get the content from the download_url
249-
r = requests.get(content_json["download_url"], timeout=self.timeout, **self.kw)
250-
if r.status_code == 404:
251-
raise FileNotFoundError(path)
252-
r.raise_for_status()
253-
return MemoryFile(None, None, r.content)
255+
# in this case, we return an HTTPFile object wrapping the
256+
# download_url
257+
return HTTPFile(
258+
self.http_fs,
259+
content_json["download_url"],
260+
session=self.session,
261+
block_size=block_size,
262+
autocommit=autocommit,
263+
cache_options=cache_options,
264+
size=content_json["size"],
265+
loop=self.loop,
266+
**kwargs,
267+
)

fsspec/implementations/tests/test_github.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ def test_github_open_small_file():
99

1010
def test_github_open_large_file():
1111
# test opening a large file >1 MB
12-
with fsspec.open("github://mwaskom:seaborn-data@83bfba7/brain_networks.csv") as f:
13-
assert f.readline().startswith(b"network,1,1,2,2")
12+
# use block_size=0 to get a streaming interface to the file, ensuring that
13+
# we fetch only the parts we need instead of downloading the full file all
14+
# at once
15+
with fsspec.open(
16+
"github://mwaskom:seaborn-data@83bfba7/brain_networks.csv", block_size=0
17+
) as f:
18+
# read only the first 20 bytes of the file
19+
assert f.read(20).startswith(b"network,1,1,2,2,3,3,")
1420

1521

1622
def test_github_open_lfs_file():

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ full = [
5656
fuse = ["fusepy"]
5757
gcs = ["gcsfs"]
5858
git = ["pygit2"]
59-
github = ["requests"]
59+
github = ["fsspec[http]", "requests"]
6060
gs = ["gcsfs"]
6161
gui = ["panel"]
6262
hdfs = ["pyarrow >= 1"]

0 commit comments

Comments
 (0)