Skip to content

Commit 25b805d

Browse files
btbest and martindurant authored
Expose connection errors in HTTPFileSystem._exists (#1849)
Co-authored-by: Martin Durant <[email protected]>
1 parent 85b6324 commit 25b805d

File tree

5 files changed

+34
-3
lines changed

5 files changed

+34
-3
lines changed

fsspec/implementations/http.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -327,16 +327,22 @@ async def gen_chunks():
327327
async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
328328
self._raise_not_found_for_status(resp, rpath)
329329

330-
async def _exists(self, path, **kwargs):
330+
async def _exists(self, path, strict=False, **kwargs):
331331
kw = self.kwargs.copy()
332332
kw.update(kwargs)
333333
try:
334334
logger.debug(path)
335335
session = await self.set_session()
336336
r = await session.get(self.encode_url(path), **kw)
337337
async with r:
338+
if strict:
339+
self._raise_not_found_for_status(r, path)
338340
return r.status < 400
341+
except FileNotFoundError:
342+
return False
339343
except aiohttp.ClientError:
344+
if strict:
345+
raise
340346
return False
341347

342348
async def _isfile(self, path, **kwargs):

fsspec/implementations/http_sync.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,14 +463,20 @@ def _process_limits(self, url, start, end):
463463
end -= 1 # bytes range is inclusive
464464
return f"bytes={start}-{end}"
465465

466-
def exists(self, path, **kwargs):
466+
def exists(self, path, strict=False, **kwargs):
467467
kw = self.kwargs.copy()
468468
kw.update(kwargs)
469469
try:
470470
logger.debug(path)
471471
r = self.session.get(self.encode_url(path), **kw)
472+
if strict:
473+
self._raise_not_found_for_status(r, path)
472474
return r.status_code < 400
475+
except FileNotFoundError:
476+
return False
473477
except Exception:
478+
if strict:
479+
raise
474480
return False
475481

476482
def isfile(self, path, **kwargs):

fsspec/implementations/tests/test_http.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,14 @@ def test_exists(server):
163163
h.cat(server.address + "/notafile")
164164

165165

166+
def test_exists_strict(server):
167+
h = fsspec.filesystem("http")
168+
assert not h.exists(server.address + "/notafile", strict=True)
169+
with pytest.raises(aiohttp.ClientResponseError) as e:
170+
h.exists(server.address + "/unauthorized", strict=True)
171+
assert e.value.status == 401
172+
173+
166174
def test_read(server):
167175
h = fsspec.filesystem("http")
168176
out = server.realfile

fsspec/implementations/tests/test_http_sync.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import fsspec.utils
9-
from fsspec.tests.conftest import data, reset_files, server, win # noqa: F401
9+
from fsspec.tests.conftest import data, requests, reset_files, server, win # noqa: F401
1010

1111

1212
@pytest.fixture()
@@ -147,6 +147,14 @@ def test_exists(server, sync):
147147
h.cat(server.address + "/notafile")
148148

149149

150+
def test_exists_strict(server, sync):
151+
h = fsspec.filesystem("http")
152+
assert not h.exists(server.address + "/notafile", strict=True)
153+
with pytest.raises(requests.exceptions.HTTPError) as e:
154+
h.exists(server.address + "/unauthorized", strict=True)
155+
assert e.value.response.status_code == 401
156+
157+
150158
def test_read(server, sync):
151159
h = fsspec.filesystem("http")
152160
out = server.address + "/index/realfile"

fsspec/tests/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class HTTPTestHandler(BaseHTTPRequestHandler):
5454
"/simple/file": data,
5555
"/simple/dir/": _make_listing("/simple/dir/file"),
5656
"/simple/dir/file": data,
57+
"/unauthorized": AssertionError("shouldn't access"),
5758
}
5859
dynamic_files = {}
5960

@@ -85,6 +86,8 @@ def do_GET(self):
8586
if "redirect" in self.headers and file_path != "/index/realfile":
8687
new_url = _make_realfile(baseurl)
8788
return self._respond(301, {"Location": new_url})
89+
if file_path == "/unauthorized":
90+
return self._respond(401)
8891
if file_data is None:
8992
return self._respond(404)
9093

0 commit comments

Comments
 (0)