From bd2224fd1230b5de2f20346364ad5d372ad8f054 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 22 Oct 2024 16:01:14 -0400 Subject: [PATCH 01/10] Fix broken async reference file system _cat_file method --- fsspec/implementations/reference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index d5d3f2968..f9ce00597 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -803,7 +803,7 @@ async def _cat_file(self, path, start=None, end=None, **kwargs): return part_or_url[start:end] protocol, _ = split_protocol(part_or_url) try: - await self.fss[protocol]._cat_file(part_or_url, start=start, end=end) + return await self.fss[protocol]._cat_file(part_or_url, start=start, end=end) except Exception as e: raise ReferenceNotReachable(path, part_or_url) from e From 6bccdd96d02c3483ef9844fbfdd4a98b49b093b3 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 22 Oct 2024 21:02:08 -0400 Subject: [PATCH 02/10] Fix start and end constraints for async reference cat_file --- fsspec/implementations/reference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index f9ce00597..580db965d 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -803,7 +803,7 @@ async def _cat_file(self, path, start=None, end=None, **kwargs): return part_or_url[start:end] protocol, _ = split_protocol(part_or_url) try: - return await self.fss[protocol]._cat_file(part_or_url, start=start, end=end) + return await self.fss[protocol]._cat_file(part_or_url, start=start0, end=end0) except Exception as e: raise ReferenceNotReachable(path, part_or_url) from e From b572966132f551c99ed25ddd58afebd2ef60235b Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 08:50:16 -0400 Subject: [PATCH 03/10] Add test for async reference 
filesystem --- .../implementations/tests/test_reference.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index d82dc1771..800261c36 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -513,6 +513,28 @@ def test_cat_file_ranges(m): assert fs.cat_file("d", 1, -3) == other[4:10][1:-3] +@pytest.mark.asyncio +async def test_async_cat_file_ranges(): + fs = fsspec.filesystem( + "reference", + fo={ + "version": 1, + "refs": { + "reference_time/0": [ + "http://noaa-nwm-retro-v2-0-pds.s3.amazonaws.com/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp", + 39783, + 12, + ], + }, + }, + remote_protocol="http", + remote_options={"asynchronous": True}, + asynchronous=True, + ) + + assert await fs._cat_file("reference_time/0") == b'x^K0\xa9d\x04\x00\x03\x13\x01\x0f' + + @pytest.mark.parametrize( "fo", [ From fad218aa62790822b23a842ce01e1d84885de7d0 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 10:22:57 -0400 Subject: [PATCH 04/10] Set session for http filesystem in test --- fsspec/implementations/tests/test_reference.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index 800261c36..4f2db910c 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -515,6 +515,9 @@ def test_cat_file_ranges(m): @pytest.mark.asyncio async def test_async_cat_file_ranges(): + fss = fsspec.filesystem("http", asynchronous=True) + session = await fss.set_session() + fs = fsspec.filesystem( "reference", fo={ @@ -527,12 +530,13 @@ async def test_async_cat_file_ranges(): ], }, }, - remote_protocol="http", - remote_options={"asynchronous": True}, + fs=fss, asynchronous=True, ) + assert await 
fs._cat_file("reference_time/0") == b'x^K0\xa9d\x04\x00\x03\x13\x01\x0f' + await session.close() @pytest.mark.parametrize( From d2917f73deb714f07e9ab2f3aad3a0fe31442609 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 10:34:41 -0400 Subject: [PATCH 05/10] lint --- fsspec/implementations/tests/test_reference.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index 4f2db910c..04fe582e0 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -534,7 +534,6 @@ async def test_async_cat_file_ranges(): asynchronous=True, ) - assert await fs._cat_file("reference_time/0") == b'x^K0\xa9d\x04\x00\x03\x13\x01\x0f' await session.close() From 640c84c0c2c01de90800a3e0fa536501fa8457d9 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 11:31:44 -0400 Subject: [PATCH 06/10] Use https specifically --- fsspec/implementations/tests/test_reference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index 04fe582e0..8451b1a20 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -524,13 +524,13 @@ async def test_async_cat_file_ranges(): "version": 1, "refs": { "reference_time/0": [ - "http://noaa-nwm-retro-v2-0-pds.s3.amazonaws.com/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp", + "https://noaa-nwm-retro-v2-0-pds.s3.amazonaws.com/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp", 39783, 12, ], }, }, - fs=fss, + fs={'https': fss}, asynchronous=True, ) From a0f13c2699b3fcd55c4a8af09b45ada334dce796 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 12:04:32 -0400 Subject: [PATCH 07/10] lint --- fsspec/implementations/tests/test_reference.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index 8451b1a20..5d1b9a8dc 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -530,11 +530,13 @@ async def test_async_cat_file_ranges(): ], }, }, - fs={'https': fss}, + fs={"https": fss}, asynchronous=True, ) - assert await fs._cat_file("reference_time/0") == b'x^K0\xa9d\x04\x00\x03\x13\x01\x0f' + assert ( + await fs._cat_file("reference_time/0") == b"x^K0\xa9d\x04\x00\x03\x13\x01\x0f" + ) await session.close() From a01ffda633fcc698024153d79e31f103f57a9c5e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 23 Oct 2024 17:23:59 -0400 Subject: [PATCH 08/10] more params --- fsspec/implementations/tests/test_reference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index 5d1b9a8dc..b133c7ce6 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -515,7 +515,7 @@ def test_cat_file_ranges(m): @pytest.mark.asyncio async def test_async_cat_file_ranges(): - fss = fsspec.filesystem("http", asynchronous=True) + fss = fsspec.filesystem("https", asynchronous=True) session = await fss.set_session() fs = fsspec.filesystem( @@ -531,6 +531,7 @@ async def test_async_cat_file_ranges(): }, }, fs={"https": fss}, + remote_protocol="https", asynchronous=True, ) From 4ff7afe2f2951c92b2e8c582be199f2574688336 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 23 Oct 2024 17:25:54 -0400 Subject: [PATCH 09/10] lint --- fsspec/implementations/reference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index 580db965d..102ef0efe 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -803,7 
+803,9 @@ async def _cat_file(self, path, start=None, end=None, **kwargs): return part_or_url[start:end] protocol, _ = split_protocol(part_or_url) try: - return await self.fss[protocol]._cat_file(part_or_url, start=start0, end=end0) + return await self.fss[protocol]._cat_file( + part_or_url, start=start0, end=end0 + ) except Exception as e: raise ReferenceNotReachable(path, part_or_url) from e From ec30e323f706b0d2e6e444b487d64d834edbbff0 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 23 Oct 2024 17:31:33 -0400 Subject: [PATCH 10/10] hammer --- fsspec/implementations/tests/test_reference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index b133c7ce6..12233d73f 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -515,6 +515,7 @@ def test_cat_file_ranges(m): @pytest.mark.asyncio async def test_async_cat_file_ranges(): + fsspec.get_filesystem_class("http").clear_instance_cache() fss = fsspec.filesystem("https", asynchronous=True) session = await fss.set_session()