Skip to content

Commit 1de5e1d

Browse files
authored
Remove prefetching loop (#1984)
1 parent 9603a7c commit 1de5e1d

File tree

2 files changed

+14
-13
lines changed

2 files changed

+14
-13
lines changed

fsspec/caching.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ def _fetch(self, start: int | None, end: int | None) -> bytes:
214214
if self.multi_fetcher:
215215
logger.debug(f"MMap get blocks {ranges}")
216216
for idx, r in enumerate(self.multi_fetcher(ranges)):
217-
(sstart, send) = ranges[idx]
217+
sstart, send = ranges[idx]
218218
logger.debug(f"MMap copy block ({sstart}-{send}")
219219
self.cache[sstart:send] = r
220220
else:
@@ -391,19 +391,8 @@ def _fetch(self, start: int | None, end: int | None) -> bytes:
391391
if start >= self.size or start >= end:
392392
return b""
393393

394-
# byte position -> block numbers
395-
start_block_number = start // self.blocksize
396-
end_block_number = end // self.blocksize
397-
398-
# these are cached, so safe to do multiple calls for the same start and end.
399-
for block_number in range(start_block_number, end_block_number + 1):
400-
self._fetch_block_cached(block_number)
401-
402394
return self._read_cache(
403-
start,
404-
end,
405-
start_block_number=start_block_number,
406-
end_block_number=end_block_number,
395+
start, end, start // self.blocksize, (end - 1) // self.blocksize
407396
)
408397

409398
def _fetch_block(self, block_number: int) -> bytes:
@@ -439,6 +428,8 @@ def _read_cache(
439428
"""
440429
start_pos = start % self.blocksize
441430
end_pos = end % self.blocksize
431+
if end_pos == 0:
432+
end_pos = self.blocksize
442433

443434
self.hit_count += 1
444435
if start_block_number == end_block_number:

fsspec/tests/test_caches.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,16 @@ def test_block_cache_lru():
6565
assert cache.total_requested_bytes == block_size * cache.miss_count
6666

6767

68+
def test_block_cache_lru_no_redundant_reads():
69+
block_size = 4
70+
maxblocks = 2
71+
cache = BlockCache(
72+
block_size, letters_fetcher, len(string.ascii_letters), maxblocks=maxblocks
73+
)
74+
cache._fetch(0, block_size * (maxblocks + 1))
75+
assert cache.cache_info().misses == 3
76+
77+
6878
def test_first_cache():
6979
"""
7080
FirstChunkCache is a cache that only caches the first chunk of data

0 commit comments

Comments
 (0)