Skip to content

Commit 78cf39b

Browse files
committed
Add explanatory comments.
1 parent 2102882 commit 78cf39b

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

fsspec/caching.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,19 +164,35 @@ def _fetch(self, start: int | None, end: int | None) -> bytes:
164164
start_block = start // self.blocksize
165165
end_block = end // self.blocksize
166166
block_range = range(start_block, end_block + 1)
167+
# Determine which blocks need to be fetched. This sequence is sorted by construction.
167168
need = (i for i in block_range if i not in self.blocks)
169+
# Count the number of blocks already cached
168170
self.hit_count += sum(1 for i in block_range if i in self.blocks)
169171

170172
# Consolidate needed blocks.
171-
# Algorithm adapted from Python 2.x itertools documentation
173+
# Algorithm adapted from Python 2.x itertools documentation.
174+
# We are grouping an enumerated sequence of blocks. By computing the difference
175+
# between an ascending range (provided by enumerate) and the needed block numbers,
176+
# we can detect when the block number skips values. The key computes this difference.
177+
# Whenever the difference changes, we know that we have skipped over previously cached block(s),
178+
# and a new group is started. In other words, this algorithm neatly groups
179+
# runs of consecutive block numbers so they can be fetched together.
172180
for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
181+
# Extract the blocks from the enumerated sequence
173182
_blocks = tuple(map(itemgetter(1), _blocks))
183+
# Compute start of first block
174184
sstart = _blocks[0] * self.blocksize
185+
# Compute the end of the last block. Last block may not be full size.
175186
send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
187+
188+
# Fetch bytes (could be multiple consecutive blocks)
176189
self.total_requested_bytes += send - sstart
177190
logger.debug(f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})")
178191
self.cache[sstart:send] = self.fetcher(sstart, send)
192+
193+
# Update set of cached blocks
179194
self.blocks.update(_blocks)
195+
# Update cache statistics with number of blocks we had to cache
180196
self.miss_count += len(_blocks)
181197

182198
return self.cache[start:end]

0 commit comments

Comments
 (0)