@@ -164,19 +164,35 @@ def _fetch(self, start: int | None, end: int | None) -> bytes:
164164 start_block = start // self .blocksize
165165 end_block = end // self .blocksize
166166 block_range = range (start_block , end_block + 1 )
167+ # Determine which blocks need to be fetched. This sequence is sorted by construction.
167168 need = (i for i in block_range if i not in self .blocks )
169+ # Count the number of blocks already cached
168170 self .hit_count += sum (1 for i in block_range if i in self .blocks )
169171
170172 # Consolidate needed blocks.
171- # Algorithm adapted from Python 2.x itertools documentation
173+ # Algorithm adapted from Python 2.x itertools documentation.
174+ # We are grouping an enumerated sequence of blocks. By tracking the difference
175+ # between an ascending index (provided by enumerate) and the needed block numbers,
176+ # we can detect when the block numbers skip values. The key computes this difference.
177+ # Whenever the difference changes, we know that we skipped previously cached block(s),
178+ # and a new group is started. In other words, this algorithm neatly groups
179+ # runs of consecutive block numbers so they can be fetched together.
172180 for _ , _blocks in groupby (enumerate (need ), key = lambda x : x [0 ] - x [1 ]):
181+ # Extract the blocks from the enumerated sequence
173182 _blocks = tuple (map (itemgetter (1 ), _blocks ))
183+ # Compute start of first block
174184 sstart = _blocks [0 ] * self .blocksize
185+ # Compute the end of the last block. Last block may not be full size.
175186 send = min (_blocks [- 1 ] * self .blocksize + self .blocksize , self .size )
187+
188+ # Fetch bytes (could be multiple consecutive blocks)
176189 self .total_requested_bytes += send - sstart
177190 logger .debug (f"MMap get blocks { _blocks [0 ]} -{ _blocks [- 1 ]} ({ sstart } -{ send } )" )
178191 self .cache [sstart :send ] = self .fetcher (sstart , send )
192+
193+ # Update set of cached blocks
179194 self .blocks .update (_blocks )
195+ # Update cache statistics with number of blocks we had to cache
180196 self .miss_count += len (_blocks )
181197
182198 return self .cache [start :end ]
0 commit comments