@@ -273,34 +273,36 @@ void block_reader_start(struct block_reader *br, struct block_iter *it)
273
273
it -> next_off = br -> header_off + 4 ;
274
274
}
275
275
276
- struct restart_find_args {
276
+ struct restart_needle_less_args {
277
277
int error ;
278
- struct strbuf key ;
279
- struct block_reader * r ;
278
+ struct strbuf needle ;
279
+ struct block_reader * reader ;
280
280
};
281
281
282
- static int restart_key_less (size_t idx , void * args )
282
+ static int restart_needle_less (size_t idx , void * _args )
283
283
{
284
- struct restart_find_args * a = args ;
285
- uint32_t off = block_reader_restart_offset (a -> r , idx );
284
+ struct restart_needle_less_args * args = _args ;
285
+ uint32_t off = block_reader_restart_offset (args -> reader , idx );
286
286
struct string_view in = {
287
- .buf = a -> r -> block .data + off ,
288
- .len = a -> r -> block_len - off ,
287
+ .buf = args -> reader -> block .data + off ,
288
+ .len = args -> reader -> block_len - off ,
289
289
};
290
-
291
- /* the restart key is verbatim in the block, so this could avoid the
292
- alloc for decoding the key */
293
- struct strbuf rkey = STRBUF_INIT ;
290
+ struct strbuf kth_restart_key = STRBUF_INIT ;
294
291
uint8_t unused_extra ;
295
- int n = reftable_decode_key (& rkey , & unused_extra , in );
296
- int result ;
292
+ int result , n ;
293
+
294
+ /*
295
+ * TODO: The restart key is verbatim in the block, so we can in theory
296
+ * avoid decoding the key and thus save some allocations.
297
+ */
298
+ n = reftable_decode_key (& kth_restart_key , & unused_extra , in );
297
299
if (n < 0 ) {
298
- a -> error = 1 ;
300
+ args -> error = 1 ;
299
301
return -1 ;
300
302
}
301
303
302
- result = strbuf_cmp (& a -> key , & rkey );
303
- strbuf_release (& rkey );
304
+ result = strbuf_cmp (& args -> needle , & kth_restart_key );
305
+ strbuf_release (& kth_restart_key );
304
306
return result < 0 ;
305
307
}
306
308
@@ -376,9 +378,9 @@ void block_iter_close(struct block_iter *it)
376
378
int block_reader_seek (struct block_reader * br , struct block_iter * it ,
377
379
struct strbuf * want )
378
380
{
379
- struct restart_find_args args = {
380
- .key = * want ,
381
- .r = br ,
381
+ struct restart_needle_less_args args = {
382
+ .needle = * want ,
383
+ .reader = br ,
382
384
};
383
385
struct block_iter next = BLOCK_ITER_INIT ;
384
386
struct reftable_record rec ;
@@ -390,7 +392,38 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
390
392
goto done ;
391
393
}
392
394
393
- i = binsearch (br -> restart_count , & restart_key_less , & args );
395
+ /*
396
+ * Perform a binary search over the block's restart points, which
397
+ * avoids doing a linear scan over the whole block. Like this, we
398
+ * identify the section of the block that should contain our key.
399
+ *
400
+ * Note that we explicitly search for the first restart point _greater_
401
+ * than the sought-after record, not _greater or equal_ to it. In case
402
+ * the sought-after record is located directly at the restart point we
403
+ * would otherwise start doing the linear search at the preceding
404
+ * restart point. While that works alright, we would end up scanning
405
+ * too many record.
406
+ */
407
+ i = binsearch (br -> restart_count , & restart_needle_less , & args );
408
+
409
+ /*
410
+ * Now there are multiple cases:
411
+ *
412
+ * - `i == 0`: The wanted record is smaller than the record found at
413
+ * the first restart point. As the first restart point is the first
414
+ * record in the block, our wanted record cannot be located in this
415
+ * block at all. We still need to position the iterator so that the
416
+ * next call to `block_iter_next()` will yield an end-of-iterator
417
+ * signal.
418
+ *
419
+ * - `i == restart_count`: The wanted record was not found at any of
420
+ * the restart points. As there is no restart point at the end of
421
+ * the section the record may thus be contained in the last block.
422
+ *
423
+ * - `i > 0`: The wanted record must be contained in the section
424
+ * before the found restart point. We thus do a linear search
425
+ * starting from the preceding restart point.
426
+ */
394
427
if (i > 0 )
395
428
it -> next_off = block_reader_restart_offset (br , i - 1 );
396
429
else
@@ -399,21 +432,34 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
399
432
400
433
reftable_record_init (& rec , block_reader_type (br ));
401
434
402
- /* We're looking for the last entry less/equal than the wanted key, so
403
- we have to go one entry too far and then back up.
404
- */
435
+ /*
436
+ * We're looking for the last entry less than the wanted key so that
437
+ * the next call to `block_reader_next()` would yield the wanted
438
+ * record. We thus don't want to position our reader at the sought
439
+ * after record, but one before. To do so, we have to go one entry too
440
+ * far and then back up.
441
+ */
405
442
while (1 ) {
406
443
block_iter_copy_from (& next , it );
407
444
err = block_iter_next (& next , & rec );
408
445
if (err < 0 )
409
446
goto done ;
410
-
411
- reftable_record_key (& rec , & it -> last_key );
412
- if (err > 0 || strbuf_cmp (& it -> last_key , want ) >= 0 ) {
447
+ if (err > 0 ) {
413
448
err = 0 ;
414
449
goto done ;
415
450
}
416
451
452
+ /*
453
+ * Check whether the current key is greater or equal to the
454
+ * sought-after key. In case it is greater we know that the
455
+ * record does not exist in the block and can thus abort early.
456
+ * In case it is equal to the sought-after key we have found
457
+ * the desired record.
458
+ */
459
+ reftable_record_key (& rec , & it -> last_key );
460
+ if (strbuf_cmp (& it -> last_key , want ) >= 0 )
461
+ goto done ;
462
+
417
463
block_iter_copy_from (it , & next );
418
464
}
419
465
0 commit comments