Skip to content

Commit 33bbc21

Browse files
committed
Merge branch 'ps/reftable-block-iteration-optim'
The code to iterate over reftable blocks has seen some optimization to reduce memory allocation and deallocation. * ps/reftable-block-iteration-optim: reftable/block: avoid copying block iterators on seek reftable/block: reuse `zstream` state on inflation reftable/block: open-code call to `uncompress2()` reftable/block: reuse uncompressed blocks reftable/reader: iterate to next block in place reftable/block: move ownership of block reader into `struct table_iter` reftable/block: introduce `block_reader_release()` reftable/block: better grouping of functions reftable/block: merge `block_iter_seek()` and `block_reader_seek()` reftable/block: rename `block_reader_start()`
2 parents 00e10ef + 9da5c99 commit 33bbc21

File tree

5 files changed

+229
-178
lines changed

5 files changed

+229
-178
lines changed

reftable/block.c

Lines changed: 105 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,6 @@ int block_writer_finish(struct block_writer *w)
175175
return w->next;
176176
}
177177

178-
uint8_t block_reader_type(struct block_reader *r)
179-
{
180-
return r->block.data[r->header_off];
181-
}
182-
183178
int block_reader_init(struct block_reader *br, struct reftable_block *block,
184179
uint32_t header_off, uint32_t table_block_size,
185180
int hash_size)
@@ -191,45 +186,66 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
191186
uint16_t restart_count = 0;
192187
uint32_t restart_start = 0;
193188
uint8_t *restart_bytes = NULL;
194-
uint8_t *uncompressed = NULL;
189+
190+
reftable_block_done(&br->block);
195191

196192
if (!reftable_is_block_type(typ)) {
197193
err = REFTABLE_FORMAT_ERROR;
198194
goto done;
199195
}
200196

201197
if (typ == BLOCK_TYPE_LOG) {
202-
int block_header_skip = 4 + header_off;
203-
uLongf dst_len = sz - block_header_skip; /* total size of dest
204-
buffer. */
205-
uLongf src_len = block->len - block_header_skip;
198+
uint32_t block_header_skip = 4 + header_off;
199+
uLong dst_len = sz - block_header_skip;
200+
uLong src_len = block->len - block_header_skip;
206201

207202
/* Log blocks specify the *uncompressed* size in their header. */
208-
REFTABLE_ALLOC_ARRAY(uncompressed, sz);
203+
REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
204+
br->uncompressed_cap);
209205

210206
/* Copy over the block header verbatim. It's not compressed. */
211-
memcpy(uncompressed, block->data, block_header_skip);
207+
memcpy(br->uncompressed_data, block->data, block_header_skip);
212208

213-
/* Uncompress */
214-
if (Z_OK !=
215-
uncompress2(uncompressed + block_header_skip, &dst_len,
216-
block->data + block_header_skip, &src_len)) {
209+
if (!br->zstream) {
210+
REFTABLE_CALLOC_ARRAY(br->zstream, 1);
211+
err = inflateInit(br->zstream);
212+
} else {
213+
err = inflateReset(br->zstream);
214+
}
215+
if (err != Z_OK) {
217216
err = REFTABLE_ZLIB_ERROR;
218217
goto done;
219218
}
220219

221-
if (dst_len + block_header_skip != sz) {
220+
br->zstream->next_in = block->data + block_header_skip;
221+
br->zstream->avail_in = src_len;
222+
br->zstream->next_out = br->uncompressed_data + block_header_skip;
223+
br->zstream->avail_out = dst_len;
224+
225+
/*
226+
* We know both input as well as output size, and we know that
227+
* the sizes should never be bigger than `uInt_MAX` because
228+
* blocks can at most be 16MB large. We can thus use `Z_FINISH`
229+
* here to instruct zlib to inflate the data in one go, which
230+
* is more efficient than using `Z_NO_FLUSH`.
231+
*/
232+
err = inflate(br->zstream, Z_FINISH);
233+
if (err != Z_STREAM_END) {
234+
err = REFTABLE_ZLIB_ERROR;
235+
goto done;
236+
}
237+
err = 0;
238+
239+
if (br->zstream->total_out + block_header_skip != sz) {
222240
err = REFTABLE_FORMAT_ERROR;
223241
goto done;
224242
}
225243

226244
/* We're done with the input data. */
227245
reftable_block_done(block);
228-
block->data = uncompressed;
229-
uncompressed = NULL;
246+
block->data = br->uncompressed_data;
230247
block->len = sz;
231-
block->source = malloc_block_source();
232-
full_block_size = src_len + block_header_skip;
248+
full_block_size = src_len + block_header_skip - br->zstream->avail_in;
233249
} else if (full_block_size == 0) {
234250
full_block_size = sz;
235251
} else if (sz < full_block_size && sz < block->len &&
@@ -257,26 +273,60 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
257273
br->restart_bytes = restart_bytes;
258274

259275
done:
260-
reftable_free(uncompressed);
261276
return err;
262277
}
263278

264-
static uint32_t block_reader_restart_offset(struct block_reader *br, int i)
279+
void block_reader_release(struct block_reader *br)
280+
{
281+
inflateEnd(br->zstream);
282+
reftable_free(br->zstream);
283+
reftable_free(br->uncompressed_data);
284+
reftable_block_done(&br->block);
285+
}
286+
287+
uint8_t block_reader_type(const struct block_reader *r)
288+
{
289+
return r->block.data[r->header_off];
290+
}
291+
292+
int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
293+
{
294+
int off = br->header_off + 4, n;
295+
struct string_view in = {
296+
.buf = br->block.data + off,
297+
.len = br->block_len - off,
298+
};
299+
uint8_t extra = 0;
300+
301+
strbuf_reset(key);
302+
303+
n = reftable_decode_key(key, &extra, in);
304+
if (n < 0)
305+
return n;
306+
if (!key->len)
307+
return REFTABLE_FORMAT_ERROR;
308+
309+
return 0;
310+
}
311+
312+
static uint32_t block_reader_restart_offset(const struct block_reader *br, int i)
265313
{
266314
return get_be24(br->restart_bytes + 3 * i);
267315
}
268316

269-
void block_reader_start(struct block_reader *br, struct block_iter *it)
317+
void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
270318
{
271-
it->br = br;
319+
it->block = br->block.data;
320+
it->block_len = br->block_len;
321+
it->hash_size = br->hash_size;
272322
strbuf_reset(&it->last_key);
273323
it->next_off = br->header_off + 4;
274324
}
275325

276326
struct restart_needle_less_args {
277327
int error;
278328
struct strbuf needle;
279-
struct block_reader *reader;
329+
const struct block_reader *reader;
280330
};
281331

282332
static int restart_needle_less(size_t idx, void *_args)
@@ -315,25 +365,17 @@ static int restart_needle_less(size_t idx, void *_args)
315365
return args->needle.len < suffix_len;
316366
}
317367

318-
void block_iter_copy_from(struct block_iter *dest, struct block_iter *src)
319-
{
320-
dest->br = src->br;
321-
dest->next_off = src->next_off;
322-
strbuf_reset(&dest->last_key);
323-
strbuf_addbuf(&dest->last_key, &src->last_key);
324-
}
325-
326368
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
327369
{
328370
struct string_view in = {
329-
.buf = it->br->block.data + it->next_off,
330-
.len = it->br->block_len - it->next_off,
371+
.buf = (unsigned char *) it->block + it->next_off,
372+
.len = it->block_len - it->next_off,
331373
};
332374
struct string_view start = in;
333375
uint8_t extra = 0;
334376
int n = 0;
335377

336-
if (it->next_off >= it->br->block_len)
378+
if (it->next_off >= it->block_len)
337379
return 1;
338380

339381
n = reftable_decode_key(&it->last_key, &extra, in);
@@ -343,7 +385,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
343385
return REFTABLE_FORMAT_ERROR;
344386

345387
string_view_consume(&in, n);
346-
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size,
388+
n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
347389
&it->scratch);
348390
if (n < 0)
349391
return -1;
@@ -353,29 +395,13 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
353395
return 0;
354396
}
355397

356-
int block_reader_first_key(struct block_reader *br, struct strbuf *key)
398+
void block_iter_reset(struct block_iter *it)
357399
{
358-
int off = br->header_off + 4, n;
359-
struct string_view in = {
360-
.buf = br->block.data + off,
361-
.len = br->block_len - off,
362-
};
363-
uint8_t extra = 0;
364-
365-
strbuf_reset(key);
366-
367-
n = reftable_decode_key(key, &extra, in);
368-
if (n < 0)
369-
return n;
370-
if (!key->len)
371-
return REFTABLE_FORMAT_ERROR;
372-
373-
return 0;
374-
}
375-
376-
int block_iter_seek(struct block_iter *it, struct strbuf *want)
377-
{
378-
return block_reader_seek(it->br, it, want);
400+
strbuf_reset(&it->last_key);
401+
it->next_off = 0;
402+
it->block = NULL;
403+
it->block_len = 0;
404+
it->hash_size = 0;
379405
}
380406

381407
void block_iter_close(struct block_iter *it)
@@ -384,14 +410,13 @@ void block_iter_close(struct block_iter *it)
384410
strbuf_release(&it->scratch);
385411
}
386412

387-
int block_reader_seek(struct block_reader *br, struct block_iter *it,
388-
struct strbuf *want)
413+
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
414+
struct strbuf *want)
389415
{
390416
struct restart_needle_less_args args = {
391417
.needle = *want,
392418
.reader = br,
393419
};
394-
struct block_iter next = BLOCK_ITER_INIT;
395420
struct reftable_record rec;
396421
int err = 0;
397422
size_t i;
@@ -436,7 +461,9 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
436461
it->next_off = block_reader_restart_offset(br, i - 1);
437462
else
438463
it->next_off = br->header_off + 4;
439-
it->br = br;
464+
it->block = br->block.data;
465+
it->block_len = br->block_len;
466+
it->hash_size = br->hash_size;
440467

441468
reftable_record_init(&rec, block_reader_type(br));
442469

@@ -448,11 +475,13 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
448475
* far and then back up.
449476
*/
450477
while (1) {
451-
block_iter_copy_from(&next, it);
452-
err = block_iter_next(&next, &rec);
478+
size_t prev_off = it->next_off;
479+
480+
err = block_iter_next(it, &rec);
453481
if (err < 0)
454482
goto done;
455483
if (err > 0) {
484+
it->next_off = prev_off;
456485
err = 0;
457486
goto done;
458487
}
@@ -463,18 +492,23 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
463492
* record does not exist in the block and can thus abort early.
464493
* In case it is equal to the sought-after key we have found
465494
* the desired record.
495+
*
496+
* Note that we store the next record's key record directly in
497+
* `last_key` without restoring the key of the preceding record
498+
* in case we need to go one record back. This is safe to do as
499+
* `block_iter_next()` would return the ref whose key is equal
500+
* to `last_key` now, and naturally all keys share a prefix
501+
* with themselves.
466502
*/
467503
reftable_record_key(&rec, &it->last_key);
468-
if (strbuf_cmp(&it->last_key, want) >= 0)
504+
if (strbuf_cmp(&it->last_key, want) >= 0) {
505+
it->next_off = prev_off;
469506
goto done;
470-
471-
block_iter_copy_from(it, &next);
507+
}
472508
}
473509

474510
done:
475-
block_iter_close(&next);
476511
reftable_record_release(&rec);
477-
478512
return err;
479513
}
480514

reftable/block.h

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ int block_writer_finish(struct block_writer *w);
5656
/* clears out internally allocated block_writer members. */
5757
void block_writer_release(struct block_writer *bw);
5858

59+
struct z_stream;
60+
5961
/* Read a block. */
6062
struct block_reader {
6163
/* offset of the block header; nonzero for the first block in a
@@ -66,6 +68,11 @@ struct block_reader {
6668
struct reftable_block block;
6769
int hash_size;
6870

71+
/* Uncompressed data for log entries. */
72+
z_stream *zstream;
73+
unsigned char *uncompressed_data;
74+
size_t uncompressed_cap;
75+
6976
/* size of the data, excluding restart data. */
7077
uint32_t block_len;
7178
uint8_t *restart_bytes;
@@ -76,11 +83,26 @@ struct block_reader {
7683
uint32_t full_block_size;
7784
};
7885

86+
/* initializes a block reader. */
87+
int block_reader_init(struct block_reader *br, struct reftable_block *bl,
88+
uint32_t header_off, uint32_t table_block_size,
89+
int hash_size);
90+
91+
void block_reader_release(struct block_reader *br);
92+
93+
/* Returns the block type (eg. 'r' for refs) */
94+
uint8_t block_reader_type(const struct block_reader *r);
95+
96+
/* Decodes the first key in the block */
97+
int block_reader_first_key(const struct block_reader *br, struct strbuf *key);
98+
7999
/* Iterate over entries in a block */
80100
struct block_iter {
81101
/* offset within the block of the next entry to read. */
82102
uint32_t next_off;
83-
struct block_reader *br;
103+
const unsigned char *block;
104+
size_t block_len;
105+
int hash_size;
84106

85107
/* key for last entry we read. */
86108
struct strbuf last_key;
@@ -92,31 +114,18 @@ struct block_iter {
92114
.scratch = STRBUF_INIT, \
93115
}
94116

95-
/* initializes a block reader. */
96-
int block_reader_init(struct block_reader *br, struct reftable_block *bl,
97-
uint32_t header_off, uint32_t table_block_size,
98-
int hash_size);
99-
100117
/* Position `it` at start of the block */
101-
void block_reader_start(struct block_reader *br, struct block_iter *it);
118+
void block_iter_seek_start(struct block_iter *it, const struct block_reader *br);
102119

103120
/* Position `it` to the `want` key in the block */
104-
int block_reader_seek(struct block_reader *br, struct block_iter *it,
105-
struct strbuf *want);
106-
107-
/* Returns the block type (eg. 'r' for refs) */
108-
uint8_t block_reader_type(struct block_reader *r);
109-
110-
/* Decodes the first key in the block */
111-
int block_reader_first_key(struct block_reader *br, struct strbuf *key);
112-
113-
void block_iter_copy_from(struct block_iter *dest, struct block_iter *src);
121+
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
122+
struct strbuf *want);
114123

115124
/* return < 0 for error, 0 for OK, > 0 for EOF. */
116125
int block_iter_next(struct block_iter *it, struct reftable_record *rec);
117126

118-
/* Seek to `want` with in the block pointed to by `it` */
119-
int block_iter_seek(struct block_iter *it, struct strbuf *want);
127+
/* Reset the block iterator to pristine state without releasing its memory. */
128+
void block_iter_reset(struct block_iter *it);
120129

121130
/* deallocate memory for `it`. The block reader and its block is left intact. */
122131
void block_iter_close(struct block_iter *it);

0 commit comments

Comments
 (0)