Skip to content

Commit 448a74e

Browse files
committed
Merge branch 'ps/reftable-iteration-perf-part2'
The code to iterate over refs with the reftable backend has seen some optimization. * ps/reftable-iteration-perf-part2: refs/reftable: precompute prefix length reftable: allow inlining of a few functions reftable/record: decode keys in place reftable/record: reuse refname when copying reftable/record: reuse refname when decoding reftable/merged: avoid duplicate pqueue emptiness check reftable/merged: circumvent pqueue with single subiter reftable/merged: handle subiter cleanup on close only reftable/merged: remove unnecessary null check for subiters reftable/merged: make subiters own their records reftable/merged: advance subiter on subsequent iteration reftable/merged: make `merged_iter` structure private reftable/pq: use `size_t` to track iterator index
2 parents 066124d + 43f70ea commit 448a74e

File tree

14 files changed

+175
-181
lines changed

14 files changed

+175
-181
lines changed

refs/reftable-backend.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ struct reftable_ref_iterator {
346346
struct object_id oid;
347347

348348
const char *prefix;
349+
size_t prefix_len;
349350
unsigned int flags;
350351
int err;
351352
};
@@ -374,8 +375,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator)
374375
continue;
375376
}
376377

377-
if (iter->prefix &&
378-
strncmp(iter->prefix, iter->ref.refname, strlen(iter->prefix))) {
378+
if (iter->prefix_len &&
379+
strncmp(iter->prefix, iter->ref.refname, iter->prefix_len)) {
379380
iter->err = 1;
380381
break;
381382
}
@@ -484,6 +485,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_
484485
iter = xcalloc(1, sizeof(*iter));
485486
base_ref_iterator_init(&iter->base, &reftable_ref_iterator_vtable);
486487
iter->prefix = prefix;
488+
iter->prefix_len = prefix ? strlen(prefix) : 0;
487489
iter->base.oid = &iter->oid;
488490
iter->flags = flags;
489491
iter->refs = refs;

reftable/block.c

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -291,9 +291,8 @@ static int restart_key_less(size_t idx, void *args)
291291
/* the restart key is verbatim in the block, so this could avoid the
292292
alloc for decoding the key */
293293
struct strbuf rkey = STRBUF_INIT;
294-
struct strbuf last_key = STRBUF_INIT;
295294
uint8_t unused_extra;
296-
int n = reftable_decode_key(&rkey, &unused_extra, last_key, in);
295+
int n = reftable_decode_key(&rkey, &unused_extra, in);
297296
int result;
298297
if (n < 0) {
299298
a->error = 1;
@@ -326,35 +325,34 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
326325
if (it->next_off >= it->br->block_len)
327326
return 1;
328327

329-
n = reftable_decode_key(&it->key, &extra, it->last_key, in);
328+
n = reftable_decode_key(&it->last_key, &extra, in);
330329
if (n < 0)
331330
return -1;
332-
333-
if (!it->key.len)
331+
if (!it->last_key.len)
334332
return REFTABLE_FORMAT_ERROR;
335333

336334
string_view_consume(&in, n);
337-
n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size);
335+
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size);
338336
if (n < 0)
339337
return -1;
340338
string_view_consume(&in, n);
341339

342-
strbuf_swap(&it->last_key, &it->key);
343340
it->next_off += start.len - in.len;
344341
return 0;
345342
}
346343

347344
int block_reader_first_key(struct block_reader *br, struct strbuf *key)
348345
{
349-
struct strbuf empty = STRBUF_INIT;
350-
int off = br->header_off + 4;
346+
int off = br->header_off + 4, n;
351347
struct string_view in = {
352348
.buf = br->block.data + off,
353349
.len = br->block_len - off,
354350
};
355-
356351
uint8_t extra = 0;
357-
int n = reftable_decode_key(key, &extra, empty, in);
352+
353+
strbuf_reset(key);
354+
355+
n = reftable_decode_key(key, &extra, in);
358356
if (n < 0)
359357
return n;
360358
if (!key->len)
@@ -371,7 +369,6 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want)
371369
void block_iter_close(struct block_iter *it)
372370
{
373371
strbuf_release(&it->last_key);
374-
strbuf_release(&it->key);
375372
}
376373

377374
int block_reader_seek(struct block_reader *br, struct block_iter *it,
@@ -408,8 +405,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
408405
if (err < 0)
409406
goto done;
410407

411-
reftable_record_key(&rec, &it->key);
412-
if (err > 0 || strbuf_cmp(&it->key, want) >= 0) {
408+
reftable_record_key(&rec, &it->last_key);
409+
if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) {
413410
err = 0;
414411
goto done;
415412
}

reftable/block.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,10 @@ struct block_iter {
8484

8585
/* key for last entry we read. */
8686
struct strbuf last_key;
87-
struct strbuf key;
8887
};
8988

9089
#define BLOCK_ITER_INIT { \
9190
.last_key = STRBUF_INIT, \
92-
.key = STRBUF_INIT, \
9391
}
9492

9593
/* initializes a block reader. */

reftable/iter.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@ license that can be found in the LICENSE file or at
1616
#include "reader.h"
1717
#include "reftable-error.h"
1818

19-
int iterator_is_null(struct reftable_iterator *it)
20-
{
21-
return !it->ops;
22-
}
23-
2419
static void filtering_ref_iterator_close(void *iter_arg)
2520
{
2621
struct filtering_ref_iterator *fri = iter_arg;

reftable/iter.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@ license that can be found in the LICENSE file or at
1616
#include "reftable-iterator.h"
1717
#include "reftable-generic.h"
1818

19-
/* Returns true for a zeroed out iterator, such as the one returned from
20-
* iterator_destroy. */
21-
int iterator_is_null(struct reftable_iterator *it);
22-
2319
/* iterator that produces only ref records that point to `oid` */
2420
struct filtering_ref_iterator {
2521
int double_check;

reftable/merged.c

Lines changed: 72 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,37 @@ license that can be found in the LICENSE file or at
1717
#include "reftable-error.h"
1818
#include "system.h"
1919

20+
struct merged_subiter {
21+
struct reftable_iterator iter;
22+
struct reftable_record rec;
23+
};
24+
25+
struct merged_iter {
26+
struct merged_subiter *subiters;
27+
struct merged_iter_pqueue pq;
28+
uint32_t hash_id;
29+
size_t stack_len;
30+
uint8_t typ;
31+
int suppress_deletions;
32+
ssize_t advance_index;
33+
};
34+
2035
static int merged_iter_init(struct merged_iter *mi)
2136
{
2237
for (size_t i = 0; i < mi->stack_len; i++) {
2338
struct pq_entry e = {
2439
.index = i,
40+
.rec = &mi->subiters[i].rec,
2541
};
2642
int err;
2743

28-
reftable_record_init(&e.rec, mi->typ);
29-
err = iterator_next(&mi->stack[i], &e.rec);
44+
reftable_record_init(&mi->subiters[i].rec, mi->typ);
45+
err = iterator_next(&mi->subiters[i].iter,
46+
&mi->subiters[i].rec);
3047
if (err < 0)
3148
return err;
32-
if (err > 0) {
33-
reftable_iterator_destroy(&mi->stack[i]);
34-
reftable_record_release(&e.rec);
49+
if (err > 0)
3550
continue;
36-
}
3751

3852
merged_iter_pqueue_add(&mi->pq, &e);
3953
}
@@ -46,54 +60,66 @@ static void merged_iter_close(void *p)
4660
struct merged_iter *mi = p;
4761

4862
merged_iter_pqueue_release(&mi->pq);
49-
for (size_t i = 0; i < mi->stack_len; i++)
50-
reftable_iterator_destroy(&mi->stack[i]);
51-
reftable_free(mi->stack);
63+
for (size_t i = 0; i < mi->stack_len; i++) {
64+
reftable_iterator_destroy(&mi->subiters[i].iter);
65+
reftable_record_release(&mi->subiters[i].rec);
66+
}
67+
reftable_free(mi->subiters);
5268
}
5369

54-
static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
55-
size_t idx)
70+
static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
5671
{
5772
struct pq_entry e = {
5873
.index = idx,
74+
.rec = &mi->subiters[idx].rec,
5975
};
6076
int err;
6177

62-
reftable_record_init(&e.rec, mi->typ);
63-
err = iterator_next(&mi->stack[idx], &e.rec);
64-
if (err < 0)
78+
err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
79+
if (err)
6580
return err;
6681

67-
if (err > 0) {
68-
reftable_iterator_destroy(&mi->stack[idx]);
69-
reftable_record_release(&e.rec);
70-
return 0;
71-
}
72-
7382
merged_iter_pqueue_add(&mi->pq, &e);
7483
return 0;
7584
}
7685

77-
static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
78-
{
79-
if (iterator_is_null(&mi->stack[idx]))
80-
return 0;
81-
return merged_iter_advance_nonnull_subiter(mi, idx);
82-
}
83-
8486
static int merged_iter_next_entry(struct merged_iter *mi,
8587
struct reftable_record *rec)
8688
{
8789
struct pq_entry entry = { 0 };
88-
int err = 0;
90+
int err = 0, empty;
91+
92+
empty = merged_iter_pqueue_is_empty(mi->pq);
93+
94+
if (mi->advance_index >= 0) {
95+
/*
96+
* When there are no pqueue entries then we only have a single
97+
* subiter left. There is no need to use the pqueue in that
98+
* case anymore as we know that the subiter will return entries
99+
* in the correct order already.
100+
*
101+
* While this may sound like a very specific edge case, it may
102+
* happen more frequently than you think. Most repositories
103+
* will end up having a single large base table that contains
104+
* most of the refs. It's thus likely that we exhaust all
105+
* subiters but the one from that base ref.
106+
*/
107+
if (empty)
108+
return iterator_next(&mi->subiters[mi->advance_index].iter,
109+
rec);
110+
111+
err = merged_iter_advance_subiter(mi, mi->advance_index);
112+
if (err < 0)
113+
return err;
114+
if (!err)
115+
empty = 0;
116+
mi->advance_index = -1;
117+
}
89118

90-
if (merged_iter_pqueue_is_empty(mi->pq))
119+
if (empty)
91120
return 1;
92121

93122
entry = merged_iter_pqueue_remove(&mi->pq);
94-
err = merged_iter_advance_subiter(mi, entry.index);
95-
if (err < 0)
96-
return err;
97123

98124
/*
99125
One can also use reftable as datacenter-local storage, where the ref
@@ -107,56 +133,34 @@ static int merged_iter_next_entry(struct merged_iter *mi,
107133
struct pq_entry top = merged_iter_pqueue_top(mi->pq);
108134
int cmp;
109135

110-
/*
111-
* When the next entry comes from the same queue as the current
112-
* entry then it must by definition be larger. This avoids a
113-
* comparison in the most common case.
114-
*/
115-
if (top.index == entry.index)
116-
break;
117-
118-
cmp = reftable_record_cmp(&top.rec, &entry.rec);
136+
cmp = reftable_record_cmp(top.rec, entry.rec);
119137
if (cmp > 0)
120138
break;
121139

122140
merged_iter_pqueue_remove(&mi->pq);
123141
err = merged_iter_advance_subiter(mi, top.index);
124142
if (err < 0)
125-
goto done;
126-
reftable_record_release(&top.rec);
143+
return err;
127144
}
128145

129-
reftable_record_release(rec);
130-
*rec = entry.rec;
131-
132-
done:
133-
if (err)
134-
reftable_record_release(&entry.rec);
135-
return err;
146+
mi->advance_index = entry.index;
147+
SWAP(*rec, *entry.rec);
148+
return 0;
136149
}
137150

138-
static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec)
151+
static int merged_iter_next_void(void *p, struct reftable_record *rec)
139152
{
153+
struct merged_iter *mi = p;
140154
while (1) {
141155
int err = merged_iter_next_entry(mi, rec);
142-
if (err == 0 && mi->suppress_deletions &&
143-
reftable_record_is_deletion(rec)) {
156+
if (err)
157+
return err;
158+
if (mi->suppress_deletions && reftable_record_is_deletion(rec))
144159
continue;
145-
}
146-
147-
return err;
160+
return 0;
148161
}
149162
}
150163

151-
static int merged_iter_next_void(void *p, struct reftable_record *rec)
152-
{
153-
struct merged_iter *mi = p;
154-
if (merged_iter_pqueue_is_empty(mi->pq))
155-
return 1;
156-
157-
return merged_iter_next(mi, rec);
158-
}
159-
160164
static struct reftable_iterator_vtable merged_iter_vtable = {
161165
.next = &merged_iter_next_void,
162166
.close = &merged_iter_close,
@@ -246,14 +250,15 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
246250
.typ = reftable_record_type(rec),
247251
.hash_id = mt->hash_id,
248252
.suppress_deletions = mt->suppress_deletions,
253+
.advance_index = -1,
249254
};
250255
struct merged_iter *p;
251256
int err;
252257

253-
REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len);
258+
REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
254259
for (size_t i = 0; i < mt->stack_len; i++) {
255260
err = reftable_table_seek_record(&mt->stack[i],
256-
&merged.stack[merged.stack_len], rec);
261+
&merged.subiters[merged.stack_len].iter, rec);
257262
if (err < 0)
258263
goto out;
259264
if (!err)

reftable/merged.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ license that can be found in the LICENSE file or at
99
#ifndef MERGED_H
1010
#define MERGED_H
1111

12-
#include "pq.h"
12+
#include "system.h"
1313

1414
struct reftable_merged_table {
1515
struct reftable_table *stack;
@@ -24,15 +24,6 @@ struct reftable_merged_table {
2424
uint64_t max;
2525
};
2626

27-
struct merged_iter {
28-
struct reftable_iterator *stack;
29-
uint32_t hash_id;
30-
size_t stack_len;
31-
uint8_t typ;
32-
int suppress_deletions;
33-
struct merged_iter_pqueue pq;
34-
};
35-
3627
void merged_table_release(struct reftable_merged_table *mt);
3728

3829
#endif

0 commit comments

Comments
 (0)