Skip to content

Commit e8c1cda

Browse files
committed
Merge branch 'ps/reftable-reflog-iteration-perf'
The code to iterate over reflogs in the reftable has been optimized to reduce memory allocation and deallocation. Reviewed-by: Josh Steadmon <[email protected]> cf. <[email protected]> * ps/reftable-reflog-iteration-perf: refs/reftable: track last log record name via strbuf reftable/record: use scratch buffer when decoding records reftable/record: reuse message when decoding log records reftable/record: reuse refnames when decoding log records reftable/record: avoid copying author info reftable/record: convert old and new object IDs to arrays refs/reftable: reload correct stack when creating reflog iter
2 parents dc97afd + fcacc2b commit e8c1cda

File tree

10 files changed

+154
-211
lines changed

10 files changed

+154
-211
lines changed

refs/reftable-backend.c

Lines changed: 16 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -171,23 +171,6 @@ static int should_write_log(struct ref_store *refs, const char *refname)
171171
}
172172
}
173173

174-
static void clear_reftable_log_record(struct reftable_log_record *log)
175-
{
176-
switch (log->value_type) {
177-
case REFTABLE_LOG_UPDATE:
178-
/*
179-
* When we write log records, the hashes are owned by the
180-
* caller and thus shouldn't be free'd.
181-
*/
182-
log->value.update.old_hash = NULL;
183-
log->value.update.new_hash = NULL;
184-
break;
185-
case REFTABLE_LOG_DELETION:
186-
break;
187-
}
188-
reftable_log_record_release(log);
189-
}
190-
191174
static void fill_reftable_log_record(struct reftable_log_record *log)
192175
{
193176
const char *info = git_committer_info(0);
@@ -1106,8 +1089,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
11061089
fill_reftable_log_record(log);
11071090
log->update_index = ts;
11081091
log->refname = xstrdup(u->refname);
1109-
log->value.update.new_hash = u->new_oid.hash;
1110-
log->value.update.old_hash = tx_update->current_oid.hash;
1092+
memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
1093+
memcpy(log->value.update.old_hash, tx_update->current_oid.hash, GIT_MAX_RAWSZ);
11111094
log->value.update.message =
11121095
xstrndup(u->msg, arg->refs->write_options.block_size / 2);
11131096
}
@@ -1162,7 +1145,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
11621145
done:
11631146
assert(ret != REFTABLE_API_ERROR);
11641147
for (i = 0; i < logs_nr; i++)
1165-
clear_reftable_log_record(&logs[i]);
1148+
reftable_log_record_release(&logs[i]);
11661149
free(logs);
11671150
return ret;
11681151
}
@@ -1279,13 +1262,13 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
12791262
log.update_index = ts;
12801263
log.value.update.message = xstrndup(create->logmsg,
12811264
create->refs->write_options.block_size / 2);
1282-
log.value.update.new_hash = new_oid.hash;
1265+
memcpy(log.value.update.new_hash, new_oid.hash, GIT_MAX_RAWSZ);
12831266
if (refs_resolve_ref_unsafe(&create->refs->base, create->refname,
12841267
RESOLVE_REF_READING, &old_oid, NULL))
1285-
log.value.update.old_hash = old_oid.hash;
1268+
memcpy(log.value.update.old_hash, old_oid.hash, GIT_MAX_RAWSZ);
12861269

12871270
ret = reftable_writer_add_log(writer, &log);
1288-
clear_reftable_log_record(&log);
1271+
reftable_log_record_release(&log);
12891272
return ret;
12901273
}
12911274

@@ -1424,7 +1407,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
14241407
logs[logs_nr].update_index = deletion_ts;
14251408
logs[logs_nr].value.update.message =
14261409
xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2);
1427-
logs[logs_nr].value.update.old_hash = old_ref.value.val1;
1410+
memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
14281411
logs_nr++;
14291412

14301413
ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid, &head_referent, &head_type);
@@ -1456,7 +1439,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
14561439
logs[logs_nr].update_index = creation_ts;
14571440
logs[logs_nr].value.update.message =
14581441
xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2);
1459-
logs[logs_nr].value.update.new_hash = old_ref.value.val1;
1442+
memcpy(logs[logs_nr].value.update.new_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
14601443
logs_nr++;
14611444

14621445
/*
@@ -1519,10 +1502,6 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
15191502
for (i = 0; i < logs_nr; i++) {
15201503
if (!strcmp(logs[i].refname, "HEAD"))
15211504
continue;
1522-
if (logs[i].value.update.old_hash == old_ref.value.val1)
1523-
logs[i].value.update.old_hash = NULL;
1524-
if (logs[i].value.update.new_hash == old_ref.value.val1)
1525-
logs[i].value.update.new_hash = NULL;
15261505
logs[i].refname = NULL;
15271506
reftable_log_record_release(&logs[i]);
15281507
}
@@ -1600,7 +1579,7 @@ struct reftable_reflog_iterator {
16001579
struct reftable_ref_store *refs;
16011580
struct reftable_iterator iter;
16021581
struct reftable_log_record log;
1603-
char *last_name;
1582+
struct strbuf last_name;
16041583
int err;
16051584
};
16061585

@@ -1619,15 +1598,15 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator)
16191598
* we've already produced this name. This could be faster by
16201599
* seeking directly to reflog@update_index==0.
16211600
*/
1622-
if (iter->last_name && !strcmp(iter->log.refname, iter->last_name))
1601+
if (!strcmp(iter->log.refname, iter->last_name.buf))
16231602
continue;
16241603

16251604
if (check_refname_format(iter->log.refname,
16261605
REFNAME_ALLOW_ONELEVEL))
16271606
continue;
16281607

1629-
free(iter->last_name);
1630-
iter->last_name = xstrdup(iter->log.refname);
1608+
strbuf_reset(&iter->last_name);
1609+
strbuf_addstr(&iter->last_name, iter->log.refname);
16311610
iter->base.refname = iter->log.refname;
16321611

16331612
break;
@@ -1660,7 +1639,7 @@ static int reftable_reflog_iterator_abort(struct ref_iterator *ref_iterator)
16601639
(struct reftable_reflog_iterator *)ref_iterator;
16611640
reftable_log_record_release(&iter->log);
16621641
reftable_iterator_destroy(&iter->iter);
1663-
free(iter->last_name);
1642+
strbuf_release(&iter->last_name);
16641643
free(iter);
16651644
return ITER_DONE;
16661645
}
@@ -1680,13 +1659,14 @@ static struct reftable_reflog_iterator *reflog_iterator_for_stack(struct reftabl
16801659

16811660
iter = xcalloc(1, sizeof(*iter));
16821661
base_ref_iterator_init(&iter->base, &reftable_reflog_iterator_vtable);
1662+
strbuf_init(&iter->last_name, 0);
16831663
iter->refs = refs;
16841664

16851665
ret = refs->err;
16861666
if (ret)
16871667
goto done;
16881668

1689-
ret = reftable_stack_reload(refs->main_stack);
1669+
ret = reftable_stack_reload(stack);
16901670
if (ret < 0)
16911671
goto done;
16921672

@@ -2184,7 +2164,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
21842164
dest->value_type = REFTABLE_LOG_DELETION;
21852165
} else {
21862166
if ((flags & EXPIRE_REFLOGS_REWRITE) && last_hash)
2187-
dest->value.update.old_hash = last_hash;
2167+
memcpy(dest->value.update.old_hash, last_hash, GIT_MAX_RAWSZ);
21882168
last_hash = logs[i].value.update.new_hash;
21892169
}
21902170
}

reftable/block.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,8 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
332332
return REFTABLE_FORMAT_ERROR;
333333

334334
string_view_consume(&in, n);
335-
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size);
335+
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size,
336+
&it->scratch);
336337
if (n < 0)
337338
return -1;
338339
string_view_consume(&in, n);
@@ -369,6 +370,7 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want)
369370
void block_iter_close(struct block_iter *it)
370371
{
371372
strbuf_release(&it->last_key);
373+
strbuf_release(&it->scratch);
372374
}
373375

374376
int block_reader_seek(struct block_reader *br, struct block_iter *it,

reftable/block.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,12 @@ struct block_iter {
8484

8585
/* key for last entry we read. */
8686
struct strbuf last_key;
87+
struct strbuf scratch;
8788
};
8889

8990
#define BLOCK_ITER_INIT { \
9091
.last_key = STRBUF_INIT, \
92+
.scratch = STRBUF_INIT, \
9193
}
9294

9395
/* initializes a block reader. */

reftable/merged_test.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -289,16 +289,13 @@ merged_table_from_log_records(struct reftable_log_record **logs,
289289

290290
static void test_merged_logs(void)
291291
{
292-
uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
293-
uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
294-
uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 };
295292
struct reftable_log_record r1[] = {
296293
{
297294
.refname = "a",
298295
.update_index = 2,
299296
.value_type = REFTABLE_LOG_UPDATE,
300297
.value.update = {
301-
.old_hash = hash2,
298+
.old_hash = { 2 },
302299
/* deletion */
303300
.name = "jane doe",
304301
.email = "jane@invalid",
@@ -310,8 +307,8 @@ static void test_merged_logs(void)
310307
.update_index = 1,
311308
.value_type = REFTABLE_LOG_UPDATE,
312309
.value.update = {
313-
.old_hash = hash1,
314-
.new_hash = hash2,
310+
.old_hash = { 1 },
311+
.new_hash = { 2 },
315312
.name = "jane doe",
316313
.email = "jane@invalid",
317314
.message = "message1",
@@ -324,7 +321,7 @@ static void test_merged_logs(void)
324321
.update_index = 3,
325322
.value_type = REFTABLE_LOG_UPDATE,
326323
.value.update = {
327-
.new_hash = hash3,
324+
.new_hash = { 3 },
328325
.name = "jane doe",
329326
.email = "jane@invalid",
330327
.message = "message3",

reftable/readwrite_test.c

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,15 @@ static void write_table(char ***names, struct strbuf *buf, int N,
7777
}
7878

7979
for (i = 0; i < N; i++) {
80-
uint8_t hash[GIT_SHA256_RAWSZ] = { 0 };
8180
char name[100];
8281
int n;
8382

84-
set_test_hash(hash, i);
85-
8683
snprintf(name, sizeof(name), "refs/heads/branch%02d", i);
8784

8885
log.refname = name;
8986
log.update_index = update_index;
9087
log.value_type = REFTABLE_LOG_UPDATE;
91-
log.value.update.new_hash = hash;
88+
set_test_hash(log.value.update.new_hash, i);
9289
log.value.update.message = "message";
9390

9491
n = reftable_writer_add_log(w, &log);
@@ -137,13 +134,10 @@ static void test_log_buffer_size(void)
137134
/* This tests buffer extension for log compression. Must use a random
138135
hash, to ensure that the compressed part is larger than the original.
139136
*/
140-
uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ];
141137
for (i = 0; i < GIT_SHA1_RAWSZ; i++) {
142-
hash1[i] = (uint8_t)(git_rand() % 256);
143-
hash2[i] = (uint8_t)(git_rand() % 256);
138+
log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256);
139+
log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256);
144140
}
145-
log.value.update.old_hash = hash1;
146-
log.value.update.new_hash = hash2;
147141
reftable_writer_set_limits(w, update_index, update_index);
148142
err = reftable_writer_add_log(w, &log);
149143
EXPECT_ERR(err);
@@ -161,25 +155,26 @@ static void test_log_overflow(void)
161155
.block_size = ARRAY_SIZE(msg),
162156
};
163157
int err;
164-
struct reftable_log_record
165-
log = { .refname = "refs/heads/master",
166-
.update_index = 0xa,
167-
.value_type = REFTABLE_LOG_UPDATE,
168-
.value = { .update = {
169-
.name = "Han-Wen Nienhuys",
170-
.email = "[email protected]",
171-
.tz_offset = 100,
172-
.time = 0x5e430672,
173-
.message = msg,
174-
} } };
158+
struct reftable_log_record log = {
159+
.refname = "refs/heads/master",
160+
.update_index = 0xa,
161+
.value_type = REFTABLE_LOG_UPDATE,
162+
.value = {
163+
.update = {
164+
.old_hash = { 1 },
165+
.new_hash = { 2 },
166+
.name = "Han-Wen Nienhuys",
167+
.email = "[email protected]",
168+
.tz_offset = 100,
169+
.time = 0x5e430672,
170+
.message = msg,
171+
},
172+
},
173+
};
175174
struct reftable_writer *w =
176175
reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts);
177176

178-
uint8_t hash1[GIT_SHA1_RAWSZ] = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 };
179-
180177
memset(msg, 'x', sizeof(msg) - 1);
181-
log.value.update.old_hash = hash1;
182-
log.value.update.new_hash = hash2;
183178
reftable_writer_set_limits(w, update_index, update_index);
184179
err = reftable_writer_add_log(w, &log);
185180
EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR);
@@ -219,16 +214,13 @@ static void test_log_write_read(void)
219214
EXPECT_ERR(err);
220215
}
221216
for (i = 0; i < N; i++) {
222-
uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ];
223217
struct reftable_log_record log = { NULL };
224-
set_test_hash(hash1, i);
225-
set_test_hash(hash2, i + 1);
226218

227219
log.refname = names[i];
228220
log.update_index = i;
229221
log.value_type = REFTABLE_LOG_UPDATE;
230-
log.value.update.old_hash = hash1;
231-
log.value.update.new_hash = hash2;
222+
set_test_hash(log.value.update.old_hash, i);
223+
set_test_hash(log.value.update.new_hash, i + 1);
232224

233225
err = reftable_writer_add_log(w, &log);
234226
EXPECT_ERR(err);
@@ -298,18 +290,15 @@ static void test_log_zlib_corruption(void)
298290
struct reftable_writer *w =
299291
reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts);
300292
const struct reftable_stats *stats = NULL;
301-
uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
302-
uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
303293
char message[100] = { 0 };
304294
int err, i, n;
305-
306295
struct reftable_log_record log = {
307296
.refname = "refname",
308297
.value_type = REFTABLE_LOG_UPDATE,
309298
.value = {
310299
.update = {
311-
.new_hash = hash1,
312-
.old_hash = hash2,
300+
.new_hash = { 1 },
301+
.old_hash = { 2 },
313302
.name = "My Name",
314303
.email = "myname@invalid",
315304
.message = message,
@@ -821,13 +810,12 @@ static void test_write_multiple_indices(void)
821810
}
822811

823812
for (i = 0; i < 100; i++) {
824-
unsigned char hash[GIT_SHA1_RAWSZ] = {i};
825813
struct reftable_log_record log = {
826814
.update_index = 1,
827815
.value_type = REFTABLE_LOG_UPDATE,
828816
.value.update = {
829-
.old_hash = hash,
830-
.new_hash = hash,
817+
.old_hash = { i },
818+
.new_hash = { i },
831819
},
832820
};
833821

0 commit comments

Comments
 (0)