Skip to content

Commit 5aec723

Browse files
committed
Merge branch 'ps/reftable-write-optim'
Code to write out reftable has seen some optimization and simplification. * ps/reftable-write-optim: reftable/block: reuse compressed array reftable/block: reuse zstream when writing log blocks reftable/writer: reset `last_key` instead of releasing it reftable/writer: unify releasing memory reftable/writer: refactorings for `writer_flush_nonempty_block()` reftable/writer: refactorings for `writer_add_record()` refs/reftable: don't recompute committer ident reftable: remove name checks refs/reftable: skip duplicate name checks refs/reftable: perform explicit D/F check when writing symrefs refs/reftable: fix D/F conflict error message on ref copy
2 parents d4cc1ec + fa74f32 commit 5aec723

16 files changed

+230
-556
lines changed

Makefile

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2665,7 +2665,6 @@ REFTABLE_OBJS += reftable/merged.o
26652665
REFTABLE_OBJS += reftable/pq.o
26662666
REFTABLE_OBJS += reftable/reader.o
26672667
REFTABLE_OBJS += reftable/record.o
2668-
REFTABLE_OBJS += reftable/refname.o
26692668
REFTABLE_OBJS += reftable/generic.o
26702669
REFTABLE_OBJS += reftable/stack.o
26712670
REFTABLE_OBJS += reftable/tree.o
@@ -2678,7 +2677,6 @@ REFTABLE_TEST_OBJS += reftable/merged_test.o
26782677
REFTABLE_TEST_OBJS += reftable/pq_test.o
26792678
REFTABLE_TEST_OBJS += reftable/record_test.o
26802679
REFTABLE_TEST_OBJS += reftable/readwrite_test.o
2681-
REFTABLE_TEST_OBJS += reftable/refname_test.o
26822680
REFTABLE_TEST_OBJS += reftable/stack_test.o
26832681
REFTABLE_TEST_OBJS += reftable/test_framework.o
26842682
REFTABLE_TEST_OBJS += reftable/tree_test.o

refs/reftable-backend.c

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -172,32 +172,30 @@ static int should_write_log(struct ref_store *refs, const char *refname)
172172
}
173173
}
174174

175-
static void fill_reftable_log_record(struct reftable_log_record *log)
175+
static void fill_reftable_log_record(struct reftable_log_record *log, const struct ident_split *split)
176176
{
177-
const char *info = git_committer_info(0);
178-
struct ident_split split = {0};
177+
const char *tz_begin;
179178
int sign = 1;
180179

181-
if (split_ident_line(&split, info, strlen(info)))
182-
BUG("failed splitting committer info");
183-
184180
reftable_log_record_release(log);
185181
log->value_type = REFTABLE_LOG_UPDATE;
186182
log->value.update.name =
187-
xstrndup(split.name_begin, split.name_end - split.name_begin);
183+
xstrndup(split->name_begin, split->name_end - split->name_begin);
188184
log->value.update.email =
189-
xstrndup(split.mail_begin, split.mail_end - split.mail_begin);
190-
log->value.update.time = atol(split.date_begin);
191-
if (*split.tz_begin == '-') {
185+
xstrndup(split->mail_begin, split->mail_end - split->mail_begin);
186+
log->value.update.time = atol(split->date_begin);
187+
188+
tz_begin = split->tz_begin;
189+
if (*tz_begin == '-') {
192190
sign = -1;
193-
split.tz_begin++;
191+
tz_begin++;
194192
}
195-
if (*split.tz_begin == '+') {
193+
if (*tz_begin == '+') {
196194
sign = 1;
197-
split.tz_begin++;
195+
tz_begin++;
198196
}
199197

200-
log->value.update.tz_offset = sign * atoi(split.tz_begin);
198+
log->value.update.tz_offset = sign * atoi(tz_begin);
201199
}
202200

203201
static int read_ref_without_reload(struct reftable_stack *stack,
@@ -1021,9 +1019,15 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
10211019
reftable_stack_merged_table(arg->stack);
10221020
uint64_t ts = reftable_stack_next_update_index(arg->stack);
10231021
struct reftable_log_record *logs = NULL;
1022+
struct ident_split committer_ident = {0};
10241023
size_t logs_nr = 0, logs_alloc = 0, i;
1024+
const char *committer_info;
10251025
int ret = 0;
10261026

1027+
committer_info = git_committer_info(0);
1028+
if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
1029+
BUG("failed splitting committer info");
1030+
10271031
QSORT(arg->updates, arg->updates_nr, transaction_update_cmp);
10281032

10291033
reftable_writer_set_limits(writer, ts, ts);
@@ -1089,7 +1093,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
10891093
log = &logs[logs_nr++];
10901094
memset(log, 0, sizeof(*log));
10911095

1092-
fill_reftable_log_record(log);
1096+
fill_reftable_log_record(log, &committer_ident);
10931097
log->update_index = ts;
10941098
log->refname = xstrdup(u->refname);
10951099
memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
@@ -1227,6 +1231,7 @@ static int reftable_be_pack_refs(struct ref_store *ref_store,
12271231
struct write_create_symref_arg {
12281232
struct reftable_ref_store *refs;
12291233
struct reftable_stack *stack;
1234+
struct strbuf *err;
12301235
const char *refname;
12311236
const char *target;
12321237
const char *logmsg;
@@ -1242,13 +1247,20 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
12421247
.value.symref = (char *)create->target,
12431248
.update_index = ts,
12441249
};
1250+
struct ident_split committer_ident = {0};
12451251
struct reftable_log_record log = {0};
12461252
struct object_id new_oid;
12471253
struct object_id old_oid;
1254+
const char *committer_info;
12481255
int ret;
12491256

12501257
reftable_writer_set_limits(writer, ts, ts);
12511258

1259+
ret = refs_verify_refname_available(&create->refs->base, create->refname,
1260+
NULL, NULL, create->err);
1261+
if (ret < 0)
1262+
return ret;
1263+
12521264
ret = reftable_writer_add_ref(writer, &ref);
12531265
if (ret)
12541266
return ret;
@@ -1267,7 +1279,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
12671279
!should_write_log(&create->refs->base, create->refname))
12681280
return 0;
12691281

1270-
fill_reftable_log_record(&log);
1282+
committer_info = git_committer_info(0);
1283+
if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
1284+
BUG("failed splitting committer info");
1285+
1286+
fill_reftable_log_record(&log, &committer_ident);
12711287
log.refname = xstrdup(create->refname);
12721288
log.update_index = ts;
12731289
log.value.update.message = xstrndup(create->logmsg,
@@ -1290,12 +1306,14 @@ static int reftable_be_create_symref(struct ref_store *ref_store,
12901306
struct reftable_ref_store *refs =
12911307
reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_symref");
12921308
struct reftable_stack *stack = stack_for(refs, refname, &refname);
1309+
struct strbuf err = STRBUF_INIT;
12931310
struct write_create_symref_arg arg = {
12941311
.refs = refs,
12951312
.stack = stack,
12961313
.refname = refname,
12971314
.target = target,
12981315
.logmsg = logmsg,
1316+
.err = &err,
12991317
};
13001318
int ret;
13011319

@@ -1311,9 +1329,15 @@ static int reftable_be_create_symref(struct ref_store *ref_store,
13111329

13121330
done:
13131331
assert(ret != REFTABLE_API_ERROR);
1314-
if (ret)
1315-
error("unable to write symref for %s: %s", refname,
1316-
reftable_error_str(ret));
1332+
if (ret) {
1333+
if (err.len)
1334+
error("%s", err.buf);
1335+
else
1336+
error("unable to write symref for %s: %s", refname,
1337+
reftable_error_str(ret));
1338+
}
1339+
1340+
strbuf_release(&err);
13171341
return ret;
13181342
}
13191343

@@ -1335,10 +1359,16 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
13351359
struct reftable_log_record old_log = {0}, *logs = NULL;
13361360
struct reftable_iterator it = {0};
13371361
struct string_list skip = STRING_LIST_INIT_NODUP;
1362+
struct ident_split committer_ident = {0};
13381363
struct strbuf errbuf = STRBUF_INIT;
13391364
size_t logs_nr = 0, logs_alloc = 0, i;
1365+
const char *committer_info;
13401366
int ret;
13411367

1368+
committer_info = git_committer_info(0);
1369+
if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
1370+
BUG("failed splitting committer info");
1371+
13421372
if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
13431373
ret = error(_("refname %s not found"), arg->oldname);
13441374
goto done;
@@ -1361,7 +1391,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
13611391
/*
13621392
* Verify that the new refname is available.
13631393
*/
1364-
string_list_insert(&skip, arg->oldname);
1394+
if (arg->delete_old)
1395+
string_list_insert(&skip, arg->oldname);
13651396
ret = refs_verify_refname_available(&arg->refs->base, arg->newname,
13661397
NULL, &skip, &errbuf);
13671398
if (ret < 0) {
@@ -1412,7 +1443,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
14121443

14131444
ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
14141445
memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
1415-
fill_reftable_log_record(&logs[logs_nr]);
1446+
fill_reftable_log_record(&logs[logs_nr], &committer_ident);
14161447
logs[logs_nr].refname = (char *)arg->newname;
14171448
logs[logs_nr].update_index = deletion_ts;
14181449
logs[logs_nr].value.update.message =
@@ -1444,7 +1475,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
14441475
*/
14451476
ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
14461477
memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
1447-
fill_reftable_log_record(&logs[logs_nr]);
1478+
fill_reftable_log_record(&logs[logs_nr], &committer_ident);
14481479
logs[logs_nr].refname = (char *)arg->newname;
14491480
logs[logs_nr].update_index = creation_ts;
14501481
logs[logs_nr].value.update.message =

reftable/block.c

Lines changed: 50 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
7676
bw->entries = 0;
7777
bw->restart_len = 0;
7878
bw->last_key.len = 0;
79+
if (!bw->zstream) {
80+
REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
81+
deflateInit(bw->zstream, 9);
82+
}
7983
}
8084

8185
uint8_t block_writer_type(struct block_writer *bw)
@@ -139,39 +143,52 @@ int block_writer_finish(struct block_writer *w)
139143
w->next += 2;
140144
put_be24(w->buf + 1 + w->header_off, w->next);
141145

146+
/*
147+
* Log records are stored zlib-compressed. Note that the compression
148+
* also spans over the restart points we have just written.
149+
*/
142150
if (block_writer_type(w) == BLOCK_TYPE_LOG) {
143151
int block_header_skip = 4 + w->header_off;
144-
uLongf src_len = w->next - block_header_skip;
145-
uLongf dest_cap = src_len * 1.001 + 12;
146-
uint8_t *compressed;
147-
148-
REFTABLE_ALLOC_ARRAY(compressed, dest_cap);
149-
150-
while (1) {
151-
uLongf out_dest_len = dest_cap;
152-
int zresult = compress2(compressed, &out_dest_len,
153-
w->buf + block_header_skip,
154-
src_len, 9);
155-
if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) {
156-
dest_cap *= 2;
157-
compressed =
158-
reftable_realloc(compressed, dest_cap);
159-
if (compressed)
160-
continue;
161-
}
162-
163-
if (Z_OK != zresult) {
164-
reftable_free(compressed);
165-
return REFTABLE_ZLIB_ERROR;
166-
}
167-
168-
memcpy(w->buf + block_header_skip, compressed,
169-
out_dest_len);
170-
w->next = out_dest_len + block_header_skip;
171-
reftable_free(compressed);
172-
break;
173-
}
152+
uLongf src_len = w->next - block_header_skip, compressed_len;
153+
int ret;
154+
155+
ret = deflateReset(w->zstream);
156+
if (ret != Z_OK)
157+
return REFTABLE_ZLIB_ERROR;
158+
159+
/*
160+
* Precompute the upper bound of how many bytes the compressed
161+
* data may end up with. Combined with `Z_FINISH`, `deflate()`
162+
* is guaranteed to return `Z_STREAM_END`.
163+
*/
164+
compressed_len = deflateBound(w->zstream, src_len);
165+
REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap);
166+
167+
w->zstream->next_out = w->compressed;
168+
w->zstream->avail_out = compressed_len;
169+
w->zstream->next_in = w->buf + block_header_skip;
170+
w->zstream->avail_in = src_len;
171+
172+
/*
173+
* We want to perform all decompression in a single step, which
174+
* is why we can pass Z_FINISH here. As we have precomputed the
175+
* deflated buffer's size via `deflateBound()` this function is
176+
* guaranteed to succeed according to the zlib documentation.
177+
*/
178+
ret = deflate(w->zstream, Z_FINISH);
179+
if (ret != Z_STREAM_END)
180+
return REFTABLE_ZLIB_ERROR;
181+
182+
/*
183+
* Overwrite the uncompressed data we have already written and
184+
* adjust the `next` pointer to point right after the
185+
* compressed data.
186+
*/
187+
memcpy(w->buf + block_header_skip, w->compressed,
188+
w->zstream->total_out);
189+
w->next = w->zstream->total_out + block_header_skip;
174190
}
191+
175192
return w->next;
176193
}
177194

@@ -514,7 +531,10 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
514531

515532
void block_writer_release(struct block_writer *bw)
516533
{
534+
deflateEnd(bw->zstream);
535+
FREE_AND_NULL(bw->zstream);
517536
FREE_AND_NULL(bw->restarts);
537+
FREE_AND_NULL(bw->compressed);
518538
strbuf_release(&bw->last_key);
519539
/* the block is not owned. */
520540
}

reftable/block.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ license that can be found in the LICENSE file or at
1818
* allocation overhead.
1919
*/
2020
struct block_writer {
21+
z_stream *zstream;
22+
unsigned char *compressed;
23+
size_t compressed_cap;
24+
2125
uint8_t *buf;
2226
uint32_t block_size;
2327

reftable/error.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ const char *reftable_error_str(int err)
2727
return "misuse of the reftable API";
2828
case REFTABLE_ZLIB_ERROR:
2929
return "zlib failure";
30-
case REFTABLE_NAME_CONFLICT:
31-
return "file/directory conflict";
3230
case REFTABLE_EMPTY_TABLE_ERROR:
3331
return "wrote empty table";
3432
case REFTABLE_REFNAME_ERROR:

0 commit comments

Comments
 (0)