Skip to content

Commit 2ca245f

Browse files
derrickstoleegitster
authored andcommitted
csum-file.h: increase hashfile buffer size
The hashfile API uses a hard-coded buffer size of 8KB and has ever since it was introduced in c38138c (git-pack-objects: write the pack files with a SHA1 csum, 2005-06-26). It performs a similar function to the hashing buffers in read-cache.c, but that code was updated from 8KB to 128KB in f279894 (read-cache: make the index write buffer size 128K, 2021-02-18). The justification there was that do_write_index() improves from 1.02s to 0.72s. Since our end goal is to have the index writing code use the hashfile API, we need to unify this buffer size to avoid a performance regression. There is a buffer, 'check_buffer', that is used to verify the check_fd file descriptor. When this buffer increases to 128K to fit the data being flushed, it causes the stack to overflow the limits placed in the test suite. To avoid issues with stack size, move both 'buffer' and 'check_buffer' to be heap pointers within 'struct hashfile'. The 'check_buffer' member is left as NULL unless check_fd is set in hashfd_check(). Both buffers are cleared as part of finalize_hashfile() which also frees the full structure. Since these buffers are now on the heap, we can adjust their size based on the needs of the consumer. In particular, callers to hashfd_throughput() are expecting to report progress indicators as the buffer flushes. These callers would prefer the smaller 8k buffer to avoid large delays between updates, especially for users with slower networks. When the progress indicator is not used, the larger buffer is preferrable. By adding a new trace2 region in the chunk-format API, we can see that the writing portion of 'git multi-pack-index write' lowers from ~1.49s to ~1.47s on a Linux machine. These effects may be more pronounced or diminished on other filesystems. The end-to-end timing is too noisy to have a definitive change either way. Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 68142e1 commit 2ca245f

File tree

3 files changed

+68
-25
lines changed

3 files changed

+68
-25
lines changed

chunk-format.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,11 @@ void add_chunk(struct chunkfile *cf,
5858

5959
int write_chunkfile(struct chunkfile *cf, void *data)
6060
{
61-
int i;
61+
int i, result = 0;
6262
uint64_t cur_offset = hashfile_total(cf->f);
6363

64+
trace2_region_enter("chunkfile", "write", the_repository);
65+
6466
/* Add the table of contents to the current offset */
6567
cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
6668

@@ -77,18 +79,20 @@ int write_chunkfile(struct chunkfile *cf, void *data)
7779

7880
for (i = 0; i < cf->chunks_nr; i++) {
7981
off_t start_offset = hashfile_total(cf->f);
80-
int result = cf->chunks[i].write_fn(cf->f, data);
82+
result = cf->chunks[i].write_fn(cf->f, data);
8183

8284
if (result)
83-
return result;
85+
goto cleanup;
8486

8587
if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
8688
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
8789
cf->chunks[i].size, cf->chunks[i].id,
8890
hashfile_total(cf->f) - start_offset);
8991
}
9092

91-
return 0;
93+
cleanup:
94+
trace2_region_leave("chunkfile", "write", the_repository);
95+
return result;
9296
}
9397

9498
int read_table_of_contents(struct chunkfile *cf,

csum-file.c

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,24 @@
1111
#include "progress.h"
1212
#include "csum-file.h"
1313

14+
static void verify_buffer_or_die(struct hashfile *f,
15+
const void *buf,
16+
unsigned int count)
17+
{
18+
ssize_t ret = read_in_full(f->check_fd, f->check_buffer, count);
19+
20+
if (ret < 0)
21+
die_errno("%s: sha1 file read error", f->name);
22+
if (ret != count)
23+
die("%s: sha1 file truncated", f->name);
24+
if (memcmp(buf, f->check_buffer, count))
25+
die("sha1 file '%s' validation error", f->name);
26+
}
27+
1428
static void flush(struct hashfile *f, const void *buf, unsigned int count)
1529
{
16-
if (0 <= f->check_fd && count) {
17-
unsigned char check_buffer[8192];
18-
ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
19-
20-
if (ret < 0)
21-
die_errno("%s: sha1 file read error", f->name);
22-
if (ret != count)
23-
die("%s: sha1 file truncated", f->name);
24-
if (memcmp(buf, check_buffer, count))
25-
die("sha1 file '%s' validation error", f->name);
26-
}
30+
if (0 <= f->check_fd && count)
31+
verify_buffer_or_die(f, buf, count);
2732

2833
if (write_in_full(f->fd, buf, count) < 0) {
2934
if (errno == ENOSPC)
@@ -46,6 +51,13 @@ void hashflush(struct hashfile *f)
4651
}
4752
}
4853

54+
static void free_hashfile(struct hashfile *f)
55+
{
56+
free(f->buffer);
57+
free(f->check_buffer);
58+
free(f);
59+
}
60+
4961
int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
5062
{
5163
int fd;
@@ -75,20 +87,20 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
7587
if (close(f->check_fd))
7688
die_errno("%s: sha1 file error on close", f->name);
7789
}
78-
free(f);
90+
free_hashfile(f);
7991
return fd;
8092
}
8193

8294
void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
8395
{
8496
while (count) {
85-
unsigned left = sizeof(f->buffer) - f->offset;
97+
unsigned left = f->buffer_len - f->offset;
8698
unsigned nr = count > left ? left : count;
8799

88100
if (f->do_crc)
89101
f->crc32 = crc32(f->crc32, buf, nr);
90102

91-
if (nr == sizeof(f->buffer)) {
103+
if (nr == f->buffer_len) {
92104
/*
93105
* Flush a full batch worth of data directly
94106
* from the input, skipping the memcpy() to
@@ -114,11 +126,6 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
114126
}
115127
}
116128

117-
struct hashfile *hashfd(int fd, const char *name)
118-
{
119-
return hashfd_throughput(fd, name, NULL);
120-
}
121-
122129
struct hashfile *hashfd_check(const char *name)
123130
{
124131
int sink, check;
@@ -132,10 +139,14 @@ struct hashfile *hashfd_check(const char *name)
132139
die_errno("unable to open '%s'", name);
133140
f = hashfd(sink, name);
134141
f->check_fd = check;
142+
f->check_buffer = xmalloc(f->buffer_len);
143+
135144
return f;
136145
}
137146

138-
struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp)
147+
static struct hashfile *hashfd_internal(int fd, const char *name,
148+
struct progress *tp,
149+
size_t buffer_len)
139150
{
140151
struct hashfile *f = xmalloc(sizeof(*f));
141152
f->fd = fd;
@@ -146,9 +157,35 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
146157
f->name = name;
147158
f->do_crc = 0;
148159
the_hash_algo->init_fn(&f->ctx);
160+
161+
f->buffer_len = buffer_len;
162+
f->buffer = xmalloc(buffer_len);
163+
f->check_buffer = NULL;
164+
149165
return f;
150166
}
151167

168+
struct hashfile *hashfd(int fd, const char *name)
169+
{
170+
/*
171+
* Since we are not going to use a progress meter to
172+
* measure the rate of data passing through this hashfile,
173+
* use a larger buffer size to reduce fsync() calls.
174+
*/
175+
return hashfd_internal(fd, name, NULL, 128 * 1024);
176+
}
177+
178+
struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp)
179+
{
180+
/*
181+
* Since we are expecting to report progress of the
182+
* write into this hashfile, use a smaller buffer
183+
* size so the progress indicators arrive at a more
184+
* frequent rate.
185+
*/
186+
return hashfd_internal(fd, name, tp, 8 * 1024);
187+
}
188+
152189
void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
153190
{
154191
hashflush(f);

csum-file.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ struct hashfile {
1616
const char *name;
1717
int do_crc;
1818
uint32_t crc32;
19-
unsigned char buffer[8192];
19+
size_t buffer_len;
20+
unsigned char *buffer;
21+
unsigned char *check_buffer;
2022
};
2123

2224
/* Checkpoint */

0 commit comments

Comments
 (0)