Skip to content

Commit 020406e

Browse files
neerajsi-msftgitster
authored andcommitted
core.fsync: introduce granular fsync control infrastructure
This commit introduces the infrastructure for the core.fsync configuration knob. The repository components we want to sync are identified by flags so that we can turn on or off syncing for specific components. If core.fsyncObjectFiles is set and the core.fsync configuration also includes FSYNC_COMPONENT_LOOSE_OBJECT, we will fsync any loose objects. This picks the strictest data integrity behavior if core.fsync and core.fsyncObjectFiles are set to conflicting values. This change introduces the currently unused fsync_component helper, which will be used by a later patch that adds fsyncing to the refs backend. Actual configuration and documentation of the fsync components list are in other patches in the series to separate review of the underlying mechanism from the policy of how it's configured. Helped-by: Patrick Steinhardt <[email protected]> Signed-off-by: Neeraj Singh <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent abf38ab commit 020406e

15 files changed

+97
-33
lines changed

builtin/fast-import.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ static void end_packfile(void)
865865
struct tag *t;
866866

867867
close_pack_windows(pack_data);
868-
finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
868+
finalize_hashfile(pack_file, cur_pack_oid.hash, FSYNC_COMPONENT_PACK, 0);
869869
fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
870870
pack_data->pack_name, object_count,
871871
cur_pack_oid.hash, pack_size);

builtin/index-pack.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,7 +1290,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
12901290
nr_objects - nr_objects_initial);
12911291
stop_progress_msg(&progress, msg.buf);
12921292
strbuf_release(&msg);
1293-
finalize_hashfile(f, tail_hash, 0);
1293+
finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0);
12941294
hashcpy(read_hash, pack_hash);
12951295
fixup_pack_header_footer(output_fd, pack_hash,
12961296
curr_pack, nr_objects,
@@ -1512,7 +1512,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
15121512
if (!from_stdin) {
15131513
close(input_fd);
15141514
} else {
1515-
fsync_or_die(output_fd, curr_pack_name);
1515+
fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
15161516
err = close(output_fd);
15171517
if (err)
15181518
die_errno(_("error while closing pack file"));

builtin/pack-objects.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,16 +1199,26 @@ static void write_pack_file(void)
11991199
display_progress(progress_state, written);
12001200
}
12011201

1202-
/*
1203-
* Did we write the wrong # entries in the header?
1204-
* If so, rewrite it like in fast-import
1205-
*/
12061202
if (pack_to_stdout) {
1207-
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
1203+
/*
1204+
* We never fsync when writing to stdout since we may
1205+
* not be writing to an actual pack file. For instance,
1206+
* the upload-pack code passes a pipe here. Calling
1207+
* fsync on a pipe results in unnecessary
1208+
* synchronization with the reader on some platforms.
1209+
*/
1210+
finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
1211+
CSUM_HASH_IN_STREAM | CSUM_CLOSE);
12081212
} else if (nr_written == nr_remaining) {
1209-
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
1213+
finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
1214+
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
12101215
} else {
1211-
int fd = finalize_hashfile(f, hash, 0);
1216+
/*
1217+
* If we wrote the wrong number of entries in the
1218+
* header, rewrite it like in fast-import.
1219+
*/
1220+
1221+
int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
12121222
fixup_pack_header_footer(fd, hash, pack_tmp_name,
12131223
nr_written, hash, offset);
12141224
close(fd);

bulk-checkin.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
5353
unlink(state->pack_tmp_name);
5454
goto clear_exit;
5555
} else if (state->nr_written == 1) {
56-
finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
56+
finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
57+
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
5758
} else {
58-
int fd = finalize_hashfile(state->f, hash, 0);
59+
int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
5960
fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
6061
state->nr_written, hash,
6162
state->offset);

cache.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,27 @@ void reset_shared_repository(void);
993993
extern int read_replace_refs;
994994
extern char *git_replace_ref_base;
995995

996+
/*
997+
* These values are used to help identify parts of a repository to fsync.
998+
* FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
999+
* repository and so shouldn't be fsynced.
1000+
*/
1001+
enum fsync_component {
1002+
FSYNC_COMPONENT_NONE,
1003+
FSYNC_COMPONENT_LOOSE_OBJECT = 1 << 0,
1004+
FSYNC_COMPONENT_PACK = 1 << 1,
1005+
FSYNC_COMPONENT_PACK_METADATA = 1 << 2,
1006+
FSYNC_COMPONENT_COMMIT_GRAPH = 1 << 3,
1007+
};
1008+
1009+
#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
1010+
FSYNC_COMPONENT_PACK_METADATA | \
1011+
FSYNC_COMPONENT_COMMIT_GRAPH)
1012+
1013+
/*
1014+
* A bitmask indicating which components of the repo should be fsynced.
1015+
*/
1016+
extern enum fsync_component fsync_components;
9961017
extern int fsync_object_files;
9971018
extern int use_fsync;
9981019

@@ -1707,6 +1728,8 @@ int copy_file_with_time(const char *dst, const char *src, int mode);
17071728

17081729
void write_or_die(int fd, const void *buf, size_t count);
17091730
void fsync_or_die(int fd, const char *);
1731+
int fsync_component(enum fsync_component component, int fd);
1732+
void fsync_component_or_die(enum fsync_component component, int fd, const char *msg);
17101733

17111734
ssize_t read_in_full(int fd, void *buf, size_t count);
17121735
ssize_t write_in_full(int fd, const void *buf, size_t count);

commit-graph.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1942,7 +1942,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
19421942
}
19431943

19441944
close_commit_graph(ctx->r->objects);
1945-
finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1945+
finalize_hashfile(f, file_hash, FSYNC_COMPONENT_COMMIT_GRAPH,
1946+
CSUM_HASH_IN_STREAM | CSUM_FSYNC);
19461947
free_chunkfile(cf);
19471948

19481949
if (ctx->split) {

csum-file.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ static void free_hashfile(struct hashfile *f)
5858
free(f);
5959
}
6060

61-
int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
61+
int finalize_hashfile(struct hashfile *f, unsigned char *result,
62+
enum fsync_component component, unsigned int flags)
6263
{
6364
int fd;
6465

@@ -69,7 +70,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
6970
if (flags & CSUM_HASH_IN_STREAM)
7071
flush(f, f->buffer, the_hash_algo->rawsz);
7172
if (flags & CSUM_FSYNC)
72-
fsync_or_die(f->fd, f->name);
73+
fsync_component_or_die(component, f->fd, f->name);
7374
if (flags & CSUM_CLOSE) {
7475
if (close(f->fd))
7576
die_errno("%s: sha1 file error on close", f->name);

csum-file.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef CSUM_FILE_H
22
#define CSUM_FILE_H
33

4+
#include "cache.h"
45
#include "hash.h"
56

67
struct progress;
@@ -38,7 +39,7 @@ int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
3839
struct hashfile *hashfd(int fd, const char *name);
3940
struct hashfile *hashfd_check(const char *name);
4041
struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp);
41-
int finalize_hashfile(struct hashfile *, unsigned char *, unsigned int);
42+
int finalize_hashfile(struct hashfile *, unsigned char *, enum fsync_component, unsigned int);
4243
void hashwrite(struct hashfile *, const void *, unsigned int);
4344
void hashflush(struct hashfile *f);
4445
void crc32_begin(struct hashfile *);

environment.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ int pack_compression_level = Z_DEFAULT_COMPRESSION;
4545
int fsync_object_files;
4646
int use_fsync = -1;
4747
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
48+
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
4849
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
4950
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
5051
size_t delta_base_cache_limit = 96 * 1024 * 1024;

midx.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1438,7 +1438,8 @@ static int write_midx_internal(const char *object_dir,
14381438
write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
14391439
write_chunkfile(cf, &ctx);
14401440

1441-
finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
1441+
finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
1442+
CSUM_FSYNC | CSUM_HASH_IN_STREAM);
14421443
free_chunkfile(cf);
14431444

14441445
if (flags & MIDX_WRITE_REV_INDEX &&

0 commit comments

Comments
 (0)