Skip to content

Commit eb804cd

Browse files
committed
Merge branch 'ns/core-fsyncmethod'
Replace core.fsyncObjectFiles with two new configuration variables,
core.fsync and core.fsyncMethod.

* ns/core-fsyncmethod:
  core.fsync: documentation and user-friendly aggregate options
  core.fsync: new option to harden the index
  core.fsync: add configuration parsing
  core.fsync: introduce granular fsync control infrastructure
  core.fsyncmethod: add writeout-only mode
  wrapper: make inclusion of Windows csprng header tightly scoped
2 parents a68dfad + b9f5d03 commit eb804cd

26 files changed

+444
-60
lines changed

Documentation/config/core.txt

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -547,13 +547,63 @@ core.whitespace::
547547
is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent`
548548
errors. The default tab width is 8. Allowed values are 1 to 63.
549549

550+
core.fsync::
551+
A comma-separated list of components of the repository that
552+
should be hardened via the core.fsyncMethod when created or
553+
modified. You can disable hardening of any component by
554+
prefixing it with a '-'. Items that are not hardened may be
555+
lost in the event of an unclean system shutdown. Unless you
556+
have special requirements, it is recommended that you leave
557+
this option empty or pick one of `committed`, `added`,
558+
or `all`.
559+
+
560+
When this configuration is encountered, the set of components starts with
561+
the platform default value, disabled components are removed, and additional
562+
components are added. `none` resets the state so that the platform default
563+
is ignored.
564+
+
565+
The empty string resets the fsync configuration to the platform
566+
default. The default on most platforms is equivalent to
567+
`core.fsync=committed,-loose-object`, which has good performance,
568+
but risks losing recent work in the event of an unclean system shutdown.
569+
+
570+
* `none` clears the set of fsynced components.
571+
* `loose-object` hardens objects added to the repo in loose-object form.
572+
* `pack` hardens objects added to the repo in packfile form.
573+
* `pack-metadata` hardens packfile bitmaps and indexes.
574+
* `commit-graph` hardens the commit graph file.
575+
* `index` hardens the index when it is modified.
576+
* `objects` is an aggregate option that is equivalent to
577+
`loose-object,pack`.
578+
* `derived-metadata` is an aggregate option that is equivalent to
579+
`pack-metadata,commit-graph`.
580+
* `committed` is an aggregate option that is currently equivalent to
581+
`objects`. This mode sacrifices some performance to ensure that work
582+
that is committed to the repository with `git commit` or similar commands
583+
is hardened.
584+
* `added` is an aggregate option that is currently equivalent to
585+
`committed,index`. This mode sacrifices additional performance to
586+
ensure that the results of commands like `git add` and similar operations
587+
are hardened.
588+
* `all` is an aggregate option that syncs all individual components above.
589+
590+
core.fsyncMethod::
591+
A value indicating the strategy Git will use to harden repository data
592+
using fsync and related primitives.
593+
+
594+
* `fsync` uses the fsync() system call or platform equivalents.
595+
* `writeout-only` issues pagecache writeback requests, but depending on the
596+
filesystem and storage hardware, data added to the repository may not be
597+
durable in the event of a system crash. This is the default mode on macOS.
598+
550599
core.fsyncObjectFiles::
551600
This boolean will enable 'fsync()' when writing object files.
601+
This setting is deprecated. Use core.fsync instead.
552602
+
553-
This is a total waste of time and effort on a filesystem that orders
554-
data writes properly, but can be useful for filesystems that do not use
555-
journalling (traditional UNIX filesystems) or that only journal metadata
556-
and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
603+
This setting affects data added to the Git repository in loose-object
604+
form. When set to true, Git will issue an fsync or similar system call
605+
to flush caches so that loose-objects remain consistent in the face
606+
of an unclean system shutdown.
557607

558608
core.preloadIndex::
559609
Enable parallel index preload for operations like 'git diff'

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,8 @@ include shared.mak
414414
#
415415
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
416416
#
417+
# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
418+
#
417419
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
418420
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
419421
#
@@ -1918,6 +1920,10 @@ ifdef HAVE_CLOCK_MONOTONIC
19181920
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
19191921
endif
19201922

1923+
ifdef HAVE_SYNC_FILE_RANGE
1924+
BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
1925+
endif
1926+
19211927
ifdef NEEDS_LIBRT
19221928
EXTLIBS += -lrt
19231929
endif

builtin/fast-import.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ static void end_packfile(void)
865865
struct tag *t;
866866

867867
close_pack_windows(pack_data);
868-
finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
868+
finalize_hashfile(pack_file, cur_pack_oid.hash, FSYNC_COMPONENT_PACK, 0);
869869
fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
870870
pack_data->pack_name, object_count,
871871
cur_pack_oid.hash, pack_size);

builtin/index-pack.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,7 +1291,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
12911291
nr_objects - nr_objects_initial);
12921292
stop_progress_msg(&progress, msg.buf);
12931293
strbuf_release(&msg);
1294-
finalize_hashfile(f, tail_hash, 0);
1294+
finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0);
12951295
hashcpy(read_hash, pack_hash);
12961296
fixup_pack_header_footer(output_fd, pack_hash,
12971297
curr_pack, nr_objects,
@@ -1513,7 +1513,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
15131513
if (!from_stdin) {
15141514
close(input_fd);
15151515
} else {
1516-
fsync_or_die(output_fd, curr_pack_name);
1516+
fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
15171517
err = close(output_fd);
15181518
if (err)
15191519
die_errno(_("error while closing pack file"));

builtin/pack-objects.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,16 +1199,26 @@ static void write_pack_file(void)
11991199
display_progress(progress_state, written);
12001200
}
12011201

1202-
/*
1203-
* Did we write the wrong # entries in the header?
1204-
* If so, rewrite it like in fast-import
1205-
*/
12061202
if (pack_to_stdout) {
1207-
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
1203+
/*
1204+
* We never fsync when writing to stdout since we may
1205+
* not be writing to an actual pack file. For instance,
1206+
* the upload-pack code passes a pipe here. Calling
1207+
* fsync on a pipe results in unnecessary
1208+
* synchronization with the reader on some platforms.
1209+
*/
1210+
finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
1211+
CSUM_HASH_IN_STREAM | CSUM_CLOSE);
12081212
} else if (nr_written == nr_remaining) {
1209-
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
1213+
finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
1214+
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
12101215
} else {
1211-
int fd = finalize_hashfile(f, hash, 0);
1216+
/*
1217+
* If we wrote the wrong number of entries in the
1218+
* header, rewrite it like in fast-import.
1219+
*/
1220+
1221+
int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
12121222
fixup_pack_header_footer(fd, hash, pack_tmp_name,
12131223
nr_written, hash, offset);
12141224
close(fd);

bulk-checkin.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
5353
unlink(state->pack_tmp_name);
5454
goto clear_exit;
5555
} else if (state->nr_written == 1) {
56-
finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
56+
finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
57+
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
5758
} else {
58-
int fd = finalize_hashfile(state->f, hash, 0);
59+
int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
5960
fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
6061
state->nr_written, hash,
6162
state->offset);

cache.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,8 +993,54 @@ void reset_shared_repository(void);
993993
extern int read_replace_refs;
994994
extern char *git_replace_ref_base;
995995

996+
/*
 * These values are used to help identify parts of a repository to fsync.
 * Every component other than FSYNC_COMPONENT_NONE occupies its own bit so
 * that several components can be combined into one bitmask.
 * FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
 * repository and so shouldn't be fsynced.
 */
enum fsync_component {
	FSYNC_COMPONENT_NONE,
	FSYNC_COMPONENT_LOOSE_OBJECT		= 1 << 0,
	FSYNC_COMPONENT_PACK			= 1 << 1,
	FSYNC_COMPONENT_PACK_METADATA		= 1 << 2,
	FSYNC_COMPONENT_COMMIT_GRAPH		= 1 << 3,
	FSYNC_COMPONENT_INDEX			= 1 << 4,
};

/* Aggregate: objects in either loose or packed form. */
#define FSYNC_COMPONENTS_OBJECTS (FSYNC_COMPONENT_LOOSE_OBJECT | \
				  FSYNC_COMPONENT_PACK)

/* Aggregate: pack metadata and the commit graph. */
#define FSYNC_COMPONENTS_DERIVED_METADATA (FSYNC_COMPONENT_PACK_METADATA | \
					   FSYNC_COMPONENT_COMMIT_GRAPH)

/*
 * Default set: objects and derived metadata, minus loose objects.
 *
 * The exclusion has to be applied as a mask (`& ~`). OR-ing in
 * `~FSYNC_COMPONENT_LOOSE_OBJECT` would set every bit of the value,
 * including the loose-object and index bits, which is the opposite of
 * what is intended.
 */
#define FSYNC_COMPONENTS_DEFAULT ((FSYNC_COMPONENTS_OBJECTS | \
				   FSYNC_COMPONENTS_DERIVED_METADATA) & \
				  ~FSYNC_COMPONENT_LOOSE_OBJECT)

/* Aggregate: currently the same set as FSYNC_COMPONENTS_OBJECTS. */
#define FSYNC_COMPONENTS_COMMITTED (FSYNC_COMPONENTS_OBJECTS)

/* Aggregate: FSYNC_COMPONENTS_COMMITTED plus the index. */
#define FSYNC_COMPONENTS_ADDED (FSYNC_COMPONENTS_COMMITTED | \
				FSYNC_COMPONENT_INDEX)

/* Aggregate: every individual component defined above. */
#define FSYNC_COMPONENTS_ALL (FSYNC_COMPONENT_LOOSE_OBJECT | \
			      FSYNC_COMPONENT_PACK | \
			      FSYNC_COMPONENT_PACK_METADATA | \
			      FSYNC_COMPONENT_COMMIT_GRAPH | \
			      FSYNC_COMPONENT_INDEX)
1030+
1031+
/*
1032+
* A bitmask indicating which components of the repo should be fsynced.
1033+
*/
1034+
extern enum fsync_component fsync_components;
9961035
extern int fsync_object_files;
9971036
extern int use_fsync;
1037+
1038+
/*
 * Strategy used when hardening repository data.
 * NOTE(review): presumably these map to the `fsync` and `writeout-only`
 * values of core.fsyncMethod; the config parsing is not visible here.
 */
enum fsync_method {
	/* Full fsync. */
	FSYNC_METHOD_FSYNC = 0,
	/* Writeout only. */
	FSYNC_METHOD_WRITEOUT_ONLY = 1
};
1042+
1043+
extern enum fsync_method fsync_method;
9981044
extern int core_preload_index;
9991045
extern int precomposed_unicode;
10001046
extern int protect_hfs;
@@ -1715,6 +1761,8 @@ int copy_file_with_time(const char *dst, const char *src, int mode);
17151761

17161762
void write_or_die(int fd, const void *buf, size_t count);
17171763
void fsync_or_die(int fd, const char *);
1764+
int fsync_component(enum fsync_component component, int fd);
1765+
void fsync_component_or_die(enum fsync_component component, int fd, const char *msg);
17181766

17191767
ssize_t read_in_full(int fd, void *buf, size_t count);
17201768
ssize_t write_in_full(int fd, const void *buf, size_t count);

commit-graph.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1952,7 +1952,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
19521952
}
19531953

19541954
close_commit_graph(ctx->r->objects);
1955-
finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1955+
finalize_hashfile(f, file_hash, FSYNC_COMPONENT_COMMIT_GRAPH,
1956+
CSUM_HASH_IN_STREAM | CSUM_FSYNC);
19561957
free_chunkfile(cf);
19571958

19581959
if (ctx->split) {

compat/mingw.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,9 @@ int mingw_getpagesize(void);
329329
#define getpagesize mingw_getpagesize
330330
#endif
331331

332+
int win32_fsync_no_flush(int fd);
333+
#define fsync_no_flush win32_fsync_no_flush
334+
332335
struct rlimit {
333336
unsigned int rlim_cur;
334337
};

compat/win32/flush.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#include "git-compat-util.h"
2+
#include <winternl.h>
3+
#include "lazyload.h"
4+
5+
int win32_fsync_no_flush(int fd)
6+
{
7+
IO_STATUS_BLOCK io_status;
8+
9+
#define FLUSH_FLAGS_FILE_DATA_ONLY 1
10+
11+
DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NTAPI, NtFlushBuffersFileEx,
12+
HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
13+
PIO_STATUS_BLOCK IoStatusBlock);
14+
15+
if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
16+
errno = ENOSYS;
17+
return -1;
18+
}
19+
20+
memset(&io_status, 0, sizeof(io_status));
21+
if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
22+
NULL, 0, &io_status)) {
23+
errno = EINVAL;
24+
return -1;
25+
}
26+
27+
return 0;
28+
}

0 commit comments

Comments
 (0)