Skip to content

Commit c7b190d

Browse files
pks-tgitster
authored andcommitted
fetch: implement support for atomic reference updates
When executing a fetch, then git will currently allocate one reference transaction per reference update and directly commit it. This means that fetches are non-atomic: even if some of the reference updates fail, others may still succeed and modify local references. This is fine in many scenarios, but this strategy has its downsides. - The view of remote references may be inconsistent and may show a bastardized state of the remote repository. - Batching together updates may improve performance in certain scenarios. While the impact probably isn't as pronounced with loose references, the upcoming reftable backend may benefit as it needs to write less files in case the update is batched. - The reference-update hook is currently being executed twice per updated reference. While this doesn't matter when there is no such hook, we have seen severe performance regressions when doing a git-fetch(1) with reference-transaction hook when the remote repository has hundreds of thousands of references. Similar to `git push --atomic`, this commit thus introduces atomic fetches. Instead of allocating one reference transaction per updated reference, it causes us to only allocate a single transaction and commit it as soon as all updates were received. If locking of any reference fails, then we abort the complete transaction and don't update any reference, which gives us an all-or-nothing fetch. Note that this may not completely fix the first of above downsides, as the consistent view also depends on the server-side. If the server doesn't have a consistent view of its own references during the reference negotiation phase, then the client would get the same inconsistent view the server has. This is a separate problem though and, if it actually exists, can be fixed at a later point. This commit also changes the way we write FETCH_HEAD in case `--atomic` is passed. Instead of writing changes as we go, we need to accumulate all changes first and only commit them at the end when we know that all reference updates succeeded. Ideally, we'd just do so via a temporary file so that we don't need to carry all updates in-memory. This isn't trivially doable though considering the `--append` mode, where we do not truncate the file but simply append to it. And given that we support concurrent processes appending to FETCH_HEAD at the same time without any loss of data, seeding the temporary file with current contents of FETCH_HEAD initially and then doing a rename wouldn't work either. So this commit implements the simple strategy of buffering all changes and appending them to the file on commit. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent d4c8db8 commit c7b190d

File tree

3 files changed

+213
-5
lines changed

3 files changed

+213
-5
lines changed

Documentation/fetch-options.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
existing contents of `.git/FETCH_HEAD`. Without this
88
option old data in `.git/FETCH_HEAD` will be overwritten.
99

10+
--atomic::
11+
Use an atomic transaction to update local refs. Either all refs are
12+
updated, or on error, no refs are updated.
13+
1014
--depth=<depth>::
1115
Limit fetching to the specified number of commits from the tip of
1216
each remote branch history. If fetching to a 'shallow' repository

builtin/fetch.c

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ static int enable_auto_gc = 1;
6363
static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
6464
static int max_jobs = -1, submodule_fetch_jobs_config = -1;
6565
static int fetch_parallel_config = 1;
66+
static int atomic_fetch;
6667
static enum transport_family family;
6768
static const char *depth;
6869
static const char *deepen_since;
@@ -144,6 +145,8 @@ static struct option builtin_fetch_options[] = {
144145
N_("set upstream for git pull/fetch")),
145146
OPT_BOOL('a', "append", &append,
146147
N_("append to .git/FETCH_HEAD instead of overwriting")),
148+
OPT_BOOL(0, "atomic", &atomic_fetch,
149+
N_("use atomic transaction to update references")),
147150
OPT_STRING(0, "upload-pack", &upload_pack, N_("path"),
148151
N_("path to upload pack on remote end")),
149152
OPT__FORCE(&force, N_("force overwrite of local reference"), 0),
@@ -970,13 +973,23 @@ static void append_fetch_head(struct fetch_head *fetch_head,
970973
strbuf_addch(&fetch_head->buf, url[i]);
971974
strbuf_addch(&fetch_head->buf, '\n');
972975

973-
strbuf_write(&fetch_head->buf, fetch_head->fp);
974-
strbuf_reset(&fetch_head->buf);
976+
/*
977+
* When using an atomic fetch, we do not want to update FETCH_HEAD if
978+
* any of the reference updates fails. We thus have to write all
979+
* updates to a buffer first and only commit it as soon as all
980+
* references have been successfully updated.
981+
*/
982+
if (!atomic_fetch) {
983+
strbuf_write(&fetch_head->buf, fetch_head->fp);
984+
strbuf_reset(&fetch_head->buf);
985+
}
975986
}
976987

977988
static void commit_fetch_head(struct fetch_head *fetch_head)
978989
{
979-
/* Nothing to commit yet. */
990+
if (!fetch_head->fp || !atomic_fetch)
991+
return;
992+
strbuf_write(&fetch_head->buf, fetch_head->fp);
980993
}
981994

982995
static void close_fetch_head(struct fetch_head *fetch_head)
@@ -1003,7 +1016,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
10031016
struct fetch_head fetch_head;
10041017
struct commit *commit;
10051018
int url_len, i, rc = 0;
1006-
struct strbuf note = STRBUF_INIT;
1019+
struct strbuf note = STRBUF_INIT, err = STRBUF_INIT;
1020+
struct ref_transaction *transaction = NULL;
10071021
const char *what, *kind;
10081022
struct ref *rm;
10091023
char *url;
@@ -1029,6 +1043,14 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
10291043
}
10301044
}
10311045

1046+
if (atomic_fetch) {
1047+
transaction = ref_transaction_begin(&err);
1048+
if (!transaction) {
1049+
error("%s", err.buf);
1050+
goto abort;
1051+
}
1052+
}
1053+
10321054
prepare_format_display(ref_map);
10331055

10341056
/*
@@ -1105,7 +1127,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
11051127

11061128
strbuf_reset(&note);
11071129
if (ref) {
1108-
rc |= update_local_ref(ref, NULL, what,
1130+
rc |= update_local_ref(ref, transaction, what,
11091131
rm, &note, summary_width);
11101132
free(ref);
11111133
} else if (write_fetch_head || dry_run) {
@@ -1131,6 +1153,14 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
11311153
}
11321154
}
11331155

1156+
if (!rc && transaction) {
1157+
rc = ref_transaction_commit(transaction, &err);
1158+
if (rc) {
1159+
error("%s", err.buf);
1160+
goto abort;
1161+
}
1162+
}
1163+
11341164
if (!rc)
11351165
commit_fetch_head(&fetch_head);
11361166

@@ -1150,6 +1180,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
11501180

11511181
abort:
11521182
strbuf_release(&note);
1183+
strbuf_release(&err);
1184+
ref_transaction_free(transaction);
11531185
free(url);
11541186
close_fetch_head(&fetch_head);
11551187
return rc;
@@ -1961,6 +1993,10 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
19611993
die(_("--filter can only be used with the remote "
19621994
"configured in extensions.partialclone"));
19631995

1996+
if (atomic_fetch)
1997+
die(_("--atomic can only be used when fetching "
1998+
"from one remote"));
1999+
19642000
if (stdin_refspecs)
19652001
die(_("--stdin can only be used when fetching "
19662002
"from one remote"));

t/t5510-fetch.sh

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,174 @@ test_expect_success 'fetch --prune --tags with refspec prunes based on refspec'
176176
git rev-parse sometag
177177
'
178178

179+
test_expect_success 'fetch --atomic works with a single branch' '
180+
test_when_finished "rm -rf \"$D\"/atomic" &&
181+
182+
cd "$D" &&
183+
git clone . atomic &&
184+
git branch atomic-branch &&
185+
oid=$(git rev-parse atomic-branch) &&
186+
echo "$oid" >expected &&
187+
188+
git -C atomic fetch --atomic origin &&
189+
git -C atomic rev-parse origin/atomic-branch >actual &&
190+
test_cmp expected actual &&
191+
test $oid = "$(git -C atomic rev-parse --verify FETCH_HEAD)"
192+
'
193+
194+
test_expect_success 'fetch --atomic works with multiple branches' '
195+
test_when_finished "rm -rf \"$D\"/atomic" &&
196+
197+
cd "$D" &&
198+
git clone . atomic &&
199+
git branch atomic-branch-1 &&
200+
git branch atomic-branch-2 &&
201+
git branch atomic-branch-3 &&
202+
git rev-parse refs/heads/atomic-branch-1 refs/heads/atomic-branch-2 refs/heads/atomic-branch-3 >actual &&
203+
204+
git -C atomic fetch --atomic origin &&
205+
git -C atomic rev-parse refs/remotes/origin/atomic-branch-1 refs/remotes/origin/atomic-branch-2 refs/remotes/origin/atomic-branch-3 >expected &&
206+
test_cmp expected actual
207+
'
208+
209+
test_expect_success 'fetch --atomic works with mixed branches and tags' '
210+
test_when_finished "rm -rf \"$D\"/atomic" &&
211+
212+
cd "$D" &&
213+
git clone . atomic &&
214+
git branch atomic-mixed-branch &&
215+
git tag atomic-mixed-tag &&
216+
git rev-parse refs/heads/atomic-mixed-branch refs/tags/atomic-mixed-tag >actual &&
217+
218+
git -C atomic fetch --tags --atomic origin &&
219+
git -C atomic rev-parse refs/remotes/origin/atomic-mixed-branch refs/tags/atomic-mixed-tag >expected &&
220+
test_cmp expected actual
221+
'
222+
223+
test_expect_success 'fetch --atomic prunes references' '
224+
test_when_finished "rm -rf \"$D\"/atomic" &&
225+
226+
cd "$D" &&
227+
git branch atomic-prune-delete &&
228+
git clone . atomic &&
229+
git branch --delete atomic-prune-delete &&
230+
git branch atomic-prune-create &&
231+
git rev-parse refs/heads/atomic-prune-create >actual &&
232+
233+
git -C atomic fetch --prune --atomic origin &&
234+
test_must_fail git -C atomic rev-parse refs/remotes/origin/atomic-prune-delete &&
235+
git -C atomic rev-parse refs/remotes/origin/atomic-prune-create >expected &&
236+
test_cmp expected actual
237+
'
238+
239+
test_expect_success 'fetch --atomic aborts with non-fast-forward update' '
240+
test_when_finished "rm -rf \"$D\"/atomic" &&
241+
242+
cd "$D" &&
243+
git branch atomic-non-ff &&
244+
git clone . atomic &&
245+
git rev-parse HEAD >actual &&
246+
247+
git branch atomic-new-branch &&
248+
parent_commit=$(git rev-parse atomic-non-ff~) &&
249+
git update-ref refs/heads/atomic-non-ff $parent_commit &&
250+
251+
test_must_fail git -C atomic fetch --atomic origin refs/heads/*:refs/remotes/origin/* &&
252+
test_must_fail git -C atomic rev-parse refs/remotes/origin/atomic-new-branch &&
253+
git -C atomic rev-parse refs/remotes/origin/atomic-non-ff >expected &&
254+
test_cmp expected actual &&
255+
test_must_be_empty atomic/.git/FETCH_HEAD
256+
'
257+
258+
test_expect_success 'fetch --atomic executes a single reference transaction only' '
259+
test_when_finished "rm -rf \"$D\"/atomic" &&
260+
261+
cd "$D" &&
262+
git clone . atomic &&
263+
git branch atomic-hooks-1 &&
264+
git branch atomic-hooks-2 &&
265+
head_oid=$(git rev-parse HEAD) &&
266+
267+
cat >expected <<-EOF &&
268+
prepared
269+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-1
270+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-2
271+
committed
272+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-1
273+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-2
274+
EOF
275+
276+
rm -f atomic/actual &&
277+
write_script atomic/.git/hooks/reference-transaction <<-\EOF &&
278+
( echo "$*" && cat ) >>actual
279+
EOF
280+
281+
git -C atomic fetch --atomic origin &&
282+
test_cmp expected atomic/actual
283+
'
284+
285+
test_expect_success 'fetch --atomic aborts all reference updates if hook aborts' '
286+
test_when_finished "rm -rf \"$D\"/atomic" &&
287+
288+
cd "$D" &&
289+
git clone . atomic &&
290+
git branch atomic-hooks-abort-1 &&
291+
git branch atomic-hooks-abort-2 &&
292+
git branch atomic-hooks-abort-3 &&
293+
git tag atomic-hooks-abort &&
294+
head_oid=$(git rev-parse HEAD) &&
295+
296+
cat >expected <<-EOF &&
297+
prepared
298+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-1
299+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-2
300+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-3
301+
$ZERO_OID $head_oid refs/tags/atomic-hooks-abort
302+
aborted
303+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-1
304+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-2
305+
$ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-3
306+
$ZERO_OID $head_oid refs/tags/atomic-hooks-abort
307+
EOF
308+
309+
rm -f atomic/actual &&
310+
write_script atomic/.git/hooks/reference-transaction <<-\EOF &&
311+
( echo "$*" && cat ) >>actual
312+
exit 1
313+
EOF
314+
315+
git -C atomic for-each-ref >expected-refs &&
316+
test_must_fail git -C atomic fetch --tags --atomic origin &&
317+
git -C atomic for-each-ref >actual-refs &&
318+
test_cmp expected-refs actual-refs &&
319+
test_must_be_empty atomic/.git/FETCH_HEAD
320+
'
321+
322+
test_expect_success 'fetch --atomic --append appends to FETCH_HEAD' '
323+
test_when_finished "rm -rf \"$D\"/atomic" &&
324+
325+
cd "$D" &&
326+
git clone . atomic &&
327+
oid=$(git rev-parse HEAD) &&
328+
329+
git branch atomic-fetch-head-1 &&
330+
git -C atomic fetch --atomic origin atomic-fetch-head-1 &&
331+
test_line_count = 1 atomic/.git/FETCH_HEAD &&
332+
333+
git branch atomic-fetch-head-2 &&
334+
git -C atomic fetch --atomic --append origin atomic-fetch-head-2 &&
335+
test_line_count = 2 atomic/.git/FETCH_HEAD &&
336+
cp atomic/.git/FETCH_HEAD expected &&
337+
338+
write_script atomic/.git/hooks/reference-transaction <<-\EOF &&
339+
exit 1
340+
EOF
341+
342+
git branch atomic-fetch-head-3 &&
343+
test_must_fail git -C atomic fetch --atomic --append origin atomic-fetch-head-3 &&
344+
test_cmp expected atomic/.git/FETCH_HEAD
345+
'
346+
179347
test_expect_success '--refmap="" ignores configured refspec' '
180348
cd "$TRASH_DIRECTORY" &&
181349
git clone "$D" remote-refs &&

0 commit comments

Comments
 (0)