Skip to content

Commit 6b67e0d

Browse files
committed
fetch: verify we have everything we need before updating our ref
The "git fetch" command works in two phases. The remote side tells us what objects are at the tip of the refs we are fetching from, and transfers the objects missing from our side. After storing the objects in our repository, we update our remote tracking branches to point at the updated tips of the refs. A broken or malicious remote side could send a perfectly well-formed pack data during the object transfer phase, but there is no guarantee that the given data actually fill the gap between the objects we originally had and the refs we are updating to. Although this kind of breakage can be caught by running fsck after a fetch, it is much cheaper to verify that everything that is reachable from the tips of the refs we fetched are indeed fully connected to the tips of our current set of refs before we update them. Signed-off-by: Junio C Hamano <[email protected]>
1 parent 5a48d24 commit 6b67e0d

File tree

1 file changed

+63
-56
lines changed

1 file changed

+63
-56
lines changed

builtin/fetch.c

Lines changed: 63 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,64 @@ static int update_local_ref(struct ref *ref,
345345
}
346346
}
347347

348+
/*
349+
* The ref_map records the tips of the refs we are fetching. If
350+
*
351+
* $ git rev-list --verify-objects --stdin --not --all
352+
*
353+
* (feeding all the refs in ref_map on its standard input) does not
354+
* error out, that means everything reachable from these updated refs
355+
* locally exists and is connected to some of our existing refs.
356+
*
357+
* Returns 0 if everything is connected, non-zero otherwise.
358+
*/
359+
static int check_everything_connected(struct ref *ref_map, int quiet)
360+
{
361+
struct child_process rev_list;
362+
const char *argv[] = {"rev-list", "--verify-objects",
363+
"--stdin", "--not", "--all", NULL, NULL};
364+
char commit[41];
365+
struct ref *ref;
366+
int err = 0;
367+
368+
if (!ref_map)
369+
return 0;
370+
371+
if (quiet)
372+
argv[5] = "--quiet";
373+
374+
memset(&rev_list, 0, sizeof(rev_list));
375+
rev_list.argv = argv;
376+
rev_list.git_cmd = 1;
377+
rev_list.in = -1;
378+
rev_list.no_stdout = 1;
379+
rev_list.no_stderr = quiet;
380+
if (start_command(&rev_list))
381+
return error(_("Could not run 'git rev-list'"));
382+
383+
sigchain_push(SIGPIPE, SIG_IGN);
384+
385+
memcpy(commit + 40, "\n", 2);
386+
for (ref = ref_map; ref; ref = ref->next) {
387+
memcpy(commit, sha1_to_hex(ref->old_sha1), 40);
388+
if (write_in_full(rev_list.in, commit, 41) < 0) {
389+
if (errno != EPIPE && errno != EINVAL)
390+
error(_("failed write to rev-list: %s"),
391+
strerror(errno));
392+
err = -1;
393+
break;
394+
}
395+
}
396+
if (close(rev_list.in)) {
397+
error(_("failed to close rev-list's stdin: %s"), strerror(errno));
398+
err = -1;
399+
}
400+
401+
sigchain_pop(SIGPIPE);
402+
403+
return finish_command(&rev_list) || err;
404+
}
405+
348406
static int store_updated_refs(const char *raw_url, const char *remote_name,
349407
struct ref *ref_map)
350408
{
@@ -364,6 +422,10 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
364422
url = transport_anonymize_url(raw_url);
365423
else
366424
url = xstrdup("foreign");
425+
426+
if (check_everything_connected(ref_map, 0))
427+
return error(_("%s did not send all necessary objects\n"), url);
428+
367429
for (rm = ref_map; rm; rm = rm->next) {
368430
struct ref *ref = NULL;
369431

@@ -457,24 +519,9 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
457519
* We would want to bypass the object transfer altogether if
458520
* everything we are going to fetch already exists and is connected
459521
* locally.
460-
*
461-
* The refs we are going to fetch are in ref_map. If running
462-
*
463-
* $ git rev-list --objects --stdin --not --all
464-
*
465-
* (feeding all the refs in ref_map on its standard input)
466-
* does not error out, that means everything reachable from the
467-
* refs we are going to fetch exists and is connected to some of
468-
* our existing refs.
469522
*/
470523
static int quickfetch(struct ref *ref_map)
471524
{
472-
struct child_process revlist;
473-
struct ref *ref;
474-
int err;
475-
const char *argv[] = {"rev-list",
476-
"--quiet", "--objects", "--stdin", "--not", "--all", NULL};
477-
478525
/*
479526
* If we are deepening a shallow clone we already have these
480527
* objects reachable. Running rev-list here will return with
@@ -484,47 +531,7 @@ static int quickfetch(struct ref *ref_map)
484531
*/
485532
if (depth)
486533
return -1;
487-
488-
if (!ref_map)
489-
return 0;
490-
491-
memset(&revlist, 0, sizeof(revlist));
492-
revlist.argv = argv;
493-
revlist.git_cmd = 1;
494-
revlist.no_stdout = 1;
495-
revlist.no_stderr = 1;
496-
revlist.in = -1;
497-
498-
err = start_command(&revlist);
499-
if (err) {
500-
error(_("could not run rev-list"));
501-
return err;
502-
}
503-
504-
/*
505-
* If rev-list --stdin encounters an unknown commit, it terminates,
506-
* which will cause SIGPIPE in the write loop below.
507-
*/
508-
sigchain_push(SIGPIPE, SIG_IGN);
509-
510-
for (ref = ref_map; ref; ref = ref->next) {
511-
if (write_in_full(revlist.in, sha1_to_hex(ref->old_sha1), 40) < 0 ||
512-
write_str_in_full(revlist.in, "\n") < 0) {
513-
if (errno != EPIPE && errno != EINVAL)
514-
error(_("failed write to rev-list: %s"), strerror(errno));
515-
err = -1;
516-
break;
517-
}
518-
}
519-
520-
if (close(revlist.in)) {
521-
error(_("failed to close rev-list's stdin: %s"), strerror(errno));
522-
err = -1;
523-
}
524-
525-
sigchain_pop(SIGPIPE);
526-
527-
return finish_command(&revlist) || err;
534+
return check_everything_connected(ref_map, 1);
528535
}
529536

530537
static int fetch_refs(struct transport *transport, struct ref *ref_map)

0 commit comments

Comments
 (0)