Skip to content

Commit 498f1f6

Browse files
jonathantanmy authored and gitster committed
fsck: introduce partialclone extension
Currently, Git does not support repos with very large numbers of objects or repos that wish to minimize manipulation of certain blobs (for example, because they are very large) very well, even if the user operates mostly on part of the repo, because Git is designed on the assumption that every referenced object is available somewhere in the repo storage. In such an arrangement, the full set of objects is usually available in remote storage, ready to be lazily downloaded. Teach fsck about the new state of affairs. In this commit, teach fsck that missing promisor objects referenced from the reflog are not an error case; in future commits, fsck will be taught about other cases. Signed-off-by: Jonathan Tan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 75b97fe commit 498f1f6

File tree

5 files changed

+171
-5
lines changed

5 files changed

+171
-5
lines changed

builtin/fsck.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
398398
xstrfmt("%s@{%"PRItime"}", refname, timestamp));
399399
obj->flags |= USED;
400400
mark_object_reachable(obj);
401-
} else {
401+
} else if (!is_promisor_object(oid)) {
402402
error("%s: invalid reflog entry %s", refname, oid_to_hex(oid));
403403
errors_found |= ERROR_REACHABLE;
404404
}

cache.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1587,7 +1587,8 @@ extern struct packed_git {
15871587
unsigned pack_local:1,
15881588
pack_keep:1,
15891589
freshened:1,
1590-
do_not_close:1;
1590+
do_not_close:1,
1591+
pack_promisor:1;
15911592
unsigned char sha1[20];
15921593
struct revindex_entry *revindex;
15931594
/* something like ".git/objects/pack/xxxxx.pack" */

packfile.c

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
#include "list.h"
99
#include "streaming.h"
1010
#include "sha1-lookup.h"
11+
#include "commit.h"
12+
#include "object.h"
13+
#include "tag.h"
14+
#include "tree-walk.h"
15+
#include "tree.h"
1116

1217
char *odb_pack_name(struct strbuf *buf,
1318
const unsigned char *sha1,
@@ -643,17 +648,21 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
643648
return NULL;
644649

645650
/*
646-
* ".pack" is long enough to hold any suffix we're adding (and
651+
* ".promisor" is long enough to hold any suffix we're adding (and
647652
* the use xsnprintf double-checks that)
648653
*/
649-
alloc = st_add3(path_len, strlen(".pack"), 1);
654+
alloc = st_add3(path_len, strlen(".promisor"), 1);
650655
p = alloc_packed_git(alloc);
651656
memcpy(p->pack_name, path, path_len);
652657

653658
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
654659
if (!access(p->pack_name, F_OK))
655660
p->pack_keep = 1;
656661

662+
xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
663+
if (!access(p->pack_name, F_OK))
664+
p->pack_promisor = 1;
665+
657666
xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
658667
if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
659668
free(p);
@@ -781,7 +790,8 @@ static void prepare_packed_git_one(char *objdir, int local)
781790
if (ends_with(de->d_name, ".idx") ||
782791
ends_with(de->d_name, ".pack") ||
783792
ends_with(de->d_name, ".bitmap") ||
784-
ends_with(de->d_name, ".keep"))
793+
ends_with(de->d_name, ".keep") ||
794+
ends_with(de->d_name, ".promisor"))
785795
string_list_append(&garbage, path.buf);
786796
else
787797
report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
@@ -1889,6 +1899,9 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
18891899
for (p = packed_git; p; p = p->next) {
18901900
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
18911901
continue;
1902+
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
1903+
!p->pack_promisor)
1904+
continue;
18921905
if (open_pack_index(p)) {
18931906
pack_errors = 1;
18941907
continue;
@@ -1899,3 +1912,61 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
18991912
}
19001913
return r ? r : pack_errors;
19011914
}
1915+
1916+
static int add_promisor_object(const struct object_id *oid,
1917+
struct packed_git *pack,
1918+
uint32_t pos,
1919+
void *set_)
1920+
{
1921+
struct oidset *set = set_;
1922+
struct object *obj = parse_object(oid);
1923+
if (!obj)
1924+
return 1;
1925+
1926+
oidset_insert(set, oid);
1927+
1928+
/*
1929+
* If this is a tree, commit, or tag, the objects it refers
1930+
* to are also promisor objects. (Blobs refer to no objects.)
1931+
*/
1932+
if (obj->type == OBJ_TREE) {
1933+
struct tree *tree = (struct tree *)obj;
1934+
struct tree_desc desc;
1935+
struct name_entry entry;
1936+
if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
1937+
/*
1938+
* Error messages are given when packs are
1939+
* verified, so do not print any here.
1940+
*/
1941+
return 0;
1942+
while (tree_entry_gently(&desc, &entry))
1943+
oidset_insert(set, entry.oid);
1944+
} else if (obj->type == OBJ_COMMIT) {
1945+
struct commit *commit = (struct commit *) obj;
1946+
struct commit_list *parents = commit->parents;
1947+
1948+
oidset_insert(set, &commit->tree->object.oid);
1949+
for (; parents; parents = parents->next)
1950+
oidset_insert(set, &parents->item->object.oid);
1951+
} else if (obj->type == OBJ_TAG) {
1952+
struct tag *tag = (struct tag *) obj;
1953+
oidset_insert(set, &tag->tagged->oid);
1954+
}
1955+
return 0;
1956+
}
1957+
1958+
int is_promisor_object(const struct object_id *oid)
1959+
{
1960+
static struct oidset promisor_objects;
1961+
static int promisor_objects_prepared;
1962+
1963+
if (!promisor_objects_prepared) {
1964+
if (repository_format_partial_clone) {
1965+
for_each_packed_object(add_promisor_object,
1966+
&promisor_objects,
1967+
FOR_EACH_OBJECT_PROMISOR_ONLY);
1968+
}
1969+
promisor_objects_prepared = 1;
1970+
}
1971+
return oidset_contains(&promisor_objects, oid);
1972+
}

packfile.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef PACKFILE_H
22
#define PACKFILE_H
33

4+
#include "oidset.h"
5+
46
/*
57
* Generate the filename to be used for a pack file with checksum "sha1" and
68
* extension "ext". The result is written into the strbuf "buf", overwriting
@@ -124,6 +126,11 @@ extern int has_sha1_pack(const unsigned char *sha1);
124126

125127
extern int has_pack_index(const unsigned char *sha1);
126128

129+
/*
130+
* Only iterate over packs obtained from the promisor remote.
131+
*/
132+
#define FOR_EACH_OBJECT_PROMISOR_ONLY 2
133+
127134
/*
128135
* Iterate over packed objects in both the local
129136
* repository and any alternates repositories (unless the
@@ -135,4 +142,10 @@ typedef int each_packed_object_fn(const struct object_id *oid,
135142
void *data);
136143
extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags);
137144

145+
/*
146+
* Return 1 if an object in a promisor packfile is or refers to the given
147+
* object, 0 otherwise.
148+
*/
149+
extern int is_promisor_object(const struct object_id *oid);
150+
138151
#endif

t/t0410-partial-clone.sh

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/bin/sh
2+
3+
test_description='partial clone'
4+
5+
. ./test-lib.sh
6+
7+
# Remove the loose object file for object ID $2 from the repository in
# directory $1.  The sed expression inserts a "/" after the first two
# characters of the ID to form the path below .git/objects.
# Arguments are quoted so that a directory name containing whitespace or
# glob characters is passed to rm as a single path.
delete_object () {
	rm "$1/.git/objects/$(echo "$2" | sed -e 's|^..|&/|')"
}
10+
11+
# Feed standard input to "git pack-objects" run in "repo", then create an
# empty ".promisor" file next to the new pack, named after the hash that
# pack-objects wrote to its standard output.
# The redirection target is quoted because some shells complain about an
# unquoted variable there.
pack_as_from_promisor () {
	HASH=$(git -C repo pack-objects .git/objects/pack/pack) &&
	>"repo/.git/objects/pack/pack-$HASH.promisor"
}
15+
16+
test_expect_success 'missing reflog object, but promised by a commit, passes fsck' '
	test_create_repo repo &&
	test_commit -C repo my_commit &&

	# $C is a commit whose parent is $A
	A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
	C=$(git -C repo commit-tree -m c -p $A HEAD^{tree}) &&

	# Reference $A only from reflog, and delete it
	git -C repo branch my_branch "$A" &&
	git -C repo branch -f my_branch my_commit &&
	delete_object repo "$A" &&

	# State that we got $C, which refers to $A, from promisor
	# (pass the ID as an argument, not as the printf format string)
	printf "%s\n" "$C" | pack_as_from_promisor &&

	# Normally, it fails
	test_must_fail git -C repo fsck &&

	# But with the extension, it succeeds
	git -C repo config core.repositoryformatversion 1 &&
	git -C repo config extensions.partialclone "arbitrary string" &&
	git -C repo fsck
'
39+
40+
test_expect_success 'missing reflog object, but promised by a tag, passes fsck' '
	rm -rf repo &&
	test_create_repo repo &&
	test_commit -C repo my_commit &&

	# $T is an annotated tag object pointing at $A; the tag ref itself
	# is deleted again so that only the tag object remains
	A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
	git -C repo tag -a -m d my_tag_name $A &&
	T=$(git -C repo rev-parse my_tag_name) &&
	git -C repo tag -d my_tag_name &&

	# Reference $A only from reflog, and delete it
	git -C repo branch my_branch "$A" &&
	git -C repo branch -f my_branch my_commit &&
	delete_object repo "$A" &&

	# State that we got $T, which refers to $A, from promisor
	# (pass the ID as an argument, not as the printf format string)
	printf "%s\n" "$T" | pack_as_from_promisor &&

	git -C repo config core.repositoryformatversion 1 &&
	git -C repo config extensions.partialclone "arbitrary string" &&
	git -C repo fsck
'
62+
63+
test_expect_success 'missing reflog object alone fails fsck, even with extension set' '
	rm -rf repo &&
	test_create_repo repo &&
	test_commit -C repo my_commit &&

	A=$(git -C repo commit-tree -m a HEAD^{tree}) &&

	# Reference $A only from reflog, and delete it
	git -C repo branch my_branch "$A" &&
	git -C repo branch -f my_branch my_commit &&
	delete_object repo "$A" &&

	# No promisor pack is created here, so nothing vouches for $A
	git -C repo config core.repositoryformatversion 1 &&
	git -C repo config extensions.partialclone "arbitrary string" &&
	test_must_fail git -C repo fsck
'
80+
81+
test_done

0 commit comments

Comments
 (0)