Skip to content

Commit 5707d2c

Browse files
authored
[2.51.0 Bug] Missing singleton objects in 'git repack -adf --path-walk' (#796)
This is a port of gitgitgadget#1956, and I deem it one of the blockers against making a full release of Microsoft Git v2.51. Here is what Stolee wrote in the cover letter: Now that the `--path-walk` feature for `git repack` is out in the wild and getting more visibility than it did in the Git for Windows fork, the following issue was brought to my attention: Some folks would report missing objects after `git repack -adf --path-walk`! It turns out that this snuck through the cracks because it was pretty difficult to create a reproducing test case (patch 1) but it boils down to: 1. A path has exactly one version across all of the history being repacked. 2. That path is in the index. 3. The object at that path is not a loose object. 4. pack.useSparse=true in the config (this is the default) It is also something where users don't necessarily notice the missing objects until they fetch and a missing object is used as a delta base. Doing normal checkouts doesn't cause changes to these files, so they are never opened by Git. Users hitting this issue can usually recover using `git fetch --refetch` to repopulate the missing objects from a remote (unless they never had a remote at all). Patch 1 introduces the fix for this issue, which is related to forgetting to initialize a struct indicator when walking the pending objects. When reflecting on the ways that I missed this when building the feature, I think the core issue was an overreliance on using bare repos in testing. I also think that the way that the UNINTERESTING object exploration was implemented was particularly fragile to missing updates to the initialization of the struct, so patch 2 adds a new initializer to reduce duplicate code and to help avoid this mistake in the future.
2 parents 28b5b51 + d5b5efb commit 5707d2c

File tree

2 files changed

+88
-30
lines changed

2 files changed

+88
-30
lines changed

path-walk.c

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,24 @@ static void push_to_stack(struct path_walk_context *ctx,
105105
prio_queue_put(&ctx->path_stack, xstrdup(path));
106106
}
107107

108+
static void add_path_to_list(struct path_walk_context *ctx,
109+
const char *path,
110+
enum object_type type,
111+
struct object_id *oid,
112+
int interesting)
113+
{
114+
struct type_and_oid_list *list = strmap_get(&ctx->paths_to_lists, path);
115+
116+
if (!list) {
117+
CALLOC_ARRAY(list, 1);
118+
list->type = type;
119+
strmap_put(&ctx->paths_to_lists, path, list);
120+
}
121+
122+
list->maybe_interesting |= interesting;
123+
oid_array_append(&list->oids, oid);
124+
}
125+
108126
static int add_tree_entries(struct path_walk_context *ctx,
109127
const char *base_path,
110128
struct object_id *oid)
@@ -129,7 +147,6 @@ static int add_tree_entries(struct path_walk_context *ctx,
129147

130148
init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
131149
while (tree_entry(&desc, &entry)) {
132-
struct type_and_oid_list *list;
133150
struct object *o;
134151
/* Not actually true, but we will ignore submodules later. */
135152
enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;
@@ -190,17 +207,10 @@ static int add_tree_entries(struct path_walk_context *ctx,
190207
continue;
191208
}
192209

193-
if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
194-
CALLOC_ARRAY(list, 1);
195-
list->type = type;
196-
strmap_put(&ctx->paths_to_lists, path.buf, list);
197-
}
198-
push_to_stack(ctx, path.buf);
199-
200-
if (!(o->flags & UNINTERESTING))
201-
list->maybe_interesting = 1;
210+
add_path_to_list(ctx, path.buf, type, &entry.oid,
211+
!(o->flags & UNINTERESTING));
202212

203-
oid_array_append(&list->oids, &entry.oid);
213+
push_to_stack(ctx, path.buf);
204214
}
205215

206216
free_tree_buffer(tree);
@@ -377,15 +387,9 @@ static int setup_pending_objects(struct path_walk_info *info,
377387
if (!info->trees)
378388
continue;
379389
if (pending->path) {
380-
struct type_and_oid_list *list;
381390
char *path = *pending->path ? xstrfmt("%s/", pending->path)
382391
: xstrdup("");
383-
if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
384-
CALLOC_ARRAY(list, 1);
385-
list->type = OBJ_TREE;
386-
strmap_put(&ctx->paths_to_lists, path, list);
387-
}
388-
oid_array_append(&list->oids, &obj->oid);
392+
add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1);
389393
free(path);
390394
} else {
391395
/* assume a root tree, such as a lightweight tag. */
@@ -396,19 +400,10 @@ static int setup_pending_objects(struct path_walk_info *info,
396400
case OBJ_BLOB:
397401
if (!info->blobs)
398402
continue;
399-
if (pending->path) {
400-
struct type_and_oid_list *list;
401-
char *path = pending->path;
402-
if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
403-
CALLOC_ARRAY(list, 1);
404-
list->type = OBJ_BLOB;
405-
strmap_put(&ctx->paths_to_lists, path, list);
406-
}
407-
oid_array_append(&list->oids, &obj->oid);
408-
} else {
409-
/* assume a root tree, such as a lightweight tag. */
403+
if (pending->path)
404+
add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1);
405+
else
410406
oid_array_append(&tagged_blobs->oids, &obj->oid);
411-
}
412407
break;
413408

414409
case OBJ_COMMIT:

t/t7700-repack.sh

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,4 +838,67 @@ test_expect_success '-n overrides repack.updateServerInfo=true' '
838838
test_server_info_missing
839839
'
840840

841+
test_expect_success 'pending objects are repacked appropriately' '
842+
test_when_finished rm -rf pending &&
843+
git init pending &&
844+
845+
(
846+
cd pending &&
847+
848+
# Commit file, a/b/c and never change them.
849+
mkdir -p a/b &&
850+
echo singleton >file &&
851+
echo stuff >a/b/c &&
852+
echo more >a/d &&
853+
git add file a &&
854+
git commit -m "single blobs" &&
855+
856+
# Files a/d and a/e will not be singletons.
857+
echo d >a/d &&
858+
echo e >a/e &&
859+
git add a &&
860+
git commit -m "more blobs" &&
861+
862+
# This use of a sparse index helps to force
863+
# test that the cache-tree is walked, too.
864+
git sparse-checkout set --sparse-index a x &&
865+
866+
# Create staged changes:
867+
# * a/e now has multiple versions.
868+
# * a/i now has only one version.
869+
echo f >a/d &&
870+
echo h >a/e &&
871+
echo i >a/i &&
872+
git add a &&
873+
874+
# Stage and unstage a change to make use of
875+
# resolve-undo cache and how that impacts fsck.
876+
mkdir x &&
877+
echo y >x/y &&
878+
git add x &&
879+
xy=$(git rev-parse :x/y) &&
880+
git rm --cached x/y &&
881+
882+
# The blob for x/y must persist through repacks,
883+
# but fsck currently ignores the REUC extension
884+
# for finding links to the blob.
885+
cat >expect <<-EOF &&
886+
dangling blob $xy
887+
EOF
888+
889+
# Bring the loose objects into a packfile to avoid
890+
# leftovers in next test. Without this, the loose
891+
# objects persist and the test succeeds for other
892+
# reasons.
893+
git repack -adf &&
894+
git fsck >out &&
895+
test_cmp expect out &&
896+
897+
# Test path walk version with pack.useSparse.
898+
git -c pack.useSparse=true repack -adf --path-walk &&
899+
git fsck >out &&
900+
test_cmp expect out
901+
)
902+
'
903+
841904
test_done

0 commit comments

Comments
 (0)