Skip to content

Commit 1efba53

Browse files
derrickstoleeGit for Windows Build Agent
authored andcommitted
pack-objects: refactor path-walk delta phase
Previously, the --path-walk option to 'git pack-objects' would compute deltas inline with the path-walk logic. This would make the progress indicator look like it is taking a long time to enumerate objects, and then very quickly computed deltas. Instead of computing deltas on each region of objects organized by tree, store a list of regions corresponding to these groups. These can later be pulled from the list for delta compression before doing the "global" delta search. This presents a new progress indicator that can be used in tests to verify that this stage is happening. The current implementation is not integrated with threads, but could be done in a future update. Since we do not attempt to sort objects by size until after exploring all trees, we can remove the previous change to t5530 due to a different error message appearing first. Signed-off-by: Derrick Stolee <[email protected]>
1 parent 1cd4d11 commit 1efba53

File tree

4 files changed

+74
-33
lines changed

4 files changed

+74
-33
lines changed

builtin/pack-objects.c

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3313,6 +3313,50 @@ static int should_attempt_deltas(struct object_entry *entry)
33133313
return 1;
33143314
}
33153315

3316+
static void find_deltas_for_region(struct object_entry *list UNUSED,
3317+
struct packing_region *region,
3318+
unsigned int *processed)
3319+
{
3320+
struct object_entry **delta_list;
3321+
uint32_t delta_list_nr = 0;
3322+
3323+
ALLOC_ARRAY(delta_list, region->nr);
3324+
for (uint32_t i = 0; i < region->nr; i++) {
3325+
struct object_entry *entry = to_pack.objects + region->start + i;
3326+
if (should_attempt_deltas(entry))
3327+
delta_list[delta_list_nr++] = entry;
3328+
}
3329+
3330+
QSORT(delta_list, delta_list_nr, type_size_sort);
3331+
find_deltas(delta_list, &delta_list_nr, window, depth, processed);
3332+
free(delta_list);
3333+
}
3334+
3335+
static void find_deltas_by_region(struct object_entry *list,
3336+
struct packing_region *regions,
3337+
uint32_t start, uint32_t nr)
3338+
{
3339+
unsigned int processed = 0;
3340+
uint32_t progress_nr;
3341+
3342+
if (!nr)
3343+
return;
3344+
3345+
progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
3346+
3347+
if (progress)
3348+
progress_state = start_progress(_("Compressing objects by path"),
3349+
progress_nr);
3350+
3351+
while (nr--)
3352+
find_deltas_for_region(list,
3353+
&regions[start++],
3354+
&processed);
3355+
3356+
display_progress(progress_state, progress_nr);
3357+
stop_progress(&progress_state);
3358+
}
3359+
33163360
static void prepare_pack(int window, int depth)
33173361
{
33183362
struct object_entry **delta_list;
@@ -3337,6 +3381,10 @@ static void prepare_pack(int window, int depth)
33373381
if (!to_pack.nr_objects || !window || !depth)
33383382
return;
33393383

3384+
if (path_walk)
3385+
find_deltas_by_region(to_pack.objects, to_pack.regions,
3386+
0, to_pack.nr_regions);
3387+
33403388
ALLOC_ARRAY(delta_list, to_pack.nr_objects);
33413389
nr_deltas = n = 0;
33423390

@@ -4292,10 +4340,8 @@ static int add_objects_by_path(const char *path,
42924340
enum object_type type,
42934341
void *data)
42944342
{
4295-
struct object_entry **delta_list;
42964343
size_t oe_start = to_pack.nr_objects;
42974344
size_t oe_end;
4298-
unsigned int sub_list_size;
42994345
unsigned int *processed = data;
43004346

43014347
/*
@@ -4328,32 +4374,17 @@ static int add_objects_by_path(const char *path,
43284374
if (oe_end == oe_start || !window)
43294375
return 0;
43304376

4331-
sub_list_size = 0;
4332-
ALLOC_ARRAY(delta_list, oe_end - oe_start);
4377+
ALLOC_GROW(to_pack.regions,
4378+
to_pack.nr_regions + 1,
4379+
to_pack.nr_regions_alloc);
43334380

4334-
for (size_t i = 0; i < oe_end - oe_start; i++) {
4335-
struct object_entry *entry = to_pack.objects + oe_start + i;
4381+
to_pack.regions[to_pack.nr_regions].start = oe_start;
4382+
to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
4383+
to_pack.nr_regions++;
43364384

4337-
if (!should_attempt_deltas(entry))
4338-
continue;
4385+
*processed += oids->nr;
4386+
display_progress(progress_state, *processed);
43394387

4340-
delta_list[sub_list_size++] = entry;
4341-
}
4342-
4343-
/*
4344-
* Find delta bases among this list of objects that all match the same
4345-
* path. This causes the delta compression to be interleaved in the
4346-
* object walk, which can lead to confusing progress indicators. This is
4347-
* also incompatible with threaded delta calculations. In the future,
4348-
* consider creating a list of regions in the full to_pack.objects array
4349-
* that could be picked up by the threaded delta computation.
4350-
*/
4351-
if (sub_list_size && window) {
4352-
QSORT(delta_list, sub_list_size, type_size_sort);
4353-
find_deltas(delta_list, &sub_list_size, window, depth, processed);
4354-
}
4355-
4356-
free(delta_list);
43574388
return 0;
43584389
}
43594390

pack-objects.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,23 @@ struct object_entry {
119119
unsigned ext_base:1; /* delta_idx points outside packlist */
120120
};
121121

122+
/**
123+
* A packing region is a section of the packing_data.objects array
124+
* as given by a starting index and a number of elements.
125+
*/
126+
struct packing_region {
127+
uint32_t start;
128+
uint32_t nr;
129+
};
130+
122131
struct packing_data {
123132
struct repository *repo;
124133
struct object_entry *objects;
125134
uint32_t nr_objects, nr_alloc;
126135

136+
struct packing_region *regions;
137+
uint32_t nr_regions, nr_regions_alloc;
138+
127139
int32_t *index;
128140
uint32_t index_size;
129141

t/t5300-pack-object.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,9 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
726726
# Basic "repack everything" test
727727
test_expect_success '--path-walk pack everything' '
728728
git -C server rev-parse HEAD >in &&
729-
git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
729+
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
730+
--stdout --revs --path-walk --progress <in >out.pack 2>err &&
731+
grep "Compressing objects by path" err &&
730732
git -C server index-pack --stdin <out.pack
731733
'
732734

@@ -736,7 +738,9 @@ test_expect_success '--path-walk thin pack' '
736738
$(git -C server rev-parse HEAD)
737739
^$(git -C server rev-parse HEAD~2)
738740
EOF
739-
git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
741+
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
742+
--thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
743+
grep "Compressing objects by path" err &&
740744
git -C server index-pack --fix-thin --stdin <out.pack
741745
'
742746

t/t5530-upload-pack-error.sh

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
3434
hexsz=$(test_oid hexsz) &&
3535
printf "%04xwant %s\n00000009done\n0000" \
3636
$(($hexsz + 10)) $head >input &&
37-
38-
# The current implementation of path-walk causes a different
39-
# error message. This will be changed by a future refactoring.
40-
GIT_TEST_PACK_PATH_WALK=0 &&
41-
export GIT_TEST_PACK_PATH_WALK &&
42-
4337
test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
4438
test_grep "unable to read" output.err &&
4539
test_grep "pack-objects died" output.err

0 commit comments

Comments
 (0)