Skip to content

Commit 206a1bb

Browse files
derrickstoleegitster
authored andcommitted
pack-objects: refactor path-walk delta phase
Previously, the --path-walk option to 'git pack-objects' would compute deltas inline with the path-walk logic. This would make the progress indicator look like it is taking a long time to enumerate objects, and then very quickly computed deltas. Instead of computing deltas on each region of objects organized by tree, store a list of regions corresponding to these groups. These can later be pulled from the list for delta compression before doing the "global" delta search. This presents a new progress indicator that can be used in tests to verify that this stage is happening. The current implementation is not integrated with threads, but we are setting it up to arrive in the next change. Since we do not attempt to sort objects by size until after exploring all trees, we can remove the previous change to t5530 due to a different error message appearing first. Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4933152 commit 206a1bb

File tree

4 files changed

+75
-34
lines changed

4 files changed

+75
-34
lines changed

builtin/pack-objects.c

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3237,6 +3237,51 @@ static int should_attempt_deltas(struct object_entry *entry)
32373237
return 1;
32383238
}
32393239

3240+
static void find_deltas_for_region(struct object_entry *list,
3241+
struct packing_region *region,
3242+
unsigned int *processed)
3243+
{
3244+
struct object_entry **delta_list;
3245+
unsigned int delta_list_nr = 0;
3246+
3247+
ALLOC_ARRAY(delta_list, region->nr);
3248+
for (size_t i = 0; i < region->nr; i++) {
3249+
struct object_entry *entry = list + region->start + i;
3250+
if (should_attempt_deltas(entry))
3251+
delta_list[delta_list_nr++] = entry;
3252+
}
3253+
3254+
QSORT(delta_list, delta_list_nr, type_size_sort);
3255+
find_deltas(delta_list, &delta_list_nr, window, depth, processed);
3256+
free(delta_list);
3257+
}
3258+
3259+
static void find_deltas_by_region(struct object_entry *list,
3260+
struct packing_region *regions,
3261+
size_t start, size_t nr)
3262+
{
3263+
unsigned int processed = 0;
3264+
size_t progress_nr;
3265+
3266+
if (!nr)
3267+
return;
3268+
3269+
progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
3270+
3271+
if (progress)
3272+
progress_state = start_progress(the_repository,
3273+
_("Compressing objects by path"),
3274+
progress_nr);
3275+
3276+
while (nr--)
3277+
find_deltas_for_region(list,
3278+
&regions[start++],
3279+
&processed);
3280+
3281+
display_progress(progress_state, progress_nr);
3282+
stop_progress(&progress_state);
3283+
}
3284+
32403285
static void prepare_pack(int window, int depth)
32413286
{
32423287
struct object_entry **delta_list;
@@ -3261,6 +3306,10 @@ static void prepare_pack(int window, int depth)
32613306
if (!to_pack.nr_objects || !window || !depth)
32623307
return;
32633308

3309+
if (path_walk)
3310+
find_deltas_by_region(to_pack.objects, to_pack.regions,
3311+
0, to_pack.nr_regions);
3312+
32643313
ALLOC_ARRAY(delta_list, to_pack.nr_objects);
32653314
nr_deltas = n = 0;
32663315

@@ -4214,10 +4263,8 @@ static int add_objects_by_path(const char *path,
42144263
enum object_type type,
42154264
void *data)
42164265
{
4217-
struct object_entry **delta_list = NULL;
42184266
size_t oe_start = to_pack.nr_objects;
42194267
size_t oe_end;
4220-
unsigned int sub_list_nr;
42214268
unsigned int *processed = data;
42224269

42234270
/*
@@ -4250,33 +4297,17 @@ static int add_objects_by_path(const char *path,
42504297
if (oe_end == oe_start || !window)
42514298
return 0;
42524299

4253-
sub_list_nr = 0;
4254-
if (oe_end > oe_start)
4255-
ALLOC_ARRAY(delta_list, oe_end - oe_start);
4300+
ALLOC_GROW(to_pack.regions,
4301+
to_pack.nr_regions + 1,
4302+
to_pack.nr_regions_alloc);
42564303

4257-
for (size_t i = 0; i < oe_end - oe_start; i++) {
4258-
struct object_entry *entry = to_pack.objects + oe_start + i;
4304+
to_pack.regions[to_pack.nr_regions].start = oe_start;
4305+
to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
4306+
to_pack.nr_regions++;
42594307

4260-
if (!should_attempt_deltas(entry))
4261-
continue;
4308+
*processed += oids->nr;
4309+
display_progress(progress_state, *processed);
42624310

4263-
delta_list[sub_list_nr++] = entry;
4264-
}
4265-
4266-
/*
4267-
* Find delta bases among this list of objects that all match the same
4268-
* path. This causes the delta compression to be interleaved in the
4269-
* object walk, which can lead to confusing progress indicators. This is
4270-
* also incompatible with threaded delta calculations. In the future,
4271-
* consider creating a list of regions in the full to_pack.objects array
4272-
* that could be picked up by the threaded delta computation.
4273-
*/
4274-
if (sub_list_nr && window) {
4275-
QSORT(delta_list, sub_list_nr, type_size_sort);
4276-
find_deltas(delta_list, &sub_list_nr, window, depth, processed);
4277-
}
4278-
4279-
free(delta_list);
42804311
return 0;
42814312
}
42824313

pack-objects.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,23 @@ struct object_entry {
119119
unsigned ext_base:1; /* delta_idx points outside packlist */
120120
};
121121

122+
/**
123+
* A packing region is a section of the packing_data.objects array
124+
* as given by a starting index and a number of elements.
125+
*/
126+
struct packing_region {
127+
size_t start;
128+
size_t nr;
129+
};
130+
122131
struct packing_data {
123132
struct repository *repo;
124133
struct object_entry *objects;
125134
uint32_t nr_objects, nr_alloc;
126135

136+
struct packing_region *regions;
137+
size_t nr_regions, nr_regions_alloc;
138+
127139
int32_t *index;
128140
uint32_t index_size;
129141

t/t5300-pack-object.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,9 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
725725

726726
test_expect_success '--path-walk pack everything' '
727727
git -C server rev-parse HEAD >in &&
728-
git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
728+
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
729+
--stdout --revs --path-walk --progress <in >out.pack 2>err &&
730+
grep "Compressing objects by path" err &&
729731
git -C server index-pack --stdin <out.pack
730732
'
731733

@@ -734,7 +736,9 @@ test_expect_success '--path-walk thin pack' '
734736
$(git -C server rev-parse HEAD)
735737
^$(git -C server rev-parse HEAD~2)
736738
EOF
737-
git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
739+
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
740+
--thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
741+
grep "Compressing objects by path" err &&
738742
git -C server index-pack --fix-thin --stdin <out.pack
739743
'
740744

t/t5530-upload-pack-error.sh

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
3434
hexsz=$(test_oid hexsz) &&
3535
printf "%04xwant %s\n00000009done\n0000" \
3636
$(($hexsz + 10)) $head >input &&
37-
38-
# The current implementation of path-walk causes a different
39-
# error message. This will be changed by a future refactoring.
40-
GIT_TEST_PACK_PATH_WALK=0 &&
41-
export GIT_TEST_PACK_PATH_WALK &&
42-
4337
test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
4438
test_grep "unable to read" output.err &&
4539
test_grep "pack-objects died" output.err

0 commit comments

Comments
 (0)