Skip to content

Commit 1947d1b

Browse files
committed
repack: add --name-hash-version option
The new '--name-hash-version' option for 'git repack' is a simple pass-through to the underlying 'git pack-objects' subcommand. However, this subcommand may have other options and a temporary filename as part of the subcommand execution that may not be predictable or could change over time. The existing test_subcommand method requires an exact list of arguments for the subcommand. This is too rigid for our needs here, so create a new method, test_subcommand_flex. Use it to check that the --name-hash-version option is passing through. Signed-off-by: Derrick Stolee <[email protected]>
1 parent fb52ca5 commit 1947d1b

File tree

5 files changed

+73
-4
lines changed

5 files changed

+73
-4
lines changed

Documentation/git-repack.txt

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ git-repack - Pack unpacked objects in a repository
99
SYNOPSIS
1010
--------
1111
[verse]
12-
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] [--write-midx]
12+
'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]
13+
[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]
14+
[--write-midx] [--name-hash-version=<n>]
1315

1416
DESCRIPTION
1517
-----------
@@ -249,6 +251,36 @@ linkgit:git-multi-pack-index[1]).
249251
Write a multi-pack index (see linkgit:git-multi-pack-index[1])
250252
containing the non-redundant packs.
251253

254+
--name-hash-version=<n>::
255+
While performing delta compression, Git groups objects that may be
256+
similar based on heuristics using the path to each object. While
257+
grouping objects by an exact path match is good for paths with
258+
many versions, there are benefits for finding delta pairs across
259+
different full paths. Git collects objects by type and then by a
260+
"name hash" of the path and then by size, hoping to group objects
261+
that will compress well together.
262+
+
263+
The default name hash version is `1`, which prioritizes hash locality by
264+
considering the final bytes of the path as providing the maximum magnitude
265+
to the hash function. This version excels at distinguishing short paths
266+
and finding renames across directories. However, the hash function depends
267+
primarily on the final 16 bytes of the path. If there are many paths in
268+
the repo that have the same final 16 bytes and differ only by parent
269+
directory, then this name-hash may lead to too many collisions and cause
270+
poor results. At the moment, this version is required when writing
271+
reachability bitmap files with `--write-bitmap-index`.
272+
+
273+
The name hash version `2` has locality features similar to version `1`,
274+
except it considers each path component separately and overlays the hashes
275+
with a shift. This still prioritizes the final bytes of the path, but also
276+
"salts" the lower bits of the hash using the parent directory names. This
277+
method allows for some of the locality benefits of version `1` while
278+
breaking most of the collisions from a similarly-named file appearing in
279+
many different directories. At the moment, this version is not allowed
280+
when writing reachability bitmap files with `--write-bitmap-index` and it
281+
will be automatically changed to version `1`.
282+
283+
252284
CONFIGURATION
253285
-------------
254286

builtin/repack.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ static int run_update_server_info = 1;
3939
static char *packdir, *packtmp_name, *packtmp;
4040

4141
static const char *const git_repack_usage[] = {
42-
N_("git repack [<options>]"),
42+
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
43+
"[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
44+
"[--write-midx] [--name-hash-version=<n>]"),
4345
NULL
4446
};
4547

@@ -58,7 +60,7 @@ struct pack_objects_args {
5860
int no_reuse_object;
5961
int quiet;
6062
int local;
61-
int full_name_hash;
63+
int name_hash_version;
6264
struct list_objects_filter_options filter_options;
6365
};
6466

@@ -307,6 +309,8 @@ static void prepare_pack_objects(struct child_process *cmd,
307309
strvec_pushf(&cmd->args, "--no-reuse-delta");
308310
if (args->no_reuse_object)
309311
strvec_pushf(&cmd->args, "--no-reuse-object");
312+
if (args->name_hash_version)
313+
strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version);
310314
if (args->local)
311315
strvec_push(&cmd->args, "--local");
312316
if (args->quiet)
@@ -1204,6 +1208,8 @@ int cmd_repack(int argc,
12041208
N_("pass --no-reuse-delta to git-pack-objects")),
12051209
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
12061210
N_("pass --no-reuse-object to git-pack-objects")),
1211+
OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version,
1212+
N_("specify the name hash version to use for grouping similar objects by path")),
12071213
OPT_NEGBIT('n', NULL, &run_update_server_info,
12081214
N_("do not run git-update-server-info"), 1),
12091215
OPT__QUIET(&po_args.quiet, N_("be quiet")),

t/t0450/txt-help-mismatches

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ rebase
4545
remote
4646
remote-ext
4747
remote-fd
48-
repack
4948
reset
5049
restore
5150
rev-parse

t/t7700-repack.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,12 @@ test_expect_success 'repack -ad cleans up old .tmp-* packs' '
777777
test_must_be_empty tmpfiles
778778
'
779779

780+
test_expect_success '--name-hash-version option passes through to pack-objects' '
781+
GIT_TRACE2_EVENT="$(pwd)/hash-trace.txt" \
782+
git repack -a --name-hash-version=2 &&
783+
test_subcommand_flex git pack-objects --name-hash-version=2 <hash-trace.txt
784+
'
785+
780786
test_expect_success 'setup for update-server-info' '
781787
git init update-server-info &&
782788
test_commit -C update-server-info message

t/test-lib-functions.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,6 +1886,32 @@ test_subcommand () {
18861886
fi
18871887
}
18881888

1889+
# Check that the given subcommand was run with the given set of
1890+
# arguments in order (but with possible extra arguments).
1891+
#
1892+
# test_subcommand_flex [!] <command> <args>... < <trace>
1893+
#
1894+
# If the first parameter passed is !, this instead checks that
1895+
# the given command was not called.
1896+
#
1897+
test_subcommand_flex () {
1898+
local negate=
1899+
if test "$1" = "!"
1900+
then
1901+
negate=t
1902+
shift
1903+
fi
1904+
1905+
local expr="$(printf '"%s".*' "$@")"
1906+
1907+
if test -n "$negate"
1908+
then
1909+
! grep "\[$expr\]"
1910+
else
1911+
grep "\[$expr\]"
1912+
fi
1913+
}
1914+
18891915
# Check that the given command was invoked as part of the
18901916
# trace2-format trace on stdin.
18911917
#

0 commit comments

Comments
 (0)