Skip to content

Commit 35dbc30

Browse files
10ne1gitster
authored andcommitted
submodule: encode gitdir paths to avoid conflicts
This adds a new submoduleEncoding extension which encodes gitdir names to avoid collisions due to nested gitdirs or case insensitive filesystems. A custom encoding can become unnecessarily complex, while url-encoding is relatively well-known, however it needs some extending to support case insensitive filesystems, hence why A is encoded as _a, B as _b and so on. Unfortunately encoding A -> _a (...) is not enough to fix the reserved Windows file names (e.g. COM1) because worktrees still use names like COM1 even if the gitdirs paths are encoded, so future work is needed to fully address Windows reserved names. For now url-encoding is the only option, however in the future we may add alternatives (other encodings, hashes or even hash_name). Suggested-by: Phillip Wood <[email protected]> Suggested-by: Patrick Steinhardt <[email protected]> Signed-off-by: Adrian Ratiu <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent fb3b2e6 commit 35dbc30

File tree

8 files changed

+204
-20
lines changed

8 files changed

+204
-20
lines changed

Documentation/config/extensions.adoc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ relativeWorktrees::
6969
repaired with either the `--relative-paths` option or with the
7070
`worktree.useRelativePaths` config set to `true`.
7171
72+
submoduleEncoding::
73+
If enabled, submodule gitdir paths are encoded to avoid filesystem
74+
conflicts due to nested gitdirs or case insensitivity. For now, only
75+
url-encoding (rfc3986) is available, with a small addition to encode
76+
uppercase to lowercase letters (`A -> _a`, `B -> _b` and so on).
77+
Other encoding or hashing methods may be added in the future.
78+
Any preexisting non-encoded submodule gitdirs are used as-is, to
79+
ease migration and reduce risk of gitdirs not being recognized.
80+
7281
worktreeConfig::
7382
If enabled, then worktrees will load config settings from the
7483
`$GIT_DIR/config.worktree` file in addition to the

Documentation/config/submodule.adoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ submodule.<name>.active::
5555
submodule.<name>.gitdir::
5656
This option sets the gitdir path for submodule <name>, allowing users
5757
to override the default path or change the default path name encoding.
58+
Submodule gitdir encoding is enabled via `extensions.submoduleEncoding`
59+
(see linkgit:git-config[1]). This config works both with the extension
60+
enabled or disabled.
5861

5962
submodule.active::
6063
A repeated field which contains a pathspec used to match against a

repository.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ struct repository {
158158
int repository_format_worktree_config;
159159
int repository_format_relative_worktrees;
160160
int repository_format_precious_objects;
161+
int repository_format_submodule_encoding;
161162

162163
/* Indicate if a repository has a different 'commondir' from 'gitdir' */
163164
unsigned different_commondir:1;

setup.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,9 @@ static enum extension_result handle_extension(const char *var,
687687
} else if (!strcmp(ext, "relativeworktrees")) {
688688
data->relative_worktrees = git_config_bool(var, value);
689689
return EXTENSION_OK;
690+
} else if (!strcmp(ext, "submoduleencoding")) {
691+
data->submodule_encoding = git_config_bool(var, value);
692+
return EXTENSION_OK;
690693
}
691694
return EXTENSION_UNKNOWN;
692695
}
@@ -1864,6 +1867,8 @@ const char *setup_git_directory_gently(int *nongit_ok)
18641867
repo_fmt.worktree_config;
18651868
the_repository->repository_format_relative_worktrees =
18661869
repo_fmt.relative_worktrees;
1870+
the_repository->repository_format_submodule_encoding =
1871+
repo_fmt.submodule_encoding;
18671872
/* take ownership of repo_fmt.partial_clone */
18681873
the_repository->repository_format_partial_clone =
18691874
repo_fmt.partial_clone;
@@ -1962,6 +1967,8 @@ void check_repository_format(struct repository_format *fmt)
19621967
fmt->ref_storage_format);
19631968
the_repository->repository_format_worktree_config =
19641969
fmt->worktree_config;
1970+
the_repository->repository_format_submodule_encoding =
1971+
fmt->submodule_encoding;
19651972
the_repository->repository_format_relative_worktrees =
19661973
fmt->relative_worktrees;
19671974
the_repository->repository_format_partial_clone =

setup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct repository_format {
130130
char *partial_clone; /* value of extensions.partialclone */
131131
int worktree_config;
132132
int relative_worktrees;
133+
int submodule_encoding;
133134
int is_bare;
134135
int hash_algo;
135136
int compat_hash_algo;

submodule.c

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,13 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name)
22622262
char *p;
22632263
int ret = 0;
22642264

2265+
/*
2266+
* Skip these checks when extensions.submoduleEncoding is enabled because
2267+
* it fixes the nesting issues and the suffixes will not match by design.
2268+
*/
2269+
if (the_repository->repository_format_submodule_encoding)
2270+
return 0;
2271+
22652272
if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' ||
22662273
strcmp(p, submodule_name))
22672274
BUG("submodule name '%s' not a suffix of git dir '%s'",
@@ -2581,29 +2588,22 @@ int submodule_to_gitdir(struct repository *repo,
25812588
return ret;
25822589
}
25832590

2591+
static void strbuf_addstr_case_encode(struct strbuf *dst, const char *src)
2592+
{
2593+
for (; *src; src++) {
2594+
unsigned char c = *src;
2595+
if (c >= 'A' && c <= 'Z') {
2596+
strbuf_addch(dst, '_');
2597+
strbuf_addch(dst, c - 'A' + 'a');
2598+
} else {
2599+
strbuf_addch(dst, c);
2600+
}
2601+
}
2602+
}
2603+
25842604
void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
25852605
const char *submodule_name)
25862606
{
2587-
/*
2588-
* NEEDSWORK: The current way of mapping a submodule's name to
2589-
* its location in .git/modules/ has problems with some naming
2590-
* schemes. For example, if a submodule is named "foo" and
2591-
* another is named "foo/bar" (whether present in the same
2592-
* superproject commit or not - the problem will arise if both
2593-
* superproject commits have been checked out at any point in
2594-
* time), or if two submodule names only have different cases in
2595-
* a case-insensitive filesystem.
2596-
*
2597-
* There are several solutions, including encoding the path in
2598-
* some way, introducing a submodule.<name>.gitdir config in
2599-
* .git/config (not .gitmodules) that allows overriding what the
2600-
* gitdir of a submodule would be (and teach Git, upon noticing
2601-
* a clash, to automatically determine a non-clashing name and
2602-
* to write such a config), or introducing a
2603-
* submodule.<name>.gitdir config in .gitmodules that repo
2604-
* administrators can explicitly set. Nothing has been decided,
2605-
* so for now, just append the name at the end of the path.
2606-
*/
26072607
char *gitdir_path, *key;
26082608

26092609
/* Allow config override. */
@@ -2618,4 +2618,20 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
26182618

26192619
repo_git_path_append(r, buf, "modules/");
26202620
strbuf_addstr(buf, submodule_name);
2621+
2622+
/* Existing legacy non-encoded names are used as-is */
2623+
if (is_git_directory(buf->buf))
2624+
return;
2625+
2626+
if (the_repository->repository_format_submodule_encoding) {
2627+
struct strbuf tmp = STRBUF_INIT;
2628+
2629+
strbuf_reset(buf);
2630+
repo_git_path_append(r, buf, "modules/");
2631+
2632+
strbuf_addstr_urlencode(&tmp, submodule_name, is_rfc3986_unreserved);
2633+
strbuf_addstr_case_encode(buf, tmp.buf);
2634+
2635+
strbuf_release(&tmp);
2636+
}
26212637
}

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,7 @@ integration_tests = [
874874
't7422-submodule-output.sh',
875875
't7423-submodule-symlinks.sh',
876876
't7424-submodule-mixed-ref-formats.sh',
877+
't7425-submodule-encoding.sh',
877878
't7450-bad-git-dotfiles.sh',
878879
't7500-commit-template-squash-signoff.sh',
879880
't7501-commit-basic-functionality.sh',

t/t7425-submodule-encoding.sh

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#!/bin/sh
2+
3+
test_description='submodules handle mixed legacy and new (encoded) style gitdir paths'
4+
5+
. ./test-lib.sh
6+
. "$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh
7+
8+
test_expect_success 'setup: allow file protocol' '
9+
git config --global protocol.file.allow always
10+
'
11+
12+
test_expect_success 'create repo with mixed encoded and non-encoded submodules' '
13+
git init -b main legacy-sub &&
14+
test_commit -C legacy-sub legacy-initial &&
15+
legacy_rev=$(git -C legacy-sub rev-parse HEAD) &&
16+
17+
git init -b main new-sub &&
18+
test_commit -C new-sub new-initial &&
19+
new_rev=$(git -C new-sub rev-parse HEAD) &&
20+
21+
git init -b main main &&
22+
(
23+
cd main &&
24+
git submodule add ../legacy-sub legacy &&
25+
test_commit legacy-sub &&
26+
27+
git config core.repositoryformatversion 1 &&
28+
git config extensions.submoduleEncoding true &&
29+
30+
git submodule add ../new-sub "New Sub" &&
31+
test_commit new
32+
)
33+
'
34+
35+
test_expect_success 'verify submodule name is properly encoded' '
36+
verify_submodule_gitdir_path main legacy modules/legacy &&
37+
verify_submodule_gitdir_path main "New Sub" modules/_new%20_sub
38+
'
39+
40+
test_expect_success 'clone from repo with both legacy and new-style submodules' '
41+
git clone --recurse-submodules main cloned-non-encoding &&
42+
(
43+
cd cloned-non-encoding &&
44+
45+
test_path_is_dir .git/modules/legacy &&
46+
test_path_is_dir .git/modules/"New Sub" &&
47+
48+
git submodule status >list &&
49+
test_grep "$legacy_rev legacy" list &&
50+
test_grep "$new_rev New Sub" list
51+
) &&
52+
53+
git clone -c extensions.submoduleEncoding=true --recurse-submodules main cloned-encoding &&
54+
(
55+
cd cloned-encoding &&
56+
57+
test_path_is_dir .git/modules/legacy &&
58+
test_path_is_dir .git/modules/_new%20_sub &&
59+
60+
git submodule status >list &&
61+
test_grep "$legacy_rev legacy" list &&
62+
test_grep "$new_rev New Sub" list
63+
)
64+
'
65+
66+
test_expect_success 'commit and push changes to encoded submodules' '
67+
git -C legacy-sub config receive.denyCurrentBranch updateInstead &&
68+
git -C new-sub config receive.denyCurrentBranch updateInstead &&
69+
git -C main config receive.denyCurrentBranch updateInstead &&
70+
(
71+
cd cloned-encoding &&
72+
73+
git -C legacy switch --track -C main origin/main &&
74+
test_commit -C legacy second-commit &&
75+
git -C legacy push &&
76+
77+
git -C "New Sub" switch --track -C main origin/main &&
78+
test_commit -C "New Sub" second-commit &&
79+
git -C "New Sub" push &&
80+
81+
# Stage and commit submodule changes in superproject
82+
git switch --track -C main origin/main &&
83+
git add legacy "New Sub" &&
84+
git commit -m "update submodules" &&
85+
86+
# push superproject commit to main repo
87+
git push
88+
) &&
89+
90+
# update expected legacy & new submodule checksums
91+
legacy_rev=$(git -C legacy-sub rev-parse HEAD) &&
92+
new_rev=$(git -C new-sub rev-parse HEAD)
93+
'
94+
95+
test_expect_success 'fetch mixed submodule changes and verify updates' '
96+
(
97+
cd main &&
98+
99+
# only update submodules because superproject was
100+
# pushed into at the end of last test
101+
git submodule update --init --recursive &&
102+
103+
test_path_is_dir .git/modules/legacy &&
104+
test_path_is_dir .git/modules/_new%20_sub &&
105+
106+
# Verify both submodules are at the expected commits
107+
git submodule status >list &&
108+
test_grep "$legacy_rev legacy" list &&
109+
test_grep "$new_rev New Sub" list
110+
)
111+
'
112+
113+
test_expect_success 'setup submodules with nested git dirs' '
114+
git init nested &&
115+
test_commit -C nested nested &&
116+
(
117+
cd nested &&
118+
cat >.gitmodules <<-EOF &&
119+
[submodule "hippo"]
120+
url = .
121+
path = thing1
122+
[submodule "hippo/hooks"]
123+
url = .
124+
path = thing2
125+
EOF
126+
git clone . thing1 &&
127+
git clone . thing2 &&
128+
git add .gitmodules thing1 thing2 &&
129+
test_tick &&
130+
git commit -m nested
131+
)
132+
'
133+
134+
test_expect_success 'git dirs of encoded sibling submodules must not be nested' '
135+
git clone -c extensions.submoduleEncoding=true --recurse-submodules nested clone_nested &&
136+
verify_submodule_gitdir_path clone_nested hippo modules/hippo &&
137+
verify_submodule_gitdir_path clone_nested hippo/hooks modules/hippo%2fhooks
138+
'
139+
140+
test_expect_success 'submodule git dir nesting detection must work with parallel cloning' '
141+
git clone -c extensions.submoduleEncoding=true --recurse-submodules --jobs=2 nested clone_parallel &&
142+
verify_submodule_gitdir_path clone_parallel hippo modules/hippo &&
143+
verify_submodule_gitdir_path clone_parallel hippo/hooks modules/hippo%2fhooks
144+
'
145+
146+
test_done

0 commit comments

Comments
 (0)