Skip to content

Commit 6a7bc9d

Browse files
matheustavaresjeffhostetler
authored andcommitted
parallel-checkout: add tests related to path collisions
Add tests to confirm that path collisions are properly detected by checkout workers, both to avoid race conditions and to report colliding entries on clone. Co-authored-by: Jeff Hostetler <[email protected]> Signed-off-by: Matheus Tavares <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent d0e5d35 commit 6a7bc9d

File tree

3 files changed

+168
-2
lines changed

3 files changed

+168
-2
lines changed

parallel-checkout.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "sigchain.h"
99
#include "streaming.h"
1010
#include "thread-utils.h"
11+
#include "trace2.h"
1112

1213
struct pc_worker {
1314
struct child_process cp;
@@ -326,6 +327,7 @@ void write_pc_item(struct parallel_checkout_item *pc_item,
326327
if (dir_sep && !has_dirs_only_path(path.buf, dir_sep - path.buf,
327328
state->base_dir_len)) {
328329
pc_item->status = PC_ITEM_COLLIDED;
330+
trace2_data_string("pcheckout", NULL, "collision/dirname", path.buf);
329331
goto out;
330332
}
331333

@@ -341,6 +343,8 @@ void write_pc_item(struct parallel_checkout_item *pc_item,
341343
* call should have already caught these cases.
342344
*/
343345
pc_item->status = PC_ITEM_COLLIDED;
346+
trace2_data_string("pcheckout", NULL,
347+
"collision/basename", path.buf);
344348
} else {
345349
error_errno("failed to open file '%s'", path.buf);
346350
pc_item->status = PC_ITEM_FAILED;

t/lib-parallel-checkout.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ test_checkout_workers () {
2222

2323
local trace_file=trace-test-checkout-workers &&
2424
rm -f "$trace_file" &&
25-
GIT_TRACE2="$(pwd)/$trace_file" "$@" &&
25+
GIT_TRACE2="$(pwd)/$trace_file" "$@" 2>&8 &&
2626

2727
local workers=$(grep "child_start\[..*\] git checkout--worker" "$trace_file" | wc -l) &&
2828
test $workers -eq $expected_workers &&
2929
rm "$trace_file"
30-
}
30+
} 8>&2 2>&4
3131

3232
# Verify that both the working tree and the index were created correctly
3333
verify_checkout () {
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#!/bin/sh
2+
3+
test_description="path collisions during parallel checkout
4+
5+
Parallel checkout must detect path collisions to:
6+
7+
1) Avoid racily writing to different paths that represent the same file on disk.
8+
2) Report the colliding entries on clone.
9+
10+
The tests in this file exercise parallel checkout's collision detection code in
11+
both these mechanics.
12+
"
13+
14+
. ./test-lib.sh
15+
. "$TEST_DIRECTORY/lib-parallel-checkout.sh"
16+
17+
TEST_ROOT="$PWD"
18+
19+
test_expect_success CASE_INSENSITIVE_FS 'setup' '
20+
empty_oid=$(git hash-object -w --stdin </dev/null) &&
21+
cat >objs <<-EOF &&
22+
100644 $empty_oid FILE_X
23+
100644 $empty_oid FILE_x
24+
100644 $empty_oid file_X
25+
100644 $empty_oid file_x
26+
EOF
27+
git update-index --index-info <objs &&
28+
git commit -m "colliding files" &&
29+
git tag basename_collision &&
30+
31+
write_script "$TEST_ROOT"/logger_script <<-\EOF
32+
echo "$@" >>filter.log
33+
EOF
34+
'
35+
36+
test_workers_in_event_trace ()
37+
{
38+
test $1 -eq $(grep ".event.:.child_start..*checkout--worker" $2 | wc -l)
39+
}
40+
41+
test_expect_success CASE_INSENSITIVE_FS 'worker detects basename collision' '
42+
GIT_TRACE2_EVENT="$(pwd)/trace" git \
43+
-c checkout.workers=2 -c checkout.thresholdForParallelism=0 \
44+
checkout . &&
45+
46+
test_workers_in_event_trace 2 trace &&
47+
collisions=$(grep -i "category.:.pcheckout.,.key.:.collision/basename.,.value.:.file_x.}" trace | wc -l) &&
48+
test $collisions -eq 3
49+
'
50+
51+
test_expect_success CASE_INSENSITIVE_FS 'worker detects dirname collision' '
52+
test_config filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" &&
53+
empty_oid=$(git hash-object -w --stdin </dev/null) &&
54+
55+
# By setting a filter command to "a", we make it ineligible for parallel
56+
# checkout, and thus it is checked out *first*. This way we can ensure
57+
# that "A/B" and "A/C" will both collide with the regular file "a".
58+
#
59+
attr_oid=$(echo "a filter=logger" | git hash-object -w --stdin) &&
60+
61+
cat >objs <<-EOF &&
62+
100644 $empty_oid A/B
63+
100644 $empty_oid A/C
64+
100644 $empty_oid a
65+
100644 $attr_oid .gitattributes
66+
EOF
67+
git rm -rf . &&
68+
git update-index --index-info <objs &&
69+
70+
rm -f trace filter.log &&
71+
GIT_TRACE2_EVENT="$(pwd)/trace" git \
72+
-c checkout.workers=2 -c checkout.thresholdForParallelism=0 \
73+
checkout . &&
74+
75+
# Check that "a" (and only "a") was filtered
76+
echo a >expected.log &&
77+
test_cmp filter.log expected.log &&
78+
79+
# Check that it used the right number of workers and detected the collisions
80+
test_workers_in_event_trace 2 trace &&
81+
grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/B.}" trace &&
82+
grep "category.:.pcheckout.,.key.:.collision/dirname.,.value.:.A/C.}" trace
83+
'
84+
85+
test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'do not follow symlinks colliding with leading dir' '
86+
empty_oid=$(git hash-object -w --stdin </dev/null) &&
87+
symlink_oid=$(echo "./e" | git hash-object -w --stdin) &&
88+
mkdir e &&
89+
90+
cat >objs <<-EOF &&
91+
120000 $symlink_oid D
92+
100644 $empty_oid d/x
93+
100644 $empty_oid e/y
94+
EOF
95+
git rm -rf . &&
96+
git update-index --index-info <objs &&
97+
98+
set_checkout_config 2 0 &&
99+
test_checkout_workers 2 git checkout . &&
100+
test_path_is_dir e &&
101+
test_path_is_missing e/x
102+
'
103+
104+
# The two following tests check that parallel checkout correctly reports
105+
# colliding entries on clone. The sequential code detects a collision by
106+
# calling lstat() before trying to open(O_CREAT) a file. (Note that this only
107+
# works for clone.) Then, to find the pair of a colliding item k, it searches
108+
# cache_entry[0, k-1]. This is not sufficient in parallel checkout because:
109+
#
110+
# - A colliding file may be created between the lstat() and open() calls;
111+
# - A colliding entry might appear in the second half of the cache_entry array.
112+
#
113+
test_expect_success CASE_INSENSITIVE_FS 'collision report on clone (w/ racy file creation)' '
114+
git reset --hard basename_collision &&
115+
set_checkout_config 2 0 &&
116+
test_checkout_workers 2 git clone . clone-repo 2>stderr &&
117+
118+
grep FILE_X stderr &&
119+
grep FILE_x stderr &&
120+
grep file_X stderr &&
121+
grep file_x stderr &&
122+
grep "the following paths have collided" stderr
123+
'
124+
125+
# This test ensures that the collision report code is correctly looking for
126+
# colliding peers in the second half of the cache_entry array. This is done by
127+
# defining a smudge command for the *last* array entry, which makes it
128+
# non-eligible for parallel-checkout. Thus, it is checked out *first*, before
129+
# spawning the workers.
130+
#
131+
# Note: this test doesn't work on Windows because, on this system, the
132+
# collision report code uses strcmp() to find the colliding pairs when
133+
# core.ignoreCase is false. And we need this setting for this test so that only
134+
# 'file_x' matches the pattern of the filter attribute. But the test works on
135+
# OSX, where the colliding pairs are found using inode.
136+
#
137+
test_expect_success CASE_INSENSITIVE_FS,!MINGW,!CYGWIN \
138+
'collision report on clone (w/ colliding peer after the detected entry)' '
139+
140+
test_config_global filter.logger.smudge "\"$TEST_ROOT/logger_script\" %f" &&
141+
git reset --hard basename_collision &&
142+
echo "file_x filter=logger" >.gitattributes &&
143+
git add .gitattributes &&
144+
git commit -m "filter for file_x" &&
145+
146+
rm -rf clone-repo &&
147+
set_checkout_config 2 0 &&
148+
test_checkout_workers 2 \
149+
git -c core.ignoreCase=false clone . clone-repo 2>stderr &&
150+
151+
grep FILE_X stderr &&
152+
grep FILE_x stderr &&
153+
grep file_X stderr &&
154+
grep file_x stderr &&
155+
grep "the following paths have collided" stderr &&
156+
157+
# Check that only "file_x" was filtered
158+
echo file_x >expected.log &&
159+
test_cmp clone-repo/filter.log expected.log
160+
'
161+
162+
test_done

0 commit comments

Comments
 (0)