Skip to content

Commit 7a43ab6

Browse files
committed
Merge branch 'sg/split-index-racefix'
The codepath to support the experimental split-index mode had remaining "racily clean" issues fixed. * sg/split-index-racefix: split-index: BUG() when cache entry refers to non-existing shared entry split-index: smudge and add racily clean cache entries to split index split-index: don't compare cached data of entries already marked for split index split-index: count the number of deleted entries t1700-split-index: date back files to avoid racy situations split-index: add tests to demonstrate the racy split index problem t1700-split-index: document why FSMONITOR is disabled in this test script
2 parents 3c4a821 + 4c490f3 commit 7a43ab6

File tree

5 files changed

+360
-42
lines changed

5 files changed

+360
-42
lines changed

cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,8 @@ extern void *read_blob_data_from_index(const struct index_state *, const char *,
783783
#define CE_MATCH_REFRESH 0x10
784784
/* don't refresh_fsmonitor state or do stat comparison even if CE_FSMONITOR_VALID is true */
785785
#define CE_MATCH_IGNORE_FSMONITOR 0X20
786+
extern int is_racy_timestamp(const struct index_state *istate,
787+
const struct cache_entry *ce);
786788
extern int ie_match_stat(struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
787789
extern int ie_modified(struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
788790

read-cache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ static int is_racy_stat(const struct index_state *istate,
345345
);
346346
}
347347

348-
static int is_racy_timestamp(const struct index_state *istate,
348+
int is_racy_timestamp(const struct index_state *istate,
349349
const struct cache_entry *ce)
350350
{
351351
return (!S_ISGITLINK(ce->ce_mode) &&

split-index.c

Lines changed: 115 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ static void mark_entry_for_delete(size_t pos, void *data)
111111
die("position for delete %d exceeds base index size %d",
112112
(int)pos, istate->cache_nr);
113113
istate->cache[pos]->ce_flags |= CE_REMOVE;
114-
istate->split_index->nr_deletions = 1;
114+
istate->split_index->nr_deletions++;
115115
}
116116

117117
static void replace_entry(size_t pos, void *data)
@@ -188,6 +188,30 @@ void merge_base_index(struct index_state *istate)
188188
si->saved_cache_nr = 0;
189189
}
190190

191+
/*
192+
* Compare most of the fields in two cache entries, i.e. all except the
193+
* hashmap_entry and the name.
194+
*/
195+
static int compare_ce_content(struct cache_entry *a, struct cache_entry *b)
196+
{
197+
const unsigned int ondisk_flags = CE_STAGEMASK | CE_VALID |
198+
CE_EXTENDED_FLAGS;
199+
unsigned int ce_flags = a->ce_flags;
200+
unsigned int base_flags = b->ce_flags;
201+
int ret;
202+
203+
/* only on-disk flags matter */
204+
a->ce_flags &= ondisk_flags;
205+
b->ce_flags &= ondisk_flags;
206+
ret = memcmp(&a->ce_stat_data, &b->ce_stat_data,
207+
offsetof(struct cache_entry, name) -
208+
offsetof(struct cache_entry, ce_stat_data));
209+
a->ce_flags = ce_flags;
210+
b->ce_flags = base_flags;
211+
212+
return ret;
213+
}
214+
191215
void prepare_to_write_split_index(struct index_state *istate)
192216
{
193217
struct split_index *si = init_split_index(istate);
@@ -207,38 +231,109 @@ void prepare_to_write_split_index(struct index_state *istate)
207231
*/
208232
for (i = 0; i < istate->cache_nr; i++) {
209233
struct cache_entry *base;
210-
/* namelen is checked separately */
211-
const unsigned int ondisk_flags =
212-
CE_STAGEMASK | CE_VALID | CE_EXTENDED_FLAGS;
213-
unsigned int ce_flags, base_flags, ret;
214234
ce = istate->cache[i];
215-
if (!ce->index)
235+
if (!ce->index) {
236+
/*
237+
* During simple update index operations this
238+
* is a cache entry that is not present in
239+
* the shared index. It will be added to the
240+
* split index.
241+
*
242+
* However, it might also represent a file
243+
* that already has a cache entry in the
244+
* shared index, but a new index has just
245+
* been constructed by unpack_trees(), and
246+
* this entry now refers to different content
247+
* than what was recorded in the original
248+
* index, e.g. during 'read-tree -m HEAD^' or
249+
* 'checkout HEAD^'. In this case the
250+
* original entry in the shared index will be
251+
* marked as deleted, and this entry will be
252+
* added to the split index.
253+
*/
216254
continue;
255+
}
217256
if (ce->index > si->base->cache_nr) {
218-
ce->index = 0;
219-
continue;
257+
BUG("ce refers to a shared ce at %d, which is beyond the shared index size %d",
258+
ce->index, si->base->cache_nr);
220259
}
221260
ce->ce_flags |= CE_MATCHED; /* or "shared" */
222261
base = si->base->cache[ce->index - 1];
223-
if (ce == base)
262+
if (ce == base) {
263+
/* The entry is present in the shared index. */
264+
if (ce->ce_flags & CE_UPDATE_IN_BASE) {
265+
/*
266+
* Already marked for inclusion in
267+
* the split index, either because
268+
* the corresponding file was
269+
* modified and the cached stat data
270+
* was refreshed, or because there
271+
* is already a replacement entry in
272+
* the split index.
273+
* Nothing more to do here.
274+
*/
275+
} else if (!ce_uptodate(ce) &&
276+
is_racy_timestamp(istate, ce)) {
277+
/*
278+
* A racily clean cache entry stored
279+
* only in the shared index: it must
280+
* be added to the split index, so
281+
* the subsequent do_write_index()
282+
* can smudge its stat data.
283+
*/
284+
ce->ce_flags |= CE_UPDATE_IN_BASE;
285+
} else {
286+
/*
287+
* The entry is only present in the
288+
* shared index and it was not
289+
* refreshed.
290+
* Just leave it there.
291+
*/
292+
}
224293
continue;
294+
}
225295
if (ce->ce_namelen != base->ce_namelen ||
226296
strcmp(ce->name, base->name)) {
227297
ce->index = 0;
228298
continue;
229299
}
230-
ce_flags = ce->ce_flags;
231-
base_flags = base->ce_flags;
232-
/* only on-disk flags matter */
233-
ce->ce_flags &= ondisk_flags;
234-
base->ce_flags &= ondisk_flags;
235-
ret = memcmp(&ce->ce_stat_data, &base->ce_stat_data,
236-
offsetof(struct cache_entry, name) -
237-
offsetof(struct cache_entry, ce_stat_data));
238-
ce->ce_flags = ce_flags;
239-
base->ce_flags = base_flags;
240-
if (ret)
300+
/*
301+
* This is the copy of a cache entry that is present
302+
* in the shared index, created by unpack_trees()
303+
* while it constructed a new index.
304+
*/
305+
if (ce->ce_flags & CE_UPDATE_IN_BASE) {
306+
/*
307+
* Already marked for inclusion in the split
308+
* index, either because the corresponding
309+
* file was modified and the cached stat data
310+
* was refreshed, or because the original
311+
* entry already had a replacement entry in
312+
* the split index.
313+
* Nothing to do.
314+
*/
315+
} else if (!ce_uptodate(ce) &&
316+
is_racy_timestamp(istate, ce)) {
317+
/*
318+
* A copy of a racily clean cache entry from
319+
* the shared index. It must be added to
320+
* the split index, so the subsequent
321+
* do_write_index() can smudge its stat data.
322+
*/
241323
ce->ce_flags |= CE_UPDATE_IN_BASE;
324+
} else {
325+
/*
326+
* Thoroughly compare the cached data to see
327+
* whether it should be marked for inclusion
328+
* in the split index.
329+
*
330+
* This comparison might be unnecessary, as
331+
* code paths modifying the cached data do
332+
* set CE_UPDATE_IN_BASE as well.
333+
*/
334+
if (compare_ce_content(ce, base))
335+
ce->ce_flags |= CE_UPDATE_IN_BASE;
336+
}
242337
discard_cache_entry(base);
243338
si->base->cache[ce->index - 1] = ce;
244339
}

t/t1700-split-index.sh

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ sane_unset GIT_TEST_SPLIT_INDEX
1313
sane_unset GIT_TEST_FSMONITOR
1414
sane_unset GIT_TEST_INDEX_THREADS
1515

16+
# Create a file named as $1 with content read from stdin.
17+
# Set the file's mtime to a few seconds in the past to avoid racy situations.
18+
create_non_racy_file () {
19+
cat >"$1" &&
20+
test-tool chmtime =-5 "$1"
21+
}
22+
1623
test_expect_success 'enable split index' '
1724
git config splitIndex.maxPercentChange 100 &&
1825
git update-index --split-index &&
@@ -36,7 +43,7 @@ test_expect_success 'enable split index' '
3643
'
3744

3845
test_expect_success 'add one file' '
39-
: >one &&
46+
create_non_racy_file one &&
4047
git update-index --add one &&
4148
git ls-files --stage >ls-files.actual &&
4249
cat >ls-files.expect <<-EOF &&
@@ -88,7 +95,7 @@ test_expect_success 'enable split index again, "one" now belongs to base index"'
8895
'
8996

9097
test_expect_success 'modify original file, base index untouched' '
91-
echo modified >one &&
98+
echo modified | create_non_racy_file one &&
9299
git update-index one &&
93100
git ls-files --stage >ls-files.actual &&
94101
cat >ls-files.expect <<-EOF &&
@@ -107,7 +114,7 @@ test_expect_success 'modify original file, base index untouched' '
107114
'
108115

109116
test_expect_success 'add another file, which stays index' '
110-
: >two &&
117+
create_non_racy_file two &&
111118
git update-index --add two &&
112119
git ls-files --stage >ls-files.actual &&
113120
cat >ls-files.expect <<-EOF &&
@@ -160,7 +167,7 @@ test_expect_success 'remove file in base index' '
160167
'
161168

162169
test_expect_success 'add original file back' '
163-
: >one &&
170+
create_non_racy_file one &&
164171
git update-index --add one &&
165172
git ls-files --stage >ls-files.actual &&
166173
cat >ls-files.expect <<-EOF &&
@@ -179,7 +186,7 @@ test_expect_success 'add original file back' '
179186
'
180187

181188
test_expect_success 'add new file' '
182-
: >two &&
189+
create_non_racy_file two &&
183190
git update-index --add two &&
184191
git ls-files --stage >actual &&
185192
cat >expect <<-EOF &&
@@ -223,7 +230,7 @@ test_expect_success 'rev-parse --shared-index-path' '
223230

224231
test_expect_success 'set core.splitIndex config variable to true' '
225232
git config core.splitIndex true &&
226-
: >three &&
233+
create_non_racy_file three &&
227234
git update-index --add three &&
228235
git ls-files --stage >ls-files.actual &&
229236
cat >ls-files.expect <<-EOF &&
@@ -258,9 +265,9 @@ test_expect_success 'set core.splitIndex config variable to false' '
258265
test_cmp expect actual
259266
'
260267

261-
test_expect_success 'set core.splitIndex config variable to true' '
268+
test_expect_success 'set core.splitIndex config variable back to true' '
262269
git config core.splitIndex true &&
263-
: >three &&
270+
create_non_racy_file three &&
264271
git update-index --add three &&
265272
BASE=$(test-tool dump-split-index .git/index | grep "^base") &&
266273
test-tool dump-split-index .git/index | sed "/^own/d" >actual &&
@@ -270,7 +277,7 @@ test_expect_success 'set core.splitIndex config variable to true' '
270277
deletions:
271278
EOF
272279
test_cmp expect actual &&
273-
: >four &&
280+
create_non_racy_file four &&
274281
git update-index --add four &&
275282
test-tool dump-split-index .git/index | sed "/^own/d" >actual &&
276283
cat >expect <<-EOF &&
@@ -284,7 +291,7 @@ test_expect_success 'set core.splitIndex config variable to true' '
284291

285292
test_expect_success 'check behavior with splitIndex.maxPercentChange unset' '
286293
git config --unset splitIndex.maxPercentChange &&
287-
: >five &&
294+
create_non_racy_file five &&
288295
git update-index --add five &&
289296
BASE=$(test-tool dump-split-index .git/index | grep "^base") &&
290297
test-tool dump-split-index .git/index | sed "/^own/d" >actual &&
@@ -294,7 +301,7 @@ test_expect_success 'check behavior with splitIndex.maxPercentChange unset' '
294301
deletions:
295302
EOF
296303
test_cmp expect actual &&
297-
: >six &&
304+
create_non_racy_file six &&
298305
git update-index --add six &&
299306
test-tool dump-split-index .git/index | sed "/^own/d" >actual &&
300307
cat >expect <<-EOF &&
@@ -308,7 +315,7 @@ test_expect_success 'check behavior with splitIndex.maxPercentChange unset' '
308315

309316
test_expect_success 'check splitIndex.maxPercentChange set to 0' '
310317
git config splitIndex.maxPercentChange 0 &&
311-
: >seven &&
318+
create_non_racy_file seven &&
312319
git update-index --add seven &&
313320
BASE=$(test-tool dump-split-index .git/index | grep "^base") &&
314321
test-tool dump-split-index .git/index | sed "/^own/d" >actual &&
@@ -318,7 +325,7 @@ test_expect_success 'check splitIndex.maxPercentChange set to 0' '
318325
deletions:
319326
EOF
320327
test_cmp expect actual &&
321-
: >eight &&
328+
create_non_racy_file eight &&
322329
git update-index --add eight &&
323330
BASE=$(test-tool dump-split-index .git/index | grep "^base") &&
324331
test-tool dump-split-index .git/index | sed "/^own/d" >actual &&
@@ -331,30 +338,30 @@ test_expect_success 'check splitIndex.maxPercentChange set to 0' '
331338
'
332339

333340
test_expect_success 'shared index files expire after 2 weeks by default' '
334-
: >ten &&
341+
create_non_racy_file ten &&
335342
git update-index --add ten &&
336343
test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
337344
just_under_2_weeks_ago=$((5-14*86400)) &&
338345
test-tool chmtime =$just_under_2_weeks_ago .git/sharedindex.* &&
339-
: >eleven &&
346+
create_non_racy_file eleven &&
340347
git update-index --add eleven &&
341348
test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
342349
just_over_2_weeks_ago=$((-1-14*86400)) &&
343350
test-tool chmtime =$just_over_2_weeks_ago .git/sharedindex.* &&
344-
: >twelve &&
351+
create_non_racy_file twelve &&
345352
git update-index --add twelve &&
346353
test $(ls .git/sharedindex.* | wc -l) -le 2
347354
'
348355

349356
test_expect_success 'check splitIndex.sharedIndexExpire set to 16 days' '
350357
git config splitIndex.sharedIndexExpire "16.days.ago" &&
351358
test-tool chmtime =$just_over_2_weeks_ago .git/sharedindex.* &&
352-
: >thirteen &&
359+
create_non_racy_file thirteen &&
353360
git update-index --add thirteen &&
354361
test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
355362
just_over_16_days_ago=$((-1-16*86400)) &&
356363
test-tool chmtime =$just_over_16_days_ago .git/sharedindex.* &&
357-
: >fourteen &&
364+
create_non_racy_file fourteen &&
358365
git update-index --add fourteen &&
359366
test $(ls .git/sharedindex.* | wc -l) -le 2
360367
'
@@ -363,13 +370,13 @@ test_expect_success 'check splitIndex.sharedIndexExpire set to "never" and "now"
363370
git config splitIndex.sharedIndexExpire never &&
364371
just_10_years_ago=$((-365*10*86400)) &&
365372
test-tool chmtime =$just_10_years_ago .git/sharedindex.* &&
366-
: >fifteen &&
373+
create_non_racy_file fifteen &&
367374
git update-index --add fifteen &&
368375
test $(ls .git/sharedindex.* | wc -l) -gt 2 &&
369376
git config splitIndex.sharedIndexExpire now &&
370377
just_1_second_ago=-1 &&
371378
test-tool chmtime =$just_1_second_ago .git/sharedindex.* &&
372-
: >sixteen &&
379+
create_non_racy_file sixteen &&
373380
git update-index --add sixteen &&
374381
test $(ls .git/sharedindex.* | wc -l) -le 2
375382
'
@@ -384,7 +391,7 @@ do
384391
# Create one new shared index file
385392
git config core.sharedrepository "$mode" &&
386393
git config core.splitIndex true &&
387-
: >one &&
394+
create_non_racy_file one &&
388395
git update-index --add one &&
389396
echo "$modebits" >expect &&
390397
test_modebits .git/index >actual &&

0 commit comments

Comments
 (0)