Skip to content

Commit 6135cd1

Browse files
jeffhostetlerdscho
authored andcommitted
gvfs-helper: better support for concurrent packfile fetches
Teach gvfs-helper to better support the concurrent fetching of the same packfile by multiple instances. If 2 instances of gvfs-helper did a POST and requested the same set of OIDs, they might receive the exact same packfile (same checksum SHA). Both processes would then race to install their copy of the .pack and .idx files into the ODB/pack directory. This is not a problem on Unix (because of filesystem semantics). On Windows, this can cause an EBUSY/EPERM problem for the loser while the winner is holding a handle to the target files. (The existing packfile code already handled simple the existence and/or replacement case.) The solution presented here is to silently let the loser claim victory IIF the .pack and .idx are already present in the ODB. (We can't check this in advance because we don't know the packfile SHA checksum until after we receive it and run index-pack.) We avoid using a per-packfile lockfile (or a single lockfile for the `vfs-` prefix) to avoid the usual issues with stale lockfiles. Signed-off-by: Jeff Hostetler <[email protected]>
1 parent 94679e4 commit 6135cd1

File tree

2 files changed

+141
-2
lines changed

2 files changed

+141
-2
lines changed

gvfs-helper.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,12 +1889,36 @@ static void my_finalize_packfile(struct gh__request_params *params,
18891889
struct strbuf *final_path_idx,
18901890
struct strbuf *final_filename)
18911891
{
1892+
/*
1893+
* Install the .pack and .idx into the ODB pack directory.
1894+
*
1895+
* We might be racing with other instances of gvfs-helper if
1896+
* we, in parallel, both downloaded the exact same packfile
1897+
* (with the same checksum SHA) and try to install it at the
1898+
* same time. This might happen on Windows where the loser
1899+
* can get an EBUSY or EPERM trying to move/rename the
1900+
* tempfile into the pack dir, for example.
1901+
*
1902+
* So, we always install the .pack before the .idx for
1903+
* consistency. And only if *WE* created the .pack and .idx
1904+
* files, do we create the matching .keep (when requested).
1905+
*
1906+
* If we get an error and the target files already exist, we
1907+
* silently eat the error. Note that finalize_object_file()
1908+
* has already munged errno (and it has various creation
1909+
* strategies), so we don't bother looking at it.
1910+
*/
18921911
if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) ||
18931912
finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) {
18941913
unlink(temp_path_pack->buf);
18951914
unlink(temp_path_idx->buf);
1896-
unlink(final_path_pack->buf);
1897-
unlink(final_path_idx->buf);
1915+
1916+
if (file_exists(final_path_pack->buf) &&
1917+
file_exists(final_path_idx->buf)) {
1918+
trace2_printf("%s: assuming ok for %s", TR2_CAT, final_path_pack->buf);
1919+
goto assume_ok;
1920+
}
1921+
18981922
strbuf_addf(&status->error_message,
18991923
"could not install packfile '%s'",
19001924
final_path_pack->buf);
@@ -1917,6 +1941,7 @@ static void my_finalize_packfile(struct gh__request_params *params,
19171941
strbuf_release(&keep);
19181942
}
19191943

1944+
assume_ok:
19201945
if (params->result_list) {
19211946
struct strbuf result_msg = STRBUF_INIT;
19221947

t/t5799-gvfs-helper.sh

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,10 @@ verify_objects_in_shared_cache () {
372372
return 0
373373
}
374374

375+
# gvfs-helper prints a "packfile <path>" message for each received
376+
# packfile to stdout. Verify that we received the expected number
377+
# of packfiles.
378+
#
375379
verify_received_packfile_count () {
376380
if test $# -eq 1
377381
then
@@ -414,6 +418,19 @@ verify_prefetch_keeps () {
414418
return 0
415419
}
416420

421+
# Verify that the number of vfs- packfile present in the shared-cache
422+
# matches our expectations.
423+
#
424+
verify_vfs_packfile_count () {
425+
count=$(( $(ls -1 "$SHARED_CACHE_T1"/pack/vfs-*.pack | wc -l) ))
426+
if test $count -ne $1
427+
then
428+
echo "verify_vfs_packfile_count: expected $1; actual $count"
429+
return 1
430+
fi
431+
return 0
432+
}
433+
417434
per_test_cleanup () {
418435
stop_gvfs_protocol_server
419436

@@ -1186,6 +1203,103 @@ test_expect_success 'integration: fully implicit: diff 2 commits' '
11861203
>OUT.output 2>OUT.stderr
11871204
'
11881205

1206+
#################################################################
1207+
# Duplicate packfile tests.
1208+
#
1209+
# If we request a fixed set of blobs, we should get a unique packfile
1210+
# of the form "vfs-<sha>.{pack,idx}". It we request that same set
1211+
# again, the server should create and send the exact same packfile.
1212+
# True web servers might build the custom packfile in random order,
1213+
# but our test web server should give us consistent results.
1214+
#
1215+
# Verify that we can handle the duplicate pack and idx file properly.
1216+
#################################################################
1217+
1218+
test_expect_success 'duplicate: vfs- packfile' '
1219+
test_when_finished "per_test_cleanup" &&
1220+
start_gvfs_protocol_server &&
1221+
1222+
git -C "$REPO_T1" gvfs-helper \
1223+
--cache-server=disable \
1224+
--remote=origin \
1225+
--no-progress \
1226+
post \
1227+
<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
1228+
verify_received_packfile_count 1 &&
1229+
verify_vfs_packfile_count 1 &&
1230+
1231+
# Re-fetch the same packfile. We do not care if it replaces
1232+
# first one or if it silently fails to overwrite the existing
1233+
# one. We just confirm that afterwards we only have 1 packfile.
1234+
#
1235+
git -C "$REPO_T1" gvfs-helper \
1236+
--cache-server=disable \
1237+
--remote=origin \
1238+
--no-progress \
1239+
post \
1240+
<"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr &&
1241+
verify_received_packfile_count 1 &&
1242+
verify_vfs_packfile_count 1 &&
1243+
1244+
stop_gvfs_protocol_server
1245+
'
1246+
1247+
# Return the absolute pathname of the first received packfile.
1248+
#
1249+
first_received_packfile_pathname () {
1250+
fn=$(sed -n '/^packfile/p' <OUT.output | head -1 | sed -n 's/^packfile \(.*\)/\1/p')
1251+
echo "$SHARED_CACHE_T1"/pack/"$fn"
1252+
return 0
1253+
}
1254+
1255+
test_expect_success 'duplicate and busy: vfs- packfile' '
1256+
test_when_finished "per_test_cleanup" &&
1257+
start_gvfs_protocol_server &&
1258+
1259+
git -C "$REPO_T1" gvfs-helper \
1260+
--cache-server=disable \
1261+
--remote=origin \
1262+
--no-progress \
1263+
post \
1264+
<"$OIDS_BLOBS_FILE" \
1265+
>OUT.output \
1266+
2>OUT.stderr &&
1267+
verify_received_packfile_count 1 &&
1268+
verify_vfs_packfile_count 1 &&
1269+
1270+
# Re-fetch the same packfile, but hold the existing packfile
1271+
# open for writing on an obscure (and randomly-chosen) file
1272+
# descriptor.
1273+
#
1274+
# This should cause the replacement-install to fail (at least
1275+
# on Windows) with an EBUSY or EPERM or something.
1276+
#
1277+
# Verify that that error is eaten. We do not care if the
1278+
# replacement is retried or if gvfs-helper simply discards the
1279+
# second instance. We just confirm that afterwards we only
1280+
# have 1 packfile on disk and that the command "lies" and reports
1281+
# that it created the existing packfile. (We want the lie because
1282+
# in normal usage, gh-client has already built the packed-git list
1283+
# in memory and is using gvfs-helper to fetch missing objects;
1284+
# gh-client does not care who does the fetch, but it needs to
1285+
# update its packed-git list and restart the object lookup.)
1286+
#
1287+
PACK=$(first_received_packfile_pathname) &&
1288+
git -C "$REPO_T1" gvfs-helper \
1289+
--cache-server=disable \
1290+
--remote=origin \
1291+
--no-progress \
1292+
post \
1293+
<"$OIDS_BLOBS_FILE" \
1294+
>OUT.output \
1295+
2>OUT.stderr \
1296+
9>>"$PACK" &&
1297+
verify_received_packfile_count 1 &&
1298+
verify_vfs_packfile_count 1 &&
1299+
1300+
stop_gvfs_protocol_server
1301+
'
1302+
11891303
#################################################################
11901304
# Ensure that the SHA of the blob we received matches the SHA of
11911305
# the blob we requested.

0 commit comments

Comments
 (0)