Skip to content

Commit c307cb5

Browse files
authored
Fix corruption during CONFLICT upload (#3499)
* Fix corruption during CONFLICT upload PBENCH-1219 b0.72 ARCHIVE server version Large uploads can time out, causing the client (e.g., the 0.69 passthrough server's dispatch) to retry. Eventually, this will result in an `OK` (200) response, which is good. However if we retry before the original operation finishes (it may be still running, despite the client timeout), we catch the already existing "temporary intake directory" as a `CONFLICT` error. Unfortunately, the cleanup logic doesn't recognize this distinction, and still deleted the intake directory on exit. Timed correctly, this could break the original upload: at best, it results in a noisy termination with complaints that the existed temporary directory no longer exists. Fix this problem by attempting to delete only when this API instance has successfully created the temporary directory. Modify the `CONFLICT` unit test case to reproduce the situation more accurately and additionally validate that the directory still exists after completion.
1 parent f8fb65d commit c307cb5

File tree

2 files changed

+34
-16
lines changed

2 files changed

+34
-16
lines changed

lib/pbench/server/api/resources/upload_api.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def _put(self, args: ApiParams, request: Request, context: ApiContext) -> Respon
178178

179179
attributes = {"access": access, "metadata": metadata}
180180
filename = args.uri["filename"]
181-
tmp_dir: Optional[Path] = None
181+
intake_dir: Optional[Path] = None
182182

183183
try:
184184
try:
@@ -248,13 +248,15 @@ def _put(self, args: ApiParams, request: Request, context: ApiContext) -> Respon
248248
try:
249249
tmp_dir = self.temporary / md5sum
250250
tmp_dir.mkdir()
251-
except FileExistsError:
251+
except FileExistsError as e:
252252
raise CleanupTime(
253253
HTTPStatus.CONFLICT,
254-
"Temporary upload directory already exists",
255-
)
256-
tar_full_path = tmp_dir / filename
257-
md5_full_path = tmp_dir / f"{filename}.md5"
254+
"Dataset is currently being uploaded",
255+
) from e
256+
else:
257+
intake_dir = tmp_dir
258+
tar_full_path = intake_dir / filename
259+
md5_full_path = intake_dir / f"{filename}.md5"
258260

259261
bytes_received = 0
260262
usage = shutil.disk_usage(tar_full_path.parent)
@@ -523,9 +525,9 @@ def _put(self, args: ApiParams, request: Request, context: ApiContext) -> Respon
523525
else:
524526
raise APIAbort(status, message) from e
525527
finally:
526-
if tmp_dir:
528+
if intake_dir:
527529
try:
528-
shutil.rmtree(tmp_dir)
530+
shutil.rmtree(intake_dir)
529531
except Exception as e:
530532
current_app.logger.warning("Error removing {}: {}", tmp_dir, str(e))
531533

lib/pbench/test/unit/server/test_upload.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from http import HTTPStatus
22
from logging import Logger
33
from pathlib import Path
4-
from typing import Any
4+
from typing import Any, Optional
55

66
from freezegun import freeze_time
77
import pytest
@@ -289,19 +289,33 @@ def test_empty_upload(
289289
def test_temp_exists(
290290
self, monkeypatch, client, tmp_path, server_config, pbench_drb_token
291291
):
292+
"""Test behavior of a conflicting upload
293+
294+
When the MD5-based temporary intake directory exists already, upload
295+
will fail with CONFLICT. We want to verify that behavior, and that we
296+
don't delete the existing directory during cleanup, which could
297+
interfere with a running upload. This can happen, for example, when a
298+
large upload times out and the client retries before the original is
299+
finished.
300+
"""
292301
md5 = "d41d8cd98f00b204e9800998ecf8427e"
302+
temp_path: Optional[Path] = None
293303

294304
def td_exists(self, *args, **kwargs):
295305
"""Mock out Path.mkdir()
296306
297307
The trick here is that calling the UPLOAD API results in two calls
298308
to Path.mkdir: one in the __init__ to be sure that ARCHIVE/UPLOAD
299-
exists, and the second for the temporary subdirectory. We want the
300-
first to succeed normally so we'll pass the call to the real mkdir
301-
if the path doesn't end with our MD5 value.
309+
exists, and the second for the temporary subdirectory. We want to
310+
create both directories, but for the second (MD5-based intake temp)
311+
we want to raise FileExistsError as if it had already existed, to
312+
trigger the duplicate upload logic.
302313
"""
314+
retval = self.real_mkdir(*args, **kwargs)
303315
if self.name != md5:
304-
return self.real_mkdir(*args, **kwargs)
316+
return retval
317+
nonlocal temp_path
318+
temp_path = self
305319
raise FileExistsError(str(self))
306320

307321
filename = "tmp.tar.xz"
@@ -317,9 +331,11 @@ def td_exists(self, *args, **kwargs):
317331
headers=self.gen_headers(pbench_drb_token, md5),
318332
)
319333
assert response.status_code == HTTPStatus.CONFLICT
320-
assert (
321-
response.json.get("message") == "Temporary upload directory already exists"
322-
)
334+
335+
# Assert that we captured an intake temporary directory path and that
336+
# the "duplicate" path wasn't deleted during API cleanup.
337+
assert temp_path and temp_path.is_dir()
338+
assert response.json.get("message") == "Dataset is currently being uploaded"
323339
assert not self.cachemanager_created
324340

325341
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)