Skip to content

Commit 9a1c668

Browse files
authored
Add /user-bucket/objects delete endpoint; return 400 for invalid TES requests (#101)
1 parent aefe97c commit 9a1c668

File tree

6 files changed

+163
-23
lines changed

6 files changed

+163
-23
lines changed

docs/openapi.yaml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -516,13 +516,35 @@ paths:
516516
- Storage
517517
/storage/user-bucket:
518518
delete:
519-
description: Delete the current user's S3 bucket
519+
description: 'Delete the current user''s S3 bucket
520+
521+
522+
Note:
523+
524+
Amazon S3 processes bucket deletion asynchronously. The bucket may
525+
526+
remain visible for a short period until deletion fully propagates.'
520527
operationId: delete_user_bucket
521528
responses:
522-
'204':
529+
'202':
530+
content:
531+
application/json:
532+
schema: {}
523533
description: Successful Response
524534
security:
525535
- HTTPBearer: []
526536
summary: Delete User Bucket
527537
tags:
528538
- Storage
539+
/storage/user-bucket/objects:
540+
delete:
541+
description: Deletes all the objects from current user's S3 bucket
542+
operationId: empty_user_bucket
543+
responses:
544+
'204':
545+
description: Successful Response
546+
security:
547+
- HTTPBearer: []
548+
summary: Empty User Bucket
549+
tags:
550+
- Storage

gen3workflow/aws_utils.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22
import os
3-
from typing import Tuple, Union
3+
from typing import Optional, Tuple, Union
44

55
from fastapi import HTTPException
66
import boto3
@@ -472,15 +472,21 @@ def delete_all_bucket_objects(user_id: str, user_bucket_name: str) -> None:
472472
raise Exception(response)
473473

474474

475-
def delete_user_bucket(user_id: str) -> Union[str, None]:
475+
def cleanup_user_bucket(user_id: str, delete_bucket: bool = False) -> Union[str, None]:
476476
"""
477-
Deletes all objects from a user's S3 bucket before deleting the bucket itself.
477+
Empty a user's S3 bucket and optionally delete the bucket.
478478
479479
Args:
480-
user_id (str): The user's unique Gen3 ID
480+
user_id: User identifier used to derive the bucket name.
481+
delete_bucket: If True, delete the bucket after removing all objects.
482+
Defaults to False (only objects are removed).
483+
484+
Returns:
485+
Bucket name if it exists and cleanup was performed, otherwise None
486+
if the bucket does not exist.
481487
482488
Raises:
483-
Exception: If there is an error during the deletion process.
489+
Exception: Propagates unexpected errors during cleanup or deletion.
484490
"""
485491
user_bucket_name = get_bucket_name_from_user_id(user_id)
486492

@@ -493,15 +499,17 @@ def delete_user_bucket(user_id: str) -> Union[str, None]:
493499
f"Bucket '{user_bucket_name}' not found for user '{user_id}'."
494500
)
495501
return None
496-
497-
logger.info(f"Deleting bucket '{user_bucket_name}' for user '{user_id}'")
498502
try:
499503
delete_all_bucket_objects(user_id, user_bucket_name)
500-
s3_client.delete_bucket(Bucket=user_bucket_name)
504+
if delete_bucket:
505+
logger.info(
506+
f"Initializing delete for bucket '{user_bucket_name}' for user '{user_id}'"
507+
)
508+
s3_client.delete_bucket(Bucket=user_bucket_name)
501509
return user_bucket_name
502510

503511
except Exception as e:
504512
logger.error(
505-
f"Failed to delete bucket '{user_bucket_name}' for user '{user_id}': {e}"
513+
f"Failed to cleanup bucket '{user_bucket_name}' for user '{user_id}': {e}"
506514
)
507515
raise

gen3workflow/routes/ga4gh_tes.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,14 @@ async def get_request_body(request: Request) -> dict:
3737
body = body_bytes.decode()
3838
except UnicodeDecodeError:
3939
body = str(body_bytes) # in case of binary data
40-
return json.loads(body)
40+
41+
try:
42+
return json.loads(body)
43+
except json.JSONDecodeError as e:
44+
raise HTTPException(
45+
status_code=HTTP_400_BAD_REQUEST,
46+
detail=f"Invalid JSON in request body: {e.msg}",
47+
)
4148

4249

4350
@router.get("/service-info", status_code=HTTP_200_OK)

gen3workflow/routes/storage.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
from fastapi import APIRouter, Depends, Request, HTTPException
2-
from starlette.status import HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
2+
from starlette.status import (
3+
HTTP_200_OK,
4+
HTTP_202_ACCEPTED,
5+
HTTP_204_NO_CONTENT,
6+
HTTP_404_NOT_FOUND,
7+
)
38

49
from gen3workflow import aws_utils, logger
510
from gen3workflow.auth import Auth
@@ -30,26 +35,63 @@ async def get_storage_info(request: Request, auth=Depends(Auth)) -> dict:
3035
}
3136

3237

33-
@router.delete("/user-bucket", status_code=HTTP_204_NO_CONTENT)
34-
@router.delete(
35-
"/user-bucket/", status_code=HTTP_204_NO_CONTENT, include_in_schema=False
36-
)
38+
@router.delete("/user-bucket", status_code=HTTP_202_ACCEPTED)
39+
@router.delete("/user-bucket/", status_code=HTTP_202_ACCEPTED, include_in_schema=False)
3740
async def delete_user_bucket(request: Request, auth=Depends(Auth)) -> None:
3841
"""
3942
Delete the current user's S3 bucket
43+
44+
Note:
45+
Amazon S3 processes bucket deletion asynchronously. The bucket may
46+
remain visible for a short period until deletion fully propagates.
4047
"""
4148
await auth.authorize("delete", ["/services/workflow/gen3-workflow/user-bucket"])
4249

4350
token_claims = await auth.get_token_claims()
4451
user_id = token_claims.get("sub")
4552
logger.info(f"User '{user_id}' deleting their storage bucket")
46-
deleted_bucket_name = aws_utils.delete_user_bucket(user_id)
53+
deleted_bucket_name = aws_utils.cleanup_user_bucket(user_id, delete_bucket=True)
54+
55+
if not deleted_bucket_name:
56+
raise HTTPException(
57+
HTTP_404_NOT_FOUND, "Deletion failed: No user bucket found."
58+
)
59+
60+
logger.info(
61+
f"Bucket '{deleted_bucket_name}' for user '{user_id}' scheduled for deletion"
62+
)
63+
64+
return {
65+
"message": "Bucket deletion initiated.",
66+
"bucket": deleted_bucket_name,
67+
"details": (
68+
"Amazon S3 processes bucket deletion asynchronously. "
69+
"The bucket may remain visible for a short period until "
70+
"deletion fully propagates across AWS."
71+
),
72+
}
73+
74+
75+
@router.delete("/user-bucket/objects", status_code=HTTP_204_NO_CONTENT)
76+
@router.delete(
77+
"/user-bucket/objects/", status_code=HTTP_204_NO_CONTENT, include_in_schema=False
78+
)
79+
async def empty_user_bucket(request: Request, auth=Depends(Auth)) -> None:
80+
"""
81+
Deletes all the objects from current user's S3 bucket
82+
"""
83+
await auth.authorize("delete", ["/services/workflow/gen3-workflow/user-bucket"])
84+
85+
token_claims = await auth.get_token_claims()
86+
user_id = token_claims.get("sub")
87+
logger.info(f"User '{user_id}' emptying their storage bucket")
88+
deleted_bucket_name = aws_utils.cleanup_user_bucket(user_id)
4789

4890
if not deleted_bucket_name:
4991
raise HTTPException(
5092
HTTP_404_NOT_FOUND, "Deletion failed: No user bucket found."
5193
)
5294

5395
logger.info(
54-
f"Bucket '{deleted_bucket_name}' for user '{user_id}' deleted successfully"
96+
f"All objects remvoved from bucket '{deleted_bucket_name}' for user '{user_id}'"
5597
)

tests/test_ga4gh_tes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,23 @@ async def test_create_task_with_reserved_tags(client, access_token_patcher):
265265
}
266266

267267

268+
@pytest.mark.asyncio
269+
async def test_create_task_with_invalid_body(client, access_token_patcher):
270+
"""
271+
Users cannot specify the value of certain reserved tags ("_authz" "_worker_sa" etc.,) themselves
272+
when creating a task, since these are strictly for internal use.
273+
"""
274+
res = await client.post(
275+
"/ga4gh/tes/v1/tasks",
276+
content='{"name": "test-task", "command": ["echo Hello World",""&&", "echo Goodbye!"]}', # Malformed command list
277+
headers={
278+
"Authorization": f"bearer {TEST_USER_TOKEN}",
279+
"Content-Type": "application/json",
280+
},
281+
)
282+
assert res.status_code == 400, res.text
283+
284+
268285
@pytest.mark.asyncio
269286
@pytest.mark.parametrize(
270287
"req_body,status_code, error_message",

tests/test_misc.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ async def test_delete_user_bucket(
255255
f"/storage/user-bucket{'/' if trailing_slash else ''}",
256256
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"},
257257
)
258-
assert res.status_code == 204, res.text
258+
assert res.status_code == 202, res.text
259259

260260
# Verify the bucket is deleted
261261
with pytest.raises(ClientError) as e:
@@ -307,7 +307,7 @@ async def test_delete_user_bucket_with_files(
307307
res = await client.delete(
308308
"/storage/user-bucket", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}
309309
)
310-
assert res.status_code == 204, res.text
310+
assert res.status_code == 202, res.text
311311

312312
# Verify the bucket is deleted
313313
with pytest.raises(ClientError) as e:
@@ -324,7 +324,7 @@ async def test_delete_user_bucket_no_token(client, mock_aws_services):
324324
"""
325325
mock_delete_bucket = MagicMock()
326326
# Delete the bucket
327-
with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket):
327+
with patch("gen3workflow.aws_utils.cleanup_user_bucket", mock_delete_bucket):
328328
res = await client.delete("/storage/user-bucket")
329329
assert res.status_code == 401, res.text
330330
assert res.json() == {"detail": "Must provide an access token"}
@@ -346,11 +346,55 @@ async def test_delete_user_bucket_unauthorized(
346346
"""
347347
mock_delete_bucket = MagicMock()
348348
# Delete the bucket
349-
with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket):
349+
with patch("gen3workflow.aws_utils.cleanup_user_bucket", mock_delete_bucket):
350350
res = await client.delete(
351351
"/storage/user-bucket",
352352
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"},
353353
)
354354
assert res.status_code == 403, res.text
355355
assert res.json() == {"detail": "Permission denied"}
356356
mock_delete_bucket.assert_not_called()
357+
358+
359+
@pytest.mark.asyncio
360+
async def test_delete_user_bucket_objects_with_existing_files(
361+
client, access_token_patcher, mock_aws_services
362+
):
363+
"""
364+
Attempt to delete all the objects in a bucket that is not empty.
365+
Endpoint must be able to delete all the files but should not delete the bucket.
366+
"""
367+
368+
# Create the bucket if it doesn't exist
369+
res = await client.get(
370+
"/storage/info", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}
371+
)
372+
bucket_name = res.json()["bucket"]
373+
374+
# Remove the bucket policy enforcing KMS encryption
375+
# Moto has limitations that prevent adding objects to a bucket with KMS encryption enabled.
376+
# More details: https://github.com/uc-cdis/gen3-workflow/blob/554fc3eb4c1d333f9ef81c1a5f8e75a6b208cdeb/tests/test_misc.py#L161-L171
377+
aws_utils.s3_client.delete_bucket_policy(Bucket=bucket_name)
378+
379+
object_count = 10
380+
for i in range(object_count):
381+
aws_utils.s3_client.put_object(
382+
Bucket=bucket_name, Key=f"file_{i}", Body=b"Dummy file contents"
383+
)
384+
385+
# Delete all the bucket objects
386+
res = await client.delete(
387+
"/storage/user-bucket/objects",
388+
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"},
389+
)
390+
assert res.status_code == 204, res.text
391+
392+
# Verify the bucket still exists
393+
bucket_exists = aws_utils.s3_client.head_bucket(Bucket=bucket_name)
394+
assert bucket_exists, f"Bucket '{bucket_name} is expected to exist but not found"
395+
396+
# Verify all the objects in the bucket are deleted
397+
object_list = aws_utils.get_all_bucket_objects(bucket_name)
398+
assert (
399+
len(object_list) == 0
400+
), f"Expected bucket to have no objects, but found {len(object_list)}.\n{object_list=}"

0 commit comments

Comments
 (0)