diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 0a4c827..f48dcfd 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -516,13 +516,35 @@ paths: - Storage /storage/user-bucket: delete: - description: Delete the current user's S3 bucket + description: 'Delete the current user''s S3 bucket + + + Note: + + Amazon S3 processes bucket deletion asynchronously. The bucket may + + remain visible for a short period until deletion fully propagates.' operationId: delete_user_bucket responses: - '204': + '202': + content: + application/json: + schema: {} description: Successful Response security: - HTTPBearer: [] summary: Delete User Bucket tags: - Storage + /storage/user-bucket/objects: + delete: + description: Deletes all the objects from current user's S3 bucket + operationId: empty_user_bucket + responses: + '204': + description: Successful Response + security: + - HTTPBearer: [] + summary: Empty User Bucket + tags: + - Storage diff --git a/gen3workflow/aws_utils.py b/gen3workflow/aws_utils.py index 4ff1172..79f4418 100644 --- a/gen3workflow/aws_utils.py +++ b/gen3workflow/aws_utils.py @@ -1,6 +1,6 @@ import json import os -from typing import Tuple, Union +from typing import Optional, Tuple, Union from fastapi import HTTPException import boto3 @@ -472,15 +472,21 @@ def delete_all_bucket_objects(user_id: str, user_bucket_name: str) -> None: raise Exception(response) -def delete_user_bucket(user_id: str) -> Union[str, None]: +def cleanup_user_bucket(user_id: str, delete_bucket: bool = False) -> Union[str, None]: """ - Deletes all objects from a user's S3 bucket before deleting the bucket itself. + Empty a user's S3 bucket and optionally delete the bucket. Args: - user_id (str): The user's unique Gen3 ID + user_id: User identifier used to derive the bucket name. + delete_bucket: If True, delete the bucket after removing all objects. + Defaults to False (only objects are removed). 
+ + Returns: + Bucket name if it exists and cleanup was performed, otherwise None + if the bucket does not exist. Raises: - Exception: If there is an error during the deletion process. + Exception: Propagates unexpected errors during cleanup or deletion. """ user_bucket_name = get_bucket_name_from_user_id(user_id) @@ -493,15 +499,17 @@ def delete_user_bucket(user_id: str) -> Union[str, None]: f"Bucket '{user_bucket_name}' not found for user '{user_id}'." ) return None - - logger.info(f"Deleting bucket '{user_bucket_name}' for user '{user_id}'") try: delete_all_bucket_objects(user_id, user_bucket_name) - s3_client.delete_bucket(Bucket=user_bucket_name) + if delete_bucket: + logger.info( + f"Initializing delete for bucket '{user_bucket_name}' for user '{user_id}'" + ) + s3_client.delete_bucket(Bucket=user_bucket_name) return user_bucket_name except Exception as e: logger.error( - f"Failed to delete bucket '{user_bucket_name}' for user '{user_id}': {e}" + f"Failed to cleanup bucket '{user_bucket_name}' for user '{user_id}': {e}" ) raise diff --git a/gen3workflow/routes/ga4gh_tes.py b/gen3workflow/routes/ga4gh_tes.py index ba98c05..6b043e1 100644 --- a/gen3workflow/routes/ga4gh_tes.py +++ b/gen3workflow/routes/ga4gh_tes.py @@ -37,7 +37,14 @@ async def get_request_body(request: Request) -> dict: body = body_bytes.decode() except UnicodeDecodeError: body = str(body_bytes) # in case of binary data - return json.loads(body) + + try: + return json.loads(body) + except json.JSONDecodeError as e: + raise HTTPException( + status_code=HTTP_400_BAD_REQUEST, + detail=f"Invalid JSON in request body: {e.msg}", + ) @router.get("/service-info", status_code=HTTP_200_OK) diff --git a/gen3workflow/routes/storage.py b/gen3workflow/routes/storage.py index 4d2dca7..0956d07 100644 --- a/gen3workflow/routes/storage.py +++ b/gen3workflow/routes/storage.py @@ -1,5 +1,10 @@ from fastapi import APIRouter, Depends, Request, HTTPException -from starlette.status import HTTP_200_OK, 
HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND +from starlette.status import ( + HTTP_200_OK, + HTTP_202_ACCEPTED, + HTTP_204_NO_CONTENT, + HTTP_404_NOT_FOUND, +) from gen3workflow import aws_utils, logger from gen3workflow.auth import Auth @@ -30,20 +35,57 @@ async def get_storage_info(request: Request, auth=Depends(Auth)) -> dict: } -@router.delete("/user-bucket", status_code=HTTP_204_NO_CONTENT) -@router.delete( - "/user-bucket/", status_code=HTTP_204_NO_CONTENT, include_in_schema=False -) +@router.delete("/user-bucket", status_code=HTTP_202_ACCEPTED) +@router.delete("/user-bucket/", status_code=HTTP_202_ACCEPTED, include_in_schema=False) async def delete_user_bucket(request: Request, auth=Depends(Auth)) -> None: """ Delete the current user's S3 bucket + + Note: + Amazon S3 processes bucket deletion asynchronously. The bucket may + remain visible for a short period until deletion fully propagates. """ await auth.authorize("delete", ["/services/workflow/gen3-workflow/user-bucket"]) token_claims = await auth.get_token_claims() user_id = token_claims.get("sub") logger.info(f"User '{user_id}' deleting their storage bucket") - deleted_bucket_name = aws_utils.delete_user_bucket(user_id) + deleted_bucket_name = aws_utils.cleanup_user_bucket(user_id, delete_bucket=True) + + if not deleted_bucket_name: + raise HTTPException( + HTTP_404_NOT_FOUND, "Deletion failed: No user bucket found." + ) + + logger.info( + f"Bucket '{deleted_bucket_name}' for user '{user_id}' scheduled for deletion" + ) + + return { + "message": "Bucket deletion initiated.", + "bucket": deleted_bucket_name, + "details": ( + "Amazon S3 processes bucket deletion asynchronously. " + "The bucket may remain visible for a short period until " + "deletion fully propagates across AWS." 
+ ), + } + + +@router.delete("/user-bucket/objects", status_code=HTTP_204_NO_CONTENT) +@router.delete( + "/user-bucket/objects/", status_code=HTTP_204_NO_CONTENT, include_in_schema=False +) +async def empty_user_bucket(request: Request, auth=Depends(Auth)) -> None: + """ + Deletes all the objects from current user's S3 bucket + """ + await auth.authorize("delete", ["/services/workflow/gen3-workflow/user-bucket"]) + + token_claims = await auth.get_token_claims() + user_id = token_claims.get("sub") + logger.info(f"User '{user_id}' emptying their storage bucket") + deleted_bucket_name = aws_utils.cleanup_user_bucket(user_id) if not deleted_bucket_name: raise HTTPException( @@ -51,5 +93,5 @@ async def delete_user_bucket(request: Request, auth=Depends(Auth)) -> None: ) logger.info( - f"Bucket '{deleted_bucket_name}' for user '{user_id}' deleted successfully" + f"All objects removed from bucket '{deleted_bucket_name}' for user '{user_id}'" ) diff --git a/tests/test_ga4gh_tes.py b/tests/test_ga4gh_tes.py index 758ed04..930a9b8 100644 --- a/tests/test_ga4gh_tes.py +++ b/tests/test_ga4gh_tes.py @@ -265,6 +265,23 @@ async def test_create_task_with_reserved_tags(client, access_token_patcher): } +@pytest.mark.asyncio +async def test_create_task_with_invalid_body(client, access_token_patcher): + """ + Requests whose body is not valid JSON must be rejected with a 400 error + instead of raising an unhandled JSONDecodeError. 
+ """ + res = await client.post( + "/ga4gh/tes/v1/tasks", + content='{"name": "test-task", "command": ["echo Hello World",""&&", "echo Goodbye!"]}', # Malformed command list + headers={ + "Authorization": f"bearer {TEST_USER_TOKEN}", + "Content-Type": "application/json", + }, + ) + assert res.status_code == 400, res.text + + @pytest.mark.asyncio @pytest.mark.parametrize( "req_body,status_code, error_message", diff --git a/tests/test_misc.py b/tests/test_misc.py index d6b6902..af67cc0 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -255,7 +255,7 @@ async def test_delete_user_bucket( f"/storage/user-bucket{'/' if trailing_slash else ''}", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}, ) - assert res.status_code == 204, res.text + assert res.status_code == 202, res.text # Verify the bucket is deleted with pytest.raises(ClientError) as e: @@ -307,7 +307,7 @@ async def test_delete_user_bucket_with_files( res = await client.delete( "/storage/user-bucket", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"} ) - assert res.status_code == 204, res.text + assert res.status_code == 202, res.text # Verify the bucket is deleted with pytest.raises(ClientError) as e: @@ -324,7 +324,7 @@ async def test_delete_user_bucket_no_token(client, mock_aws_services): """ mock_delete_bucket = MagicMock() # Delete the bucket - with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket): + with patch("gen3workflow.aws_utils.cleanup_user_bucket", mock_delete_bucket): res = await client.delete("/storage/user-bucket") assert res.status_code == 401, res.text assert res.json() == {"detail": "Must provide an access token"} @@ -346,7 +346,7 @@ async def test_delete_user_bucket_unauthorized( """ mock_delete_bucket = MagicMock() # Delete the bucket - with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket): + with patch("gen3workflow.aws_utils.cleanup_user_bucket", mock_delete_bucket): res = await client.delete( "/storage/user-bucket", 
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}, @@ -354,3 +354,47 @@ assert res.status_code == 403, res.text assert res.json() == {"detail": "Permission denied"} mock_delete_bucket.assert_not_called() + + +@pytest.mark.asyncio +async def test_delete_user_bucket_objects_with_existing_files( + client, access_token_patcher, mock_aws_services +): + """ + Attempt to delete all the objects in a bucket that is not empty. + Endpoint must be able to delete all the files but should not delete the bucket. + """ + + # Create the bucket if it doesn't exist + res = await client.get( + "/storage/info", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"} + ) + bucket_name = res.json()["bucket"] + + # Remove the bucket policy enforcing KMS encryption + # Moto has limitations that prevent adding objects to a bucket with KMS encryption enabled. + # More details: https://github.com/uc-cdis/gen3-workflow/blob/554fc3eb4c1d333f9ef81c1a5f8e75a6b208cdeb/tests/test_misc.py#L161-L171 + aws_utils.s3_client.delete_bucket_policy(Bucket=bucket_name) + + object_count = 10 + for i in range(object_count): + aws_utils.s3_client.put_object( + Bucket=bucket_name, Key=f"file_{i}", Body=b"Dummy file contents" + ) + + # Delete all the bucket objects + res = await client.delete( + "/storage/user-bucket/objects", + headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}, + ) + assert res.status_code == 204, res.text + + # Verify the bucket still exists + bucket_exists = aws_utils.s3_client.head_bucket(Bucket=bucket_name) + assert bucket_exists, f"Bucket '{bucket_name}' is expected to exist but not found" + + # Verify all the objects in the bucket are deleted + object_list = aws_utils.get_all_bucket_objects(bucket_name) + assert ( + len(object_list) == 0 + ), f"Expected bucket to have no objects, but found {len(object_list)}.\n{object_list=}