Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/integration_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ jobs:
uses: uc-cdis/.github/.github/workflows/integration_tests.yaml@master
with:
SERVICE_TO_TEST: gen3_workflow
TEST_REPO_BRANCH: cleanup_gen3wf_aws_resources
secrets:
CI_TEST_ORCID_USERID: ${{ secrets.CI_TEST_ORCID_USERID }}
CI_TEST_ORCID_PASSWORD: ${{ secrets.CI_TEST_ORCID_PASSWORD }}
Expand Down
26 changes: 24 additions & 2 deletions docs/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -516,13 +516,35 @@ paths:
- Storage
/storage/user-bucket:
delete:
description: Delete the current user's S3 bucket
description: 'Delete the current user''s S3 bucket
Note:
Amazon S3 processes bucket deletion asynchronously. The bucket may
remain visible for a short period until deletion fully propagates.'
operationId: delete_user_bucket
responses:
'204':
'202':
content:
application/json:
schema: {}
description: Successful Response
security:
- HTTPBearer: []
summary: Delete User Bucket
tags:
- Storage
/storage/user-bucket/objects:
delete:
description: Deletes all the objects from current user's S3 bucket
operationId: empty_user_bucket
responses:
'204':
description: Successful Response
security:
- HTTPBearer: []
summary: Empty User Bucket
tags:
- Storage
26 changes: 17 additions & 9 deletions gen3workflow/aws_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import os
from typing import Tuple, Union
from typing import Optional, Tuple, Union

from fastapi import HTTPException
import boto3
Expand Down Expand Up @@ -472,15 +472,21 @@ def delete_all_bucket_objects(user_id: str, user_bucket_name: str) -> None:
raise Exception(response)


def delete_user_bucket(user_id: str) -> Union[str, None]:
def cleanup_user_bucket(user_id: str, delete_bucket: bool = False) -> Union[str, None]:
"""
Deletes all objects from a user's S3 bucket before deleting the bucket itself.
Empty a user's S3 bucket and optionally delete the bucket.
Args:
user_id (str): The user's unique Gen3 ID
user_id: User identifier used to derive the bucket name.
delete_bucket: If True, delete the bucket after removing all objects.
Defaults to False (only objects are removed).
Returns:
Bucket name if it exists and cleanup was performed, otherwise None
if the bucket does not exist.
Raises:
Exception: If there is an error during the deletion process.
Exception: Propagates unexpected errors during cleanup or deletion.
"""
user_bucket_name = get_bucket_name_from_user_id(user_id)

Expand All @@ -493,15 +499,17 @@ def delete_user_bucket(user_id: str) -> Union[str, None]:
f"Bucket '{user_bucket_name}' not found for user '{user_id}'."
)
return None

logger.info(f"Deleting bucket '{user_bucket_name}' for user '{user_id}'")
try:
delete_all_bucket_objects(user_id, user_bucket_name)
s3_client.delete_bucket(Bucket=user_bucket_name)
if delete_bucket:
logger.info(
f"Initializing delete for bucket '{user_bucket_name}' for user '{user_id}'"
)
s3_client.delete_bucket(Bucket=user_bucket_name)
return user_bucket_name

except Exception as e:
logger.error(
f"Failed to delete bucket '{user_bucket_name}' for user '{user_id}': {e}"
f"Failed to cleanup bucket '{user_bucket_name}' for user '{user_id}': {e}"
)
raise
9 changes: 8 additions & 1 deletion gen3workflow/routes/ga4gh_tes.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,14 @@ async def get_request_body(request: Request) -> dict:
body = body_bytes.decode()
except UnicodeDecodeError:
body = str(body_bytes) # in case of binary data
return json.loads(body)

try:
return json.loads(body)
except json.JSONDecodeError as e:
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
detail=f"Invalid JSON in request body: {e.msg}",
)


@router.get("/service-info", status_code=HTTP_200_OK)
Expand Down
56 changes: 49 additions & 7 deletions gen3workflow/routes/storage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from fastapi import APIRouter, Depends, Request, HTTPException
from starlette.status import HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
from starlette.status import (
HTTP_200_OK,
HTTP_202_ACCEPTED,
HTTP_204_NO_CONTENT,
HTTP_404_NOT_FOUND,
)

from gen3workflow import aws_utils, logger
from gen3workflow.auth import Auth
Expand Down Expand Up @@ -30,26 +35,63 @@ async def get_storage_info(request: Request, auth=Depends(Auth)) -> dict:
}


@router.delete("/user-bucket", status_code=HTTP_204_NO_CONTENT)
@router.delete(
"/user-bucket/", status_code=HTTP_204_NO_CONTENT, include_in_schema=False
)
@router.delete("/user-bucket", status_code=HTTP_202_ACCEPTED)
@router.delete("/user-bucket/", status_code=HTTP_202_ACCEPTED, include_in_schema=False)
async def delete_user_bucket(request: Request, auth=Depends(Auth)) -> None:
"""
Delete the current user's S3 bucket
Note:
Amazon S3 processes bucket deletion asynchronously. The bucket may
remain visible for a short period until deletion fully propagates.
"""
await auth.authorize("delete", ["/services/workflow/gen3-workflow/user-bucket"])

token_claims = await auth.get_token_claims()
user_id = token_claims.get("sub")
logger.info(f"User '{user_id}' deleting their storage bucket")
deleted_bucket_name = aws_utils.delete_user_bucket(user_id)
deleted_bucket_name = aws_utils.cleanup_user_bucket(user_id, delete_bucket=True)

if not deleted_bucket_name:
raise HTTPException(
HTTP_404_NOT_FOUND, "Deletion failed: No user bucket found."
)

logger.info(
f"Bucket '{deleted_bucket_name}' for user '{user_id}' scheduled for deletion"
)

return {
"message": "Bucket deletion initiated.",
"bucket": deleted_bucket_name,
"details": (
"Amazon S3 processes bucket deletion asynchronously. "
"The bucket may remain visible for a short period until "
"deletion fully propagates across AWS."
Comment on lines +67 to +70
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice

),
}


@router.delete("/user-bucket/objects", status_code=HTTP_204_NO_CONTENT)
@router.delete(
"/user-bucket/objects/", status_code=HTTP_204_NO_CONTENT, include_in_schema=False
)
async def empty_user_bucket(request: Request, auth=Depends(Auth)) -> None:
"""
Deletes all the objects from current user's S3 bucket
"""
await auth.authorize("delete", ["/services/workflow/gen3-workflow/user-bucket"])

token_claims = await auth.get_token_claims()
user_id = token_claims.get("sub")
logger.info(f"User '{user_id}' emptying their storage bucket")
deleted_bucket_name = aws_utils.cleanup_user_bucket(user_id)

if not deleted_bucket_name:
raise HTTPException(
HTTP_404_NOT_FOUND, "Deletion failed: No user bucket found."
)

logger.info(
f"Bucket '{deleted_bucket_name}' for user '{user_id}' deleted successfully"
f"All objects remvoved from bucket '{deleted_bucket_name}' for user '{user_id}'"
)
17 changes: 17 additions & 0 deletions tests/test_ga4gh_tes.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,23 @@ async def test_create_task_with_reserved_tags(client, access_token_patcher):
}


@pytest.mark.asyncio
async def test_create_task_with_invalid_body(client, access_token_patcher):
"""
Users cannot specify the value of certain reserved tags ("_authz" "_worker_sa" etc.,) themselves
when creating a task, since these are strictly for internal use.
"""
res = await client.post(
"/ga4gh/tes/v1/tasks",
content='{"name": "test-task", "command": ["echo Hello World",""&&", "echo Goodbye!"]}', # Malformed command list
headers={
"Authorization": f"bearer {TEST_USER_TOKEN}",
"Content-Type": "application/json",
},
)
assert res.status_code == 400, res.text


@pytest.mark.asyncio
@pytest.mark.parametrize(
"req_body,status_code, error_message",
Expand Down
52 changes: 48 additions & 4 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ async def test_delete_user_bucket(
f"/storage/user-bucket{'/' if trailing_slash else ''}",
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"},
)
assert res.status_code == 204, res.text
assert res.status_code == 202, res.text

# Verify the bucket is deleted
with pytest.raises(ClientError) as e:
Expand Down Expand Up @@ -307,7 +307,7 @@ async def test_delete_user_bucket_with_files(
res = await client.delete(
"/storage/user-bucket", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}
)
assert res.status_code == 204, res.text
assert res.status_code == 202, res.text

# Verify the bucket is deleted
with pytest.raises(ClientError) as e:
Expand All @@ -324,7 +324,7 @@ async def test_delete_user_bucket_no_token(client, mock_aws_services):
"""
mock_delete_bucket = MagicMock()
# Delete the bucket
with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket):
with patch("gen3workflow.aws_utils.cleanup_user_bucket", mock_delete_bucket):
res = await client.delete("/storage/user-bucket")
assert res.status_code == 401, res.text
assert res.json() == {"detail": "Must provide an access token"}
Expand All @@ -346,11 +346,55 @@ async def test_delete_user_bucket_unauthorized(
"""
mock_delete_bucket = MagicMock()
# Delete the bucket
with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket):
with patch("gen3workflow.aws_utils.cleanup_user_bucket", mock_delete_bucket):
res = await client.delete(
"/storage/user-bucket",
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"},
)
assert res.status_code == 403, res.text
assert res.json() == {"detail": "Permission denied"}
mock_delete_bucket.assert_not_called()


@pytest.mark.asyncio
async def test_delete_user_bucket_objects_with_existing_files(
client, access_token_patcher, mock_aws_services
):
"""
Attempt to delete all the objects in a bucket that is not empty.
Endpoint must be able to delete all the files but should not delete the bucket.
"""

# Create the bucket if it doesn't exist
res = await client.get(
"/storage/info", headers={"Authorization": f"bearer {TEST_USER_TOKEN}"}
)
bucket_name = res.json()["bucket"]

# Remove the bucket policy enforcing KMS encryption
# Moto has limitations that prevent adding objects to a bucket with KMS encryption enabled.
# More details: https://github.com/uc-cdis/gen3-workflow/blob/554fc3eb4c1d333f9ef81c1a5f8e75a6b208cdeb/tests/test_misc.py#L161-L171
aws_utils.s3_client.delete_bucket_policy(Bucket=bucket_name)

object_count = 10
for i in range(object_count):
aws_utils.s3_client.put_object(
Bucket=bucket_name, Key=f"file_{i}", Body=b"Dummy file contents"
)

# Delete all the bucket objects
res = await client.delete(
"/storage/user-bucket/objects",
headers={"Authorization": f"bearer {TEST_USER_TOKEN}"},
)
assert res.status_code == 204, res.text

# Verify the bucket still exists
bucket_exists = aws_utils.s3_client.head_bucket(Bucket=bucket_name)
assert bucket_exists, f"Bucket '{bucket_name} is expected to exist but not found"

# Verify all the objects in the bucket are deleted
object_list = aws_utils.get_all_bucket_objects(bucket_name)
assert (
len(object_list) == 0
), f"Expected bucket to have no objects, but found {len(object_list)}.\n{object_list=}"
Loading