Skip to content

Commit 8de8a62

Browse files
authored
Add DELETE /storage/user-bucket endpoint (#66)
* Add `DELETE /storage/user-bucket` endpoint * Add unit tests to `DELETE /storage/user-bucket` endpoint
1 parent 554fc3e commit 8de8a62

File tree

5 files changed

+269
-2
lines changed

5 files changed

+269
-2
lines changed

docs/authorization.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Contents:
1313
- Users are automatically granted access to `/users/<user ID>/gen3-workflow/tasks` so they can view their own tasks.
1414
- Admin access (the ability to see _all_ users’ tasks instead of just your own) can be granted to a user by granting them access to the parent resource `/services/workflow/gen3-workflow/tasks`.
1515
- This supports sharing tasks with others; for example, "user1" may share "taskA" with "user2" if the system grants "user2" access to `/users/user1/gen3-workflow/tasks/taskA`.
16+
- To delete their own S3 bucket along with all its objects, a user needs `delete` access to the resource `/services/workflow/user-bucket` on the `gen3-workflow` service -- a special privilege useful for automated testing but not intended for the average user.
1617

1718
#### Authorization configuration example
1819

@@ -45,6 +46,12 @@ authz:
4546
- gen3_workflow_reader
4647
resource_paths:
4748
- /services/workflow/gen3-workflow/tasks
49+
- id: workflow_storage_deleter
50+
description: Allows delete access to the user's own S3 bucket
51+
role_ids:
52+
- workflow_storage_deleter
53+
resource_paths:
54+
- /services/workflow/gen3-workflow
4855

4956
roles:
5057
- id: gen3_workflow_reader
@@ -59,4 +66,10 @@ authz:
5966
action:
6067
service: gen3-workflow
6168
method: create
69+
- id: workflow_storage_deleter
70+
permissions:
71+
- id: workflow_storage_deleter
72+
action:
73+
service: gen3-workflow
74+
method: delete
6275
```

docs/openapi.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,14 @@ paths:
459459
summary: Get Storage Info
460460
tags:
461461
- Storage
462+
/storage/user-bucket:
463+
delete:
464+
operationId: delete_user_bucket
465+
responses:
466+
'204':
467+
description: Successful Response
468+
security:
469+
- HTTPBearer: []
470+
summary: Delete User Bucket
471+
tags:
472+
- Storage

gen3workflow/aws_utils.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,99 @@ def create_user_bucket(user_id: str) -> Tuple[str, str, str]:
189189
)
190190

191191
return user_bucket_name, "ga4gh-tes", config["USER_BUCKETS_REGION"]
192+
193+
194+
def get_all_bucket_objects(user_bucket_name: str) -> list:
    """
    Return all objects contained in the specified S3 bucket.

    `list_objects_v2` returns at most 1,000 objects per call. Instead of
    manually following `IsTruncated`/`NextContinuationToken`, use boto3's
    built-in paginator, which handles continuation tokens transparently.

    Args:
        user_bucket_name (str): The name of the S3 bucket.

    Returns:
        list: The object dicts (as returned under "Contents") for every
        object in the bucket; empty list if the bucket has no objects.
    """
    # TODO:
    # Currently, all objects are loaded into memory, which can be problematic for large datasets.
    # To optimize, convert this function into a generator that accepts a `batch_size` parameter (capped at 1,000)
    # and yields objects in batches.
    paginator = s3_client.get_paginator("list_objects_v2")
    object_list = []
    for page in paginator.paginate(Bucket=user_bucket_name):
        object_list.extend(page.get("Contents", []))

    return object_list
217+
218+
219+
def delete_all_bucket_objects(user_id, user_bucket_name):
    """
    Deletes all objects from the specified S3 bucket.

    Args:
        user_id (str): The user's unique Gen3 ID.
        user_bucket_name (str): The name of the S3 bucket.

    Raises:
        Exception: If S3 reports any per-object deletion errors.
    """
    objects = get_all_bucket_objects(user_bucket_name)
    if not objects:
        # nothing to delete
        return

    logger.debug(
        f"Deleting all contents from '{user_bucket_name}' for user '{user_id}' before deleting the bucket"
    )

    delete_targets = [{"Key": obj.get("Key")} for obj in objects]

    # According to the docs, up to 1000 objects can be deleted in a single request:
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.delete_objects

    # TODO: When `get_all_bucket_objects` is converted to a generator,
    # we can remove this batching logic and retrieve objects in batches of 1,000 for deletion.
    batch_size = 1000
    for start in range(0, len(delete_targets), batch_size):
        batch = delete_targets[start : start + batch_size]
        response = s3_client.delete_objects(
            Bucket=user_bucket_name,
            Delete={"Objects": batch},
        )
        # `delete_objects` reports per-key failures under "Errors" rather
        # than raising, so surface them explicitly.
        if response.get("Errors"):
            logger.error(
                f"Failed to delete objects from bucket '{user_bucket_name}' for user '{user_id}': {response}"
            )
            raise Exception(response)
253+
254+
255+
def delete_user_bucket(user_id: str) -> Union[str, None]:
    """
    Deletes all objects from a user's S3 bucket before deleting the bucket itself.

    Args:
        user_id (str): The user's unique Gen3 ID

    Returns:
        The name of the deleted bucket, or None if the bucket does not exist.

    Raises:
        ClientError: If checking for the bucket fails for any reason other
            than the bucket not existing (e.g. 403 Forbidden).
        Exception: If there is an error during the deletion process.
    """
    user_bucket_name = get_safe_name_from_hostname(user_id)

    try:
        s3_client.head_bucket(Bucket=user_bucket_name)
    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        if error_code == "404":
            logger.warning(
                f"Bucket '{user_bucket_name}' not found for user '{user_id}'."
            )
            return None
        # BUG FIX: previously any non-404 error (e.g. 403) was silently
        # swallowed and execution fell through to the deletion attempt.
        # We cannot safely assume the bucket is deletable, so re-raise.
        raise

    logger.info(f"Deleting bucket '{user_bucket_name}' for user '{user_id}'")
    try:
        delete_all_bucket_objects(user_id, user_bucket_name)
        s3_client.delete_bucket(Bucket=user_bucket_name)
        return user_bucket_name
    except Exception as e:
        logger.error(
            f"Failed to delete bucket '{user_bucket_name}' for user '{user_id}': {e}"
        )
        raise

gen3workflow/routes/storage.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from fastapi import APIRouter, Depends, Request
2-
from starlette.status import HTTP_200_OK
1+
from fastapi import APIRouter, Depends, Request, HTTPException
2+
from starlette.status import HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
33

44
from gen3workflow import aws_utils, logger
55
from gen3workflow.auth import Auth
@@ -19,3 +19,22 @@ async def get_storage_info(request: Request, auth=Depends(Auth)) -> dict:
1919
"workdir": f"s3://{bucket_name}/{bucket_prefix}",
2020
"region": bucket_region,
2121
}
22+
23+
24+
@router.delete("/user-bucket", status_code=HTTP_204_NO_CONTENT)
async def delete_user_bucket(request: Request, auth=Depends(Auth)) -> None:
    """
    Delete the requesting user's S3 bucket and all of its contents.

    Requires `delete` access to the `/services/workflow/user-bucket` resource.
    Responds 204 on success and 404 if the user has no bucket.
    """
    await auth.authorize("delete", ["/services/workflow/user-bucket"])

    claims = await auth.get_token_claims()
    user_id = claims.get("sub")
    logger.info(f"User '{user_id}' deleting their storage bucket")

    bucket_name = aws_utils.delete_user_bucket(user_id)
    if not bucket_name:
        # `delete_user_bucket` returns None when no bucket exists for the user
        raise HTTPException(
            HTTP_404_NOT_FOUND, "Deletion failed: No user bucket found."
        )

    logger.info(
        f"Bucket '{bucket_name}' for user '{user_id}' deleted successfully"
    )

tests/test_misc.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
from moto import mock_aws
55
import pytest
6+
from unittest.mock import patch, MagicMock
67

78
from conftest import TEST_USER_ID
89
from gen3workflow import aws_utils
@@ -53,6 +54,14 @@ def test_get_safe_name_from_hostname(reset_config_hostname):
5354
async def test_storage_info(client, access_token_patcher, mock_aws_services):
5455
# check that the user's storage information is as expected
5556
expected_bucket_name = f"gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}"
57+
58+
# Bucket must not exist before this test
59+
with pytest.raises(ClientError) as e:
60+
aws_utils.s3_client.head_bucket(Bucket=expected_bucket_name)
61+
assert (
62+
e.value.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404
63+
), f"Bucket exists: {e.value}"
64+
5665
res = await client.get("/storage/info", headers={"Authorization": "bearer 123"})
5766
assert res.status_code == 200, res.text
5867
storage_info = res.json()
@@ -62,6 +71,10 @@ async def test_storage_info(client, access_token_patcher, mock_aws_services):
6271
"region": config["USER_BUCKETS_REGION"],
6372
}
6473

74+
# check that the bucket was created after the call to `/storage/info`
75+
bucket_exists = aws_utils.s3_client.head_bucket(Bucket=expected_bucket_name)
76+
assert bucket_exists, "Bucket does not exist"
77+
6578
# check that the bucket is setup with KMS encryption
6679
kms_key = aws_utils.kms_client.describe_key(KeyId=f"alias/{expected_bucket_name}")
6780
kms_key_arn = kms_key["KeyMetadata"]["Arn"]
@@ -169,3 +182,118 @@ async def test_bucket_enforces_encryption(
169182
# ServerSideEncryption="aws:kms",
170183
# SSEKMSKeyId=authorized_kms_key_arn,
171184
# )
185+
186+
187+
@pytest.mark.asyncio
async def test_delete_user_bucket(client, access_token_patcher, mock_aws_services):
    """
    The user should be able to delete their own bucket.
    """
    # Create the bucket if it doesn't exist
    response = await client.get(
        "/storage/info", headers={"Authorization": "bearer 123"}
    )
    bucket_name = response.json()["bucket"]

    # Verify the bucket exists
    assert aws_utils.s3_client.head_bucket(Bucket=bucket_name), "Bucket does not exist"

    # Delete the bucket
    response = await client.delete(
        "/storage/user-bucket", headers={"Authorization": "bearer 123"}
    )
    assert response.status_code == 204, response.text

    # Verify the bucket is deleted
    with pytest.raises(ClientError) as e:
        aws_utils.s3_client.head_bucket(Bucket=bucket_name)
    assert (
        e.value.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404
    ), f"Bucket still exists: {e.value}"

    # Attempt to delete the bucket again; must receive a 404 since the bucket
    # is not found anymore.
    response = await client.delete(
        "/storage/user-bucket", headers={"Authorization": "bearer 123"}
    )
    assert response.status_code == 404, response.text
219+
220+
221+
@pytest.mark.asyncio
async def test_delete_user_bucket_with_files(
    client, access_token_patcher, mock_aws_services
):
    """
    Attempt to delete a bucket that is not empty.
    Endpoint must be able to delete all the files and then delete the bucket.
    """
    # Create the bucket if it doesn't exist
    response = await client.get(
        "/storage/info", headers={"Authorization": "bearer 123"}
    )
    bucket_name = response.json()["bucket"]

    # Remove the bucket policy enforcing KMS encryption
    # Moto has limitations that prevent adding objects to a bucket with KMS encryption enabled.
    # More details: https://github.com/uc-cdis/gen3-workflow/blob/554fc3eb4c1d333f9ef81c1a5f8e75a6b208cdeb/tests/test_misc.py#L161-L171
    aws_utils.s3_client.delete_bucket_policy(Bucket=bucket_name)

    # Upload more than 1000 objects to ensure batching is working correctly
    object_count = 1200
    for index in range(object_count):
        aws_utils.s3_client.put_object(
            Bucket=bucket_name, Key=f"file_{index}", Body=b"Dummy file contents"
        )

    # Verify all the objects in the bucket are fetched even when the bucket has
    # more than 1000 objects (list_objects_v2 pages are capped at 1000)
    assert len(aws_utils.get_all_bucket_objects(bucket_name)) == object_count

    # Delete the bucket
    response = await client.delete(
        "/storage/user-bucket", headers={"Authorization": "bearer 123"}
    )
    assert response.status_code == 204, response.text

    # Verify the bucket is deleted
    with pytest.raises(ClientError) as e:
        aws_utils.s3_client.head_bucket(Bucket=bucket_name)
    assert (
        e.value.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404
    ), f"Bucket still exists: {e.value}"
262+
263+
264+
@pytest.mark.asyncio
async def test_delete_user_bucket_no_token(client, mock_aws_services):
    """
    Attempt to delete a bucket when the user is not logged in. Must receive a 401 error.
    """
    mock_delete_bucket = MagicMock()
    with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket):
        # Attempt the deletion without an Authorization header
        response = await client.delete("/storage/user-bucket")
        assert response.status_code == 401, response.text
        assert response.json() == {"detail": "Must provide an access token"}
        # The endpoint must reject the request before touching AWS
        mock_delete_bucket.assert_not_called()
276+
277+
278+
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "client",
    [pytest.param({"authorized": False, "tes_resp_code": 200}, id="unauthorized")],
    indirect=True,
)
async def test_delete_user_bucket_unauthorized(
    client, access_token_patcher, mock_aws_services
):
    """
    Attempt to delete a bucket when the user is logged in but does not have the appropriate authorization.
    Must receive a 403 error.
    """
    mock_delete_bucket = MagicMock()
    with patch("gen3workflow.aws_utils.delete_user_bucket", mock_delete_bucket):
        # Attempt the deletion with a valid token but no authorization
        response = await client.delete(
            "/storage/user-bucket", headers={"Authorization": "bearer 123"}
        )
        assert response.status_code == 403, response.text
        assert response.json() == {"detail": "Permission denied"}
        # The endpoint must reject the request before touching AWS
        mock_delete_bucket.assert_not_called()

0 commit comments

Comments
 (0)