Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,9 @@
"filename": "tests/test_s3_endpoint.py",
"hashed_secret": "08d2e98e6754af941484848930ccbaddfefe13d6",
"is_verified": false,
"line_number": 77
"line_number": 82
}
]
},
"generated_at": "2026-03-02T20:27:56Z"
"generated_at": "2026-03-03T22:13:17Z"
}
11 changes: 10 additions & 1 deletion gen3workflow/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,16 @@ async def get_status():
root_path=config["DOCS_URL_PREFIX"],
generate_unique_id_function=generate_unique_route_id,
)
app.async_client = httpx_client or httpx.AsyncClient()

# `async_client` is used to hit the TES API, the Arborist service and AWS S3.
# Calls to S3 tend to timeout when uploading large files (and we might also be rate-limited).
# AsyncHTTPTransport supports retrying on httpx.ConnectError or httpx.ConnectTimeout.
# The `httpx_client` parameter is not meant to be used in production. It allows mocking
# external calls when testing.
app.async_client = httpx_client or httpx.AsyncClient(
transport=httpx.AsyncHTTPTransport(retries=3), timeout=120
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can fine-tune this during the testing phase

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

S3 Connection errors being handled here is nice! Thanks! :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hopefully it's enough!

)

app.include_router(ga4gh_tes_router, tags=["GA4GH TES"])
app.include_router(s3_router, tags=["S3"])
app.include_router(storage_router, tags=["Storage"])
Expand Down
18 changes: 11 additions & 7 deletions gen3workflow/routes/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,6 @@ async def s3_endpoint(path: str, request: Request):
# get the name of the user's bucket and ensure the user is making a call to their own bucket
logger.info(f"Incoming S3 request from user '{user_id}': '{request.method} {path}'")
user_bucket = aws_utils.get_safe_name_from_hostname(user_id)
if request.method == "GET" and path == "s3":
err_msg = f"'ls' not supported, use 'ls s3://{user_bucket}' instead"
logger.error(err_msg)
raise HTTPException(HTTP_400_BAD_REQUEST, err_msg)
Comment on lines -195 to -198
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was removed in #102 and added back by mistake in #103

request_bucket = path.split("?")[0].split("/")[0]
if request_bucket != user_bucket:
err_msg = f"'{path}' (bucket '{request_bucket}') not allowed. You can make calls to your personal bucket, '{user_bucket}'"
Expand Down Expand Up @@ -282,8 +278,17 @@ async def s3_endpoint(path: str, request: Request):
assert credentials, "No AWS credentials found"
headers["x-amz-security-token"] = credentials.token

# if this is a PUT request, we need the KMS key ID to use for encryption
if config["KMS_ENCRYPTION_ENABLED"] and request.method == "PUT":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do Multi-part uploads use POST? I'm wondering why didn't we check for method=POST in the past?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, multipart uploads use POST. I guess we just didn't test with large files until now

# If this is a PUT or POST request, specify the KMS key to use for encryption.
# For multipart uploads, the initial CreateMultipartUpload request includes the KMS
# configuration, and the following UploadPart and CompleteMultipartUpload requests do not.
# We know this is an UploadPart or CompleteMultipartUpload request if it includes the
# uploadId query parameter.
query_params = dict(request.query_params)
if (
config["KMS_ENCRYPTION_ENABLED"]
and request.method in ["PUT", "POST"]
and "uploadId" not in query_params
):
_, kms_key_arn = aws_utils.get_existing_kms_key_for_bucket(user_bucket)
if not kms_key_arn:
err_msg = "Bucket misconfigured. Hit the `GET /storage/setup` endpoint and try again."
Expand All @@ -300,7 +305,6 @@ async def s3_endpoint(path: str, request: Request):
f"{key.lower()}:{headers[key]}\n" for key in sorted_headers
)
signed_headers = ";".join([k.lower() for k in sorted_headers])
query_params = dict(request.query_params)
# the query params in the canonical request have to be sorted:
query_params_names = sorted(list(query_params.keys()))
canonical_query_params = "&".join(
Expand Down
40 changes: 25 additions & 15 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,33 @@ def mock_tes_server_request_function(
return httpx.Response(status_code=status_code, json=out, text=text)


def mock_aws_s3_request_function(url: str):
    """
    Build a mocked AWS S3 response for the given request URL.

    Returns a 200 `httpx.Response` with an XML body. Most requests get the
    generic `MOCKED_S3_RESPONSE_XML`; requests for the multipart-upload test
    object instead get an `InitiateMultipartUploadResult` payload, and
    UploadPart requests (identified by the `uploadId` + `partNumber` query
    parameters) additionally get an `etag` header so boto3 can complete the
    multipart upload.
    """
    response_headers = {"content-type": "application/xml"}
    body_xml = MOCKED_S3_RESPONSE_XML

    # multipart upload special case:
    if "test_s3_upload_file_multipart.txt" in url:
        upload_id = "test-upload-id"
        # "InitiateMultipartUploadResult" with "UploadId"
        body_xml = f"""<?xml version="1.0" encoding="UTF-8"?>\n<InitiateMultipartUploadResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Bucket>gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}</Bucket><Key>test_s3_chunk_upload.txt</Key><UploadId>{upload_id}</UploadId></InitiateMultipartUploadResult>"""
        # UploadPart calls carry the uploadId assigned above; they need an etag
        if f"?uploadId={upload_id}&partNumber=" in url:
            response_headers["etag"] = "test-etag"

    return httpx.Response(
        status_code=200,
        text=body_xml,
        headers=response_headers,
    )


# making these functions into mocks allows tests to check the requests that were made, for
# example: `mock_tes_server_request.assert_called_with(...)`
mock_tes_server_request = MagicMock(side_effect=mock_tes_server_request_function)
mock_arborist_request = MagicMock(side_effect=mock_arborist_request_function)
mock_aws_s3_request = MagicMock(side_effect=mock_aws_s3_request_function)


@pytest_asyncio.fixture(scope="function", autouse=True)
Expand Down Expand Up @@ -316,20 +339,7 @@ async def handle_request(request: Request):
f"https://gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}.s3.{config['USER_BUCKETS_REGION']}.amazonaws.com"
):
# mock calls to AWS S3
resp_xml = MOCKED_S3_RESPONSE_XML
headers = {"content-type": "application/xml"}
# multipart upload special case:
if "test_s3_upload_file_multipart.txt" in url:
upload_id = "test-upload-id"
# "InitiateMultipartUploadResult" with "UploadId"
resp_xml = f"""<?xml version="1.0" encoding="UTF-8"?>\n<InitiateMultipartUploadResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Bucket>gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}</Bucket><Key>test_s3_chunk_upload.txt</Key><UploadId>{upload_id}</UploadId></InitiateMultipartUploadResult>"""
if f"?uploadId={upload_id}&partNumber=" in url:
headers["etag"] = "test-etag"
mocked_response = httpx.Response(
status_code=200,
text=resp_xml,
headers=headers,
)
mocked_response = mock_aws_s3_request(url)

if mocked_response is not None:
print(f"Mocking request '{request.method} {url}'")
Expand All @@ -339,7 +349,7 @@ async def handle_request(request: Request):
httpx_client_function = getattr(httpx.AsyncClient(), request.method.lower())
return await httpx_client_function(url)

# set the httpx clients used by the app and by the Arborist client to mock clients that
# the httpx clients used by the app and by gen3authz are set to mock clients that
# call `handle_request`
mock_httpx_client = httpx.AsyncClient(transport=httpx.MockTransport(handle_request))
app = get_app(httpx_client=mock_httpx_client)
Expand Down
33 changes: 24 additions & 9 deletions tests/test_s3_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
from fastapi import HTTPException
import pytest

from conftest import MOCKED_S3_RESPONSE_DICT, TEST_USER_ID, TEST_USER_TOKEN
from conftest import (
MOCKED_S3_RESPONSE_DICT,
TEST_USER_ID,
TEST_USER_TOKEN,
mock_aws_s3_request,
)
from gen3workflow.config import config
from gen3workflow.routes.s3 import (
set_access_token_and_get_user_id,
Expand Down Expand Up @@ -350,23 +355,33 @@ def test_s3_upload_file(s3_client, access_token_patcher, multipart):
Test that the boto3 `upload_file` function works with the `/s3` endpoint, both for a small
file uploaded in 1 part and for a large file uploaded in multiple parts.
"""
bucket_name = f"gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}"
object_key = f"test_s3_upload_file{'_multipart' if multipart else ''}.txt"

with patch(
"gen3workflow.aws_utils.get_existing_kms_key_for_bucket",
lambda _: ("test_kms_key_alias", "test_kms_key_arn"),
):
with tempfile.NamedTemporaryFile(mode="w+t", delete=True) as file_to_upload:
file_to_upload.write("Test file contents\n")
with tempfile.NamedTemporaryFile(delete=True) as file_to_upload:
file_to_upload.write(b"A" * (6 * 1024 * 1024)) # create a 6MB file
file_to_upload.flush()
s3_client.upload_file(
file_to_upload.name,
f"gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}",
f"test_s3_upload_file{'_multipart' if multipart else ''}.txt",
# to test a multipart upload, set the chunk size to 1 to force splitting the file
# into multiple chunks:
Config=boto3.s3.transfer.TransferConfig(
multipart_threshold=1 if multipart else 9999
bucket_name,
object_key,
# to test a multipart upload, set the part size to 1 to force splitting the file
# into multiple parts:
Config=(
boto3.s3.transfer.TransferConfig(multipart_threshold=1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to boto docs, default part size for a multi-part upload seems to be 8MB (here). So, if the file size is 6MB, it will still be uploaded as a single part, even if the multipart threshold is set to 1 MB.

With the current setup, we can test whether multipart uploads are triggered. However, to properly test multipart uploads, you could either:

  • Increase the file size (at line 366) to something larger, such as 12 MB, or
  • Set multipart_chunksize to a smaller value (e.g., 5 MB, the S3 minimum part size) so that the file is split into multiple parts during upload.

Alternatively, we could do both. For example:

  • File size: 12 MB
  • Multipart chunk size: 5 MB
    This would result in the upload being split into 3 parts, ensuring gen3-workflow's handling of multipart behavior is actually put to test.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now we get 3 calls: init upload, upload part, and complete upload.
The change you're suggesting would just add more "upload part" calls.
That makes sense, but I think that test belongs in integration tests. This unit test doesn't actually test much since all S3 calls are mocked; it really just tests that the code doesn't break for each of the 3 paths (init, upload part, complete). Wdyt?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as we cover init, upload and complete part in the test, I think we are good.

if multipart
else None
),
)

mock_aws_s3_request.assert_called_with(
f"https://{bucket_name}.s3.us-east-1.amazonaws.com/{object_key}{'?uploadId=test-upload-id' if multipart else ''}"
)


def test_chunked_to_non_chunked_body():
"""
Expand Down
Loading