Skip to content

Commit 41d8ee4

Browse files
clean up
1 parent 57d3c95 commit 41d8ee4

File tree

5 files changed

+43
-44
lines changed

5 files changed

+43
-44
lines changed

.secrets.baseline

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,9 @@
214214
"filename": "tests/test_s3_endpoint.py",
215215
"hashed_secret": "08d2e98e6754af941484848930ccbaddfefe13d6",
216216
"is_verified": false,
217-
"line_number": 23
217+
"line_number": 21
218218
}
219219
]
220220
},
221-
"generated_at": "2025-04-30T16:01:07Z"
221+
"generated_at": "2025-04-30T21:49:54Z"
222222
}

gen3workflow/app.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from gen3workflow import logger
1010
from gen3workflow.config import config
1111
from gen3workflow.routes.ga4gh_tes import router as ga4gh_tes_router
12-
from gen3workflow.routes.s3 import root_router, s3_router
12+
from gen3workflow.routes.s3 import s3_root_router, s3_router
1313
from gen3workflow.routes.storage import router as storage_router
1414
from gen3workflow.routes.system import router as system_router
1515

@@ -32,7 +32,7 @@ def get_app(httpx_client=None) -> FastAPI:
3232
app.include_router(s3_router, tags=["S3"])
3333
app.include_router(storage_router, tags=["Storage"])
3434
app.include_router(system_router, tags=["System"])
35-
app.include_router(root_router, tags=["S3"])
35+
app.include_router(s3_root_router, tags=["S3"])
3636

3737
# Following will update logger level, propagate, and handlers
3838
get_logger("gen3workflow", log_level=log_level)

gen3workflow/routes/ga4gh_tes.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ async def create_task(request: Request, auth=Depends(Auth)) -> dict:
8383
await auth.authorize("create", ["/services/workflow/gen3-workflow/tasks"])
8484

8585
body = await get_request_body(request)
86-
logger.info(f"DEBUG: request body = {json.dumps(body)}") # TODO remove
8786

8887
# add the `AUTHZ` tag to the task, so access can be checked by the other endpoints
8988
token_claims = await auth.get_token_claims()

gen3workflow/routes/s3.py

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,49 +23,62 @@
2323
from gen3workflow.routes.system import get_status
2424

2525

26-
root_router = APIRouter()
26+
s3_root_router = APIRouter()
2727
s3_router = APIRouter(prefix="/s3")
2828

2929

30-
def get_access_token(headers: Headers) -> Tuple[str, str]:
30+
async def set_access_token_and_get_user_id(auth: Auth, headers: Headers) -> str:
3131
"""
32-
Extract the user's access token and (in the case of a client token) the user's ID, which
33-
should have been provided as the key ID, from the Authorization header in one of the two
34-
following expected formats:
32+
Extract the user's access token and (in some cases) the user's ID, which should have been
33+
provided as the access key ID, from the Authorization header in one of the two following
34+
expected formats:
3535
1. Key ID set by the python boto3 AWS client: `AWS4-HMAC-SHA256 Credential=<key ID>/<date>/
36-
<region>/<service>/aws4_request, SignedHeaders=<>, Signature=<>`
36+
<region>/<service>/aws4_request, SignedHeaders=<>, Signature=<>`
3737
2. Key ID set by Funnel GenericS3 through the Minio-go client: `AWS <key ID>:<>`
38+
Return the user's ID extracted from the key ID or from the decoded token. Also set the provided
39+
`auth` instance's `bearer_token` to the extracted access token.
3840
3941
Args:
42+
auth (Auth): Gen3Workflow auth instance
4043
headers (Headers): request headers
4144
4245
Returns:
43-
(str, str): the user's access token or "" if not found, and the user's ID if the token is
44-
a client_credentials token
46+
str: the user's ID
4547
"""
4648
# TODO unit tests for this function
4749
auth_header = headers.get("authorization")
4850
if not auth_header:
49-
return "", ""
51+
return ""
5052
if auth_header.lower().startswith("bearer"):
5153
err_msg = f"Bearer tokens in the authorization header are not supported by this endpoint, which expects signed S3 requests. The recommended way to use this endpoint is to use the AWS SDK or CLI"
5254
logger.error(err_msg)
5355
raise HTTPException(HTTP_401_UNAUTHORIZED, err_msg)
56+
5457
try:
5558
if "Credential=" in auth_header: # format 1 (see docstring)
56-
access_token = auth_header.split("Credential=")[1].split("/")[0]
57-
user_id = None
59+
access_key_id = auth_header.split("Credential=")[1].split("/")[0]
5860
else: # format 2 (see docstring)
5961
access_key_id = auth_header.split("AWS ")[1]
6062
access_key_id = ":".join(access_key_id.split(":")[:-1])
61-
access_token, user_id = access_key_id.split(";userId=")
62-
return access_token, user_id
6363
except Exception as e:
6464
traceback.print_exc()
6565
logger.error(
6666
f"Unexpected format; unable to extract access token from authorization header: {e}"
6767
)
68-
return "", ""
68+
return ""
69+
70+
if ";userId=" in access_key_id:
71+
access_token, user_id = access_key_id.split(";userId=")
72+
# TODO assert it's a client token not linked to a user
73+
else:
74+
access_token = access_key_id
75+
auth.bearer_token = HTTPAuthorizationCredentials(
76+
scheme="bearer", credentials=access_token
77+
)
78+
token_claims = await auth.get_token_claims()
79+
user_id = token_claims.get("sub")
80+
81+
return user_id
6982

7083

7184
def get_signature_key(key: str, date: str, region_name: str, service_name: str) -> str:
@@ -85,7 +98,7 @@ def get_signature_key(key: str, date: str, region_name: str, service_name: str)
8598
return key_signing
8699

87100

88-
@root_router.api_route(
101+
@s3_root_router.api_route(
89102
"/{path:path}",
90103
methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "PATCH", "TRACE", "HEAD"],
91104
)
@@ -103,24 +116,17 @@ async def s3_endpoint(path: str, request: Request):
103116
not support S3 endpoints with a path, such as the Minio-go S3 client.
104117
"""
105118

106-
# because this endpoint is exposed at root, if the path is empty, assume the user is not trying
107-
# to reach the S3 endpoint and redirect to the status endpoint instead
119+
# because this endpoint is exposed at root, if the GET path is empty, assume the user is not
120+
# trying to reach the S3 endpoint and redirect to the status endpoint instead
108121
if request.method == "GET" and not path:
109122
return await get_status(request)
110123

111-
# extract the user's access token from the request headers, and ensure the user has access
112-
# to run workflows
124+
# extract the caller's access token from the request headers, and ensure the caller (user, or
125+
# client acting on behalf of the user) has access to run workflows
113126
auth = Auth(api_request=request)
114-
access_token, user_id = get_access_token(request.headers)
115-
if user_id:
116-
pass # TODO assert it's a client token not linked to a user, and check authz
117-
else:
118-
auth.bearer_token = HTTPAuthorizationCredentials(
119-
scheme="bearer", credentials=access_token
120-
)
121-
await auth.authorize("create", ["/services/workflow/gen3-workflow/tasks"])
122-
token_claims = await auth.get_token_claims()
123-
user_id = token_claims.get("sub")
127+
user_id = await set_access_token_and_get_user_id(auth, request.headers)
128+
# TODO client token unit tests, including authz
129+
await auth.authorize("create", ["/services/workflow/gen3-workflow/tasks"])
124130

125131
# get the name of the user's bucket and ensure the user is making a call to their own bucket
126132
logger.info(f"Incoming S3 request from user '{user_id}': '{request.method} {path}'")

tests/test_s3_endpoint.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,25 @@
77
from gen3workflow.config import config
88

99

10-
@pytest.fixture()
10+
@pytest.fixture(
11+
params=[pytest.param("/s3", id="s3 path"), pytest.param("", id="root path")]
12+
)
1113
def s3_client(client, request):
1214
"""
1315
Return an S3 client configured to talk to the gen3-workflow `/s3` endpoint.
1416
"""
15-
path = ""
16-
if hasattr(request, "param"):
17-
path = request.param
18-
1917
session = boto3.session.Session()
2018
return session.client(
2119
service_name="s3",
2220
aws_access_key_id="123",
2321
aws_secret_access_key="N/A",
24-
endpoint_url=f"{client}/{path}",
22+
endpoint_url=f"{client}/{request.param}",
2523
# no retries; only try each call once:
2624
config=Config(retries={"max_attempts": 0}),
2725
)
2826

2927

3028
@pytest.mark.parametrize("client", [{"get_url": True}], indirect=True)
31-
@pytest.mark.parametrize("s3_client", ["", "/s3"], indirect=True)
3229
def test_s3_endpoint(s3_client, access_token_patcher):
3330
"""
3431
Hitting the `/s3` endpoint should result in the request being forwarded to AWS S3.
@@ -39,7 +36,6 @@ def test_s3_endpoint(s3_client, access_token_patcher):
3936

4037

4138
@pytest.mark.parametrize("client", [{"get_url": True}], indirect=True)
42-
@pytest.mark.parametrize("s3_client", ["", "/s3"], indirect=True)
4339
def test_s3_endpoint_no_token(s3_client):
4440
"""
4541
Hitting the `/s3` endpoint without a Gen3 access token should result in a 401 Unauthorized
@@ -52,7 +48,6 @@ def test_s3_endpoint_no_token(s3_client):
5248
@pytest.mark.parametrize(
5349
"client", [{"get_url": True, "authorized": False}], indirect=True
5450
)
55-
@pytest.mark.parametrize("s3_client", ["", "/s3"], indirect=True)
5651
def test_s3_endpoint_unauthorized(s3_client, access_token_patcher):
5752
"""
5853
Hitting the `/s3` endpoint with a Gen3 access token that does not have the appropriate access
@@ -67,7 +62,6 @@ def test_s3_endpoint_unauthorized(s3_client, access_token_patcher):
6762
"bucket_name",
6863
["not-the-user-s-bucket", f"gen3wf-{config['HOSTNAME']}-{TEST_USER_ID}-2"],
6964
)
70-
@pytest.mark.parametrize("s3_client", ["", "/s3"], indirect=True)
7165
def test_s3_endpoint_wrong_bucket(s3_client, access_token_patcher, bucket_name):
7266
"""
7367
Hitting the `/s3` endpoint with a bucket that is not the bucket generated by gen3-workflow for

0 commit comments

Comments
 (0)