Skip to content

Commit 994c00c

Browse files
support funnel s3 request format
1 parent 41d3e7e commit 994c00c

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

gen3workflow/routes/s3.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime, timezone
12
import hashlib
23
from typing import Tuple
34
import urllib.parse
@@ -42,7 +43,6 @@ def get_access_token(headers: Headers) -> Tuple[str, str]:
4243
a client_credentials token
4344
"""
4445
auth_header = headers.get("authorization")
45-
logger.info(f"DEBUG: auth_header = {auth_header}")
4646
if not auth_header:
4747
return "", ""
4848
if auth_header.lower().startswith("bearer"):
@@ -55,10 +55,7 @@ def get_access_token(headers: Headers) -> Tuple[str, str]:
5555
else: # format 2 (see docstring)
5656
access_key_id = auth_header.split("AWS ")[1]
5757
access_key_id = ":".join(access_key_id.split(":")[:-1])
58-
logger.info(f"DEBUG: access_key_id = {access_key_id}")
5958
access_token, user_id = access_key_id.split(";userId=")
60-
logger.info(f"DEBUG: access_token = {access_token}")
61-
logger.info(f"DEBUG: user_id = {user_id}")
6259
return access_token, user_id
6360
except Exception as e:
6461
logger.error(
@@ -148,19 +145,30 @@ async def s3_endpoint(path: str, request: Request):
148145
request_path = path.split(user_bucket)[1] or "/"
149146
api_endpoint = "/".join(request_path.split("/")[1:])
150147

148+
region = config["USER_BUCKETS_REGION"]
149+
service = "s3"
151150
body = await request.body()
152151
body_hash = hashlib.sha256(body).hexdigest()
153-
timestamp = request.headers["x-amz-date"]
152+
timestamp = request.headers.get("x-amz-date")
153+
logger.info(f"DEBUG: request.headers = {request.headers}")
154+
if not timestamp and request.headers.get("date"):
155+
# assume RFC 1123 format; convert it to the ISO 8601 basic format (YYYYMMDD'T'HHMMSS'Z')
156+
dt = datetime.strptime(request.headers["date"], "%a, %d %b %Y %H:%M:%S %Z")
157+
timestamp = dt.strftime("%Y%m%dT%H%M%SZ")
158+
logger.info(f"DEBUG: date to timestamp = {timestamp}")
159+
if not timestamp:
160+
# no `x-amz-date` or `date` header, just generate it ourselves
161+
timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
154162
date = timestamp[:8] # the date portion (YYYYMMDD) of the timestamp
155-
region = config["USER_BUCKETS_REGION"]
156-
service = "s3"
163+
logger.info(f"DEBUG: date = {date}")
157164

158165
# generate the request headers.
159166
# overwrite the original `x-amz-content-sha256` header value with the body hash. When this
160167
# header is set to "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" in the original request (payload sent
161168
# over multiple chunks), we still replace it with the body hash (because I couldn't get the
162169
# signing to work for "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" - I believe it requires using the signature from the previous chunk).
163170
# NOTE: This may cause issues when large files are _actually_ uploaded over multiple chunks.
171+
# TODO: test with an input file larger than 5 GB
164172
headers = {
165173
"host": f"{user_bucket}.s3.amazonaws.com",
166174
"x-amz-content-sha256": body_hash,

0 commit comments

Comments
 (0)