Skip to content
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
55c5e7d
print body
paulineribeyre Feb 23, 2026
3bab856
parse 'STREAMING-UNSIGNED-PAYLOAD-TRAILER' body
paulineribeyre Feb 23, 2026
ecea86d
fix multipart upload
paulineribeyre Feb 24, 2026
18a5a1a
deps update
paulineribeyre Feb 24, 2026
b38f3ba
revert kms change for now; clean up
paulineribeyre Feb 24, 2026
30f6701
log body
paulineribeyre Feb 24, 2026
34e1bb2
test STREAMING-UNSIGNED-PAYLOAD-TRAILER
paulineribeyre Feb 24, 2026
54f8afe
test STREAMING-UNSIGNED-PAYLOAD-TRAILER
paulineribeyre Feb 24, 2026
9772135
fix header case
paulineribeyre Feb 24, 2026
5fb7502
fix: SyntaxWarning: invalid escape sequence '\Z'
paulineribeyre Feb 24, 2026
bd54b42
fix header case
paulineribeyre Feb 24, 2026
eaa5b28
fix header case
paulineribeyre Feb 24, 2026
8365cf3
fix header case
paulineribeyre Feb 24, 2026
79bbb79
fix header case
paulineribeyre Feb 24, 2026
411e264
clean up
paulineribeyre Feb 24, 2026
80a5d46
simplify
paulineribeyre Feb 24, 2026
612b597
fix error: Too little data for declared Content-Length
paulineribeyre Feb 25, 2026
76025d3
fix: Header value must be str or bytes, not int
paulineribeyre Feb 25, 2026
a2324c1
debug
paulineribeyre Feb 25, 2026
188b0a7
fix: SignatureDoesNotMatch
paulineribeyre Feb 25, 2026
599f1ef
clean up
paulineribeyre Feb 25, 2026
74c5206
clean up
paulineribeyre Feb 25, 2026
82629a6
reject ls all
paulineribeyre Feb 25, 2026
ce0b098
Refactor authz resource tree
paulineribeyre Feb 26, 2026
1402e9e
fix graph
paulineribeyre Feb 26, 2026
9525160
fix auto authz access
paulineribeyre Feb 27, 2026
1e47ab9
rename endpoint for clarity
paulineribeyre Feb 27, 2026
1f7b86f
fix s3 auth
paulineribeyre Feb 27, 2026
f5ae28f
more renaming
paulineribeyre Feb 27, 2026
ee929fb
pull master
paulineribeyre Feb 27, 2026
fab6149
small fixes
paulineribeyre Mar 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 6 additions & 84 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.gibberish.should_exclude_secret",
"limit": 3.7
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
Expand Down Expand Up @@ -126,88 +130,6 @@
"path": "detect_secrets.filters.heuristic.is_templated_secret"
}
],
"results": {
".github/workflows/ci.yml": [
{
"type": "Secret Keyword",
"filename": ".github/workflows/ci.yml",
"hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0",
"is_verified": false,
"line_number": 15
}
],
"alembic.ini": [
{
"type": "Basic Auth Credentials",
"filename": "alembic.ini",
"hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
"is_verified": false,
"line_number": 64
}
],
"docs/local_installation.md": [
{
"type": "Secret Keyword",
"filename": "docs/local_installation.md",
"hashed_secret": "08d2e98e6754af941484848930ccbaddfefe13d6",
"is_verified": false,
"line_number": 90
}
],
"docs/s3.md": [
{
"type": "Secret Keyword",
"filename": "docs/s3.md",
"hashed_secret": "08d2e98e6754af941484848930ccbaddfefe13d6",
"is_verified": false,
"line_number": 56
}
],
"migrations/versions/e1886270d9d2_create_system_key_table.py": [
{
"type": "Hex High Entropy String",
"filename": "migrations/versions/e1886270d9d2_create_system_key_table.py",
"hashed_secret": "1df47988c41b70d5541f29636c48c6127cf593b8",
"is_verified": false,
"line_number": 16
}
],
"tests/conftest.py": [
{
"type": "Base64 High Entropy String",
"filename": "tests/conftest.py",
"hashed_secret": "0dd78d9147bb410f0cb0199c5037da36594f77d8",
"is_verified": false,
"line_number": 195
}
],
"tests/migrations/test_migration_e1886270d9d2.py": [
{
"type": "Hex High Entropy String",
"filename": "tests/migrations/test_migration_e1886270d9d2.py",
"hashed_secret": "1df47988c41b70d5541f29636c48c6127cf593b8",
"is_verified": false,
"line_number": 24
}
],
"tests/test-gen3workflow-config.yaml": [
{
"type": "Secret Keyword",
"filename": "tests/test-gen3workflow-config.yaml",
"hashed_secret": "900a7331f7bf83bff0e1b2c77f471b4a5145da0f",
"is_verified": false,
"line_number": 5
}
],
"tests/test_s3_endpoint.py": [
{
"type": "Secret Keyword",
"filename": "tests/test_s3_endpoint.py",
"hashed_secret": "08d2e98e6754af941484848930ccbaddfefe13d6",
"is_verified": false,
"line_number": 75
}
]
},
"generated_at": "2026-02-03T17:56:09Z"
"results": {},
"generated_at": "2026-02-27T21:39:45Z"
}
66 changes: 45 additions & 21 deletions docs/authorization.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,45 @@
The Gen3 Workflow endpoints are protected by Arborist policies.

Contents:
- [Authorization resources overview](#authorization-resources-overview)
- [Storage](#storage)
- [GA4GH TES](#ga4gh-tes)
- [Authorization configuration example](#authorization-configuration-example)
- [Authorization configuration example](#authorization-configuration-example)

## Authorization resources overview

```mermaid
graph TD;
services --> workflow;
workflow --> gen3-workflow;
gen3-workflow --> tasks;
gen3-workflow --> storage;
tasks --> user1t(user1);
tasks --> user2t(user2);
storage --> user1;
storage --> user2;
user1t --> task1;
user1t --> task2;
user2t --> task3;
```

## GA4GH TES

- To create a task, users need `create` access to resource `/services/workflow/gen3-workflow/tasks` on service `gen3-workflow`.
- To view a task, users need `read` access to resource `/users/<user ID>/gen3-workflow/tasks/<task ID>` on service `gen3-workflow`.
- Users are automatically granted access to `/users/<user ID>/gen3-workflow/tasks` so they can view their own tasks.
- Admin access (the ability to see _all_ users’ tasks instead of just your own) can be granted to a user by granting them access to the parent resource `/services/workflow/gen3-workflow/tasks`.
- This supports sharing tasks with others; for example, "user1" may share "taskA" with "user2" if the system grants "user2" access to `/users/user1/gen3-workflow/tasks/taskA`.
- To view a task, users need `read` access to resource `/services/workflow/gen3-workflow/tasks/<user ID>/<task ID>` on service `gen3-workflow`.
- To cancel a task, users need `delete` access to resource `/services/workflow/gen3-workflow/tasks/<user ID>/<task ID>` on service `gen3-workflow`.
- Admin access (the ability to see _all_ users’ tasks instead of just your own) can be granted to a user by granting them access to the parent resource `/services/workflow/gen3-workflow/tasks`.
- This supports sharing tasks with others; for example, "user1" may share "taskA" with "user2" if the system grants "user2" access to `/services/workflow/gen3-workflow/tasks/user1/taskA`.
- However, sharing task _inputs/outputs_ in the user's S3 bucket is not supported. Currently, users can only access their own S3 bucket.

## Other Gen3-Workflow functionality
- To download inputs and upload outputs, the Funnel workers need `create` access to resource `/services/workflow/gen3-workflow/tasks` on service `gen3-workflow`, like end-users.
- To empty or delete their own S3 bucket, a user needs `delete` access to the resource `/services/workflow/gen3-workflow/user-bucket` on the `gen3-workflow` service -- a special privilege useful for automated testing but not intended for the average user.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we don't need to specify this any more -- -- a special privilege useful for automated testing but not intended for the average user.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this going to be a general user feature? We aren't sure about the scalability of this action. [here](https://github.com/uc-cdis/gen3-workflow/blob/f5ae28fd6a7157fdbd0ba0fe74c9722ef7a1239a/gen3workflow/aws_utils.py#L427C5-L428C37)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks I forgot about that

With the refactor, it's not a special privilege anymore since I got rid of the "user-bucket" resource. You need the same access as when you use the S3 endpoint to delete a file, and all users have that access.

Since end-users can access the endpoint and have permission to delete files, stating in the docs that it shouldn't be used doesn't help - end users don't read those docs. We should just update that code when we have a chance, it's a low priority https://ctds-planx.atlassian.net/browse/MIDRC-1233

## Storage
- To upload input files, download output files, and in general manage the files in their S3 bucket, users need `create`, `read` or `delete` access to resource `/services/workflow/gen3-workflow/storage/<user ID>` on service `gen3-workflow`.
- The Funnel workers have access to `/services/workflow/gen3-workflow/storage` so they can manage files in all the user buckets.
- To empty or delete their own S3 bucket (`/storage/user-bucket` endpoints), users need `delete` access to the resource `/services/workflow/gen3-workflow/storage/<user ID>` on the `gen3-workflow` service.

#### Authorization configuration example
## Authorization configuration example

Users are automatically granted access to `/services/workflow/gen3-workflow/tasks/<user ID>` and to `/services/workflow/gen3-workflow/storage/<user ID>` so they can view and cancel their own tasks and manage files in their own bucket.

```yaml
users:
Expand All @@ -29,7 +52,7 @@ users:
clients:
funnel-plugin-client:
policies:
- gen3_workflow_user
- gen3_workflow_storage_admin

authz:
resources:
Expand All @@ -40,6 +63,7 @@ authz:
- name: gen3-workflow
subresources:
- name: tasks
- name: storage

policies:
- id: gen3_workflow_user
Expand All @@ -48,18 +72,18 @@ authz:
- gen3_workflow_creator
resource_paths:
- /services/workflow/gen3-workflow/tasks
- id: gen3_workflow_admin
- id: gen3_workflow_task_reader_admin
description: Allows access to view tasks created by all users
role_ids:
- gen3_workflow_reader
resource_paths:
- /services/workflow/gen3-workflow/tasks
- id: workflow_storage_deleter
description: Allows delete access to the user's own S3 bucket
- id: gen3_workflow_storage_admin
description: Allows access to manage all the user buckets
role_ids:
- workflow_storage_deleter
- gen3_workflow_admin
resource_paths:
- /services/workflow/gen3-workflow
- /services/workflow/gen3-workflow/storage

roles:
- id: gen3_workflow_reader
Expand All @@ -74,10 +98,10 @@ authz:
action:
service: gen3-workflow
method: create
- id: workflow_storage_deleter
permissions:
- id: workflow_storage_deleter
action:
service: gen3-workflow
method: delete
- id: gen3_workflow_admin
permissions:
- id: gen3_workflow_admin_action
action:
service: gen3-workflow
method: '*'
```
2 changes: 1 addition & 1 deletion docs/helm_chart_architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The Gen3 Workflow helm chart is [here](https://github.com/uc-cdis/gen3-helm/tree

```mermaid
graph TD;
A[Gen3 chart] --> B(Gen3 Workflow chart);
A(Gen3 chart) --> B(Gen3 Workflow chart);
A --> C(Gen3 Funnel chart);
C --> D(OHSU Funnel chart);
```
Expand Down
2 changes: 1 addition & 1 deletion docs/local_installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Try out the API at <http://localhost:8080/_status> or <http://localhost:8080/doc

## Run Nextflow workflows with Gen3Workflow

- Hit the `/storage/info` endpoint to get your working directory
- Hit the `/storage/setup` endpoint to get your working directory
- Configure Nextflow. Example Nextflow configuration:
```
plugins {
Expand Down
36 changes: 32 additions & 4 deletions docs/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -498,20 +498,48 @@ paths:
- S3
/storage/info:
get:
description: Get details about the current user's storage setup
operationId: get_storage_info
description: 'Return details about the current user''s storage setup.

This endpoint also serves as a mandatory "first time setup" for the user''s
bucket

and authz.'
operationId: storage_setup_2
responses:
'200':
content:
application/json:
schema:
additionalProperties: true
title: Response Storage Setup 2
type: object
description: Successful Response
security:
- HTTPBearer: []
summary: Storage Setup
tags:
- Storage
/storage/setup:
get:
description: 'Return details about the current user''s storage setup.

This endpoint also serves as a mandatory "first time setup" for the user''s
bucket

and authz.'
operationId: storage_setup
responses:
'200':
content:
application/json:
schema:
additionalProperties: true
title: Response Get Storage Info
title: Response Storage Setup
type: object
description: Successful Response
security:
- HTTPBearer: []
summary: Get Storage Info
summary: Storage Setup
tags:
- Storage
/storage/user-bucket:
Expand Down
1 change: 0 additions & 1 deletion docs/s3.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ Note: This discussion can apply to many use cases, but it is written with a spec
Contents:
- [Using IAM keys](#using-iam-keys)
- [Using a custom S3 endpoint](#using-a-custom-s3-endpoint)
- [Diagram](#diagram)

## Using IAM keys

Expand Down
45 changes: 27 additions & 18 deletions gen3workflow/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,43 +118,52 @@ async def authorize(

return authorized

async def grant_user_access_to_their_own_tasks(
async def grant_user_access_to_their_own_data(
self, username: str, user_id: str
) -> None:
"""
Ensure the specified user exists in Arborist and has a policy granting them access to their
own Gen3Workflow tasks ("read" and "delete" access to resource "/users/<user ID>/gen3-workflow/tasks" for service "gen3-workflow").
own Gen3Workflow tasks and bucket storage.
Args:
username (str): The user's Gen3 username
user_id (str): The user's unique Gen3 ID
"""
logger.info(f"Ensuring user '{user_id}' has access to their own tasks")
resource_path = f"/users/{user_id}/gen3-workflow/tasks"
if await self.authorize(method="read", resources=[resource_path], throw=False):
# if the user already has access to their own tasks, return early
logger.info(
f"Ensuring user '{user_id}' has access to their own tasks and storage"
)
resource_path1 = f"/services/workflow/gen3-workflow/tasks/{user_id}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know I'm nit picking right now, but, if it is not too much trouble, can we update the function to have less code duplication and variable names like tasks_path and storage_path instead of resource_path{1,2}?

Maybe something like

base = "/services/workflow/gen3-workflow"
resources_to_create = [
    (
        f"{base}/tasks",
        tasks_path,
        f"Represents workflow tasks owned by user '{username}'",
    ),
    (
        f"{base}/storage",
        storage_path,
        f"Represents task storage owned by user '{username}'",
    ),
]

for parent_path, resource_path, description in resources_to_create:
    logger.debug("Attempting to create resource '%s' in Arborist", resource_path)
    await self.arborist_client.create_resource(
        parent_path,
        {"name": user_id, "description": description},
        create_parents=True,
    )

Again, I understand this is nitpicking, and we may not update this function at all in the future, so it is fine if you choose to stick to your design.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO sometimes trying too hard to reduce code duplication just makes the code more convoluted and harder to read. Your version is not really shorter, and i think it's hard to understand at a glance, so i'll stick to mine

if await self.authorize(method="read", resources=[resource_path1], throw=False):
# if the user already has access to their own data, return early
return

logger.debug(f"Attempting to create resource '{resource_path}' in Arborist")
parent_path = f"/users/{user_id}/gen3-workflow"
parent_path = "/services/workflow/gen3-workflow/tasks"
logger.debug(f"Attempting to create resource '{resource_path1}' in Arborist")
resource = {
"name": "tasks",
"name": user_id,
"description": f"Represents workflow tasks owned by user '{username}'",
}
await self.arborist_client.create_resource(
parent_path, resource, create_parents=True
)

role_id = "gen3-workflow_task_owner"
resource_path2 = f"/services/workflow/gen3-workflow/storage/{user_id}"
parent_path = "/services/workflow/gen3-workflow/storage"
logger.debug(f"Attempting to create resource '{resource_path2}' in Arborist")
resource = {
"name": user_id,
"description": f"Represents task storage owned by user '{username}'",
}
await self.arborist_client.create_resource(
parent_path, resource, create_parents=True
)

role_id = "gen3_workflow_admin"
role = {
"id": role_id,
"permissions": [
{
"id": "gen3-workflow-reader",
"action": {"service": "gen3-workflow", "method": "read"},
},
{
"id": "gen3-workflow-deleter",
"action": {"service": "gen3-workflow", "method": "delete"},
"id": "gen3_workflow_admin_action",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we only give gen3_workflow_reader_action and gen3_workflow_creator_action instead of gen3_workflow_admin_action to all the users?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Users should be able to cancel tasks and delete inputs/outputs from s3 to lower costs for example

"action": {"service": "gen3-workflow", "method": "*"},
},
],
}
Expand All @@ -168,13 +177,13 @@ async def grant_user_access_to_their_own_tasks(
)
await self.arborist_client.create_role(role)

policy_id = f"gen3-workflow_task_owner_sub-{user_id}"
policy_id = f"gen3_workflow_user_sub_{user_id}"
logger.debug(f"Attempting to create policy '{policy_id}' in Arborist")
policy = {
"id": policy_id,
"description": f"policy created by gen3-workflow for user '{username}'",
"role_ids": [role_id],
"resource_paths": [resource_path],
"resource_paths": [resource_path1, resource_path2],
}
await self.arborist_client.create_policy(policy, skip_if_exists=True)

Expand Down
2 changes: 1 addition & 1 deletion gen3workflow/aws_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def create_iam_role_for_bucket_access(user_id: str) -> str:
if config["KMS_ENCRYPTION_ENABLED"]:
_, kms_key_arn = get_existing_kms_key_for_bucket(bucket_name)
if not kms_key_arn:
err_msg = "Bucket misconfigured. Hit the `GET /storage/info` endpoint and try again."
err_msg = "Bucket misconfigured. Hit the `GET /storage/setup` endpoint and try again."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I always felt, the GET /storage/info is doing more than what its name says. This is better :D

logger.error(
f"No existing KMS key found for bucket '{bucket_name}'. {err_msg}"
)
Expand Down
Loading
Loading