Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f14178b
Add AWS credentials database tables for static and temporary credentials
Eden-D-Zhang Oct 20, 2025
08682e0
Initialize AWS credential tables in database setup script
Eden-D-Zhang Oct 20, 2025
5e0793e
Add Pydantic models for AWS credentials with validation and conversio…
Eden-D-Zhang Oct 20, 2025
a395256
Add S3CredentialManager class for database credential CRUD operations
Eden-D-Zhang Oct 20, 2025
535203c
Implement session token cache methods for temporary credential manage…
Eden-D-Zhang Oct 20, 2025
9cac4c3
Align credential name validation pattern in Pydantic model with manag…
Eden-D-Zhang Oct 20, 2025
3906383
Merge branch 'main' of https://github.com/Eden-D-Zhang/clp into packa…
Eden-D-Zhang Oct 20, 2025
febfb75
Use modern type syntax and improve code organization in credential ma…
Eden-D-Zhang Oct 20, 2025
5344d16
Merge branch 'main' of https://github.com/y-scope/clp into package-s3…
Eden-D-Zhang Oct 23, 2025
85ce7f9
Use clp logger
Eden-D-Zhang Oct 23, 2025
517c995
Remove unnecessary functions
Eden-D-Zhang Oct 23, 2025
b1f70d1
Lint
Eden-D-Zhang Oct 23, 2025
e7f8853
Remove unused import
Eden-D-Zhang Oct 23, 2025
99c1dbf
Remove unused import again
Eden-D-Zhang Oct 23, 2025
d3808b2
Coderabbit review
Eden-D-Zhang Oct 23, 2025
58f05e0
Lint
Eden-D-Zhang Oct 23, 2025
417d73e
Merge branch 'main' of https://github.com/y-scope/clp into package-s3…
Eden-D-Zhang Oct 27, 2025
2576b2e
Fix comment
Eden-D-Zhang Oct 27, 2025
2391c19
Clean up docstrings.
Eden-D-Zhang Oct 27, 2025
4fc9a89
Clean up docstrings.
Eden-D-Zhang Oct 27, 2025
2b3258a
Merge branch 'package-s3-credentials' of https://github.com/Eden-D-Zh…
Eden-D-Zhang Oct 27, 2025
36c9a65
Docstrings again
Eden-D-Zhang Oct 27, 2025
51c92fc
Fix index length
Eden-D-Zhang Oct 30, 2025
4be0a49
Add some descriptions
Eden-D-Zhang Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions components/clp-py-utils/clp_py_utils/clp_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import pathlib
from datetime import datetime, timedelta, timezone
from enum import auto
from typing import Annotated, Any, ClassVar, Literal, Optional, Union

Expand All @@ -11,6 +12,7 @@
model_validator,
PlainSerializer,
PrivateAttr,
SecretStr,
)
from strenum import KebabCaseStrEnum, LowercaseStrEnum

Expand Down Expand Up @@ -416,6 +418,100 @@ def validate_authentication(cls, data):
return data


class AwsCredential(BaseModel):
    """
    A long-term AWS credential as persisted in the `aws_credentials` table.

    Every record carries a static access key pair and may additionally name a role to assume
    through `role_arn`.
    """

    id: int
    name: Annotated[
        str,
        Field(
            min_length=1,
            max_length=255,
            pattern=r"^[a-zA-Z0-9_-]+$",
            description="Credential name (alphanumeric, hyphens, underscores only; 1-255 characters)",
        ),
    ]

    # Key material is held as `SecretStr`; the plain value has to be requested explicitly with
    # `get_secret_value()`.
    access_key_id: SecretStr
    secret_access_key: SecretStr
    # ARN of the role to assume; left as `None` when the credential is purely static.
    role_arn: str | None = None

    # Both timestamps default to the (timezone-aware, UTC) moment the model is instantiated.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

    def to_s3_credentials(self) -> S3Credentials:
        """
        Builds an `S3Credentials` object from the stored static key pair, for use with boto3.

        NOTE: `role_arn` is not consulted and no session token is set, so the result is only
        meaningful for static credentials. Temporary credentials that carry a session token are
        modelled by `TemporaryCredential`.

        :return: An `S3Credentials` object holding the unmasked access key ID and secret access
            key, with `session_token` set to `None`.
        """
        key_id = self.access_key_id.get_secret_value()
        secret_key = self.secret_access_key.get_secret_value()
        return S3Credentials(
            access_key_id=key_id,
            secret_access_key=secret_key,
            session_token=None,
        )


class TemporaryCredential(BaseModel):
    """
    A cached set of temporary AWS credentials, i.e., a key pair plus its session token.

    Instances mirror rows of the `aws_temporary_credentials` table. The cached credentials may
    originate from:
    - STS AssumeRole operations
    - Resource-specific session tokens

    `source` records where the session token came from, for example:
    - A role ARN: "arn:aws:iam::123456789012:role/MyRole"
    - An S3 resource ARN: "arn:aws:s3:::bucket/path/*"
    """

    id: int
    # Foreign key into the `aws_credentials` table.
    long_term_key_id: int
    access_key_id: SecretStr
    secret_access_key: SecretStr
    session_token: SecretStr
    # Role ARN or S3 resource ARN the session token was issued for.
    source: str
    expires_at: datetime
    created_at: datetime

    def to_s3_credentials(self) -> S3Credentials:
        """
        Builds an `S3Credentials` object, including the session token, for use with boto3.

        :return: An `S3Credentials` object holding the unmasked key pair and session token.
        """
        key_id = self.access_key_id.get_secret_value()
        secret_key = self.secret_access_key.get_secret_value()
        token = self.session_token.get_secret_value()
        return S3Credentials(
            access_key_id=key_id,
            secret_access_key=secret_key,
            session_token=token,
        )

    def is_expired(self, buffer_minutes: int = 5) -> bool:
        """
        Reports whether the credential has expired or is about to.

        :param buffer_minutes: Safety margin, in minutes, subtracted from `expires_at` before
            comparing against the current time.
        :return: True if the current UTC time is at or past `expires_at - buffer_minutes`.
        """
        now = datetime.now(timezone.utc)

        expiry = self.expires_at
        if expiry.tzinfo is None:
            # A naive value can't be compared with an aware one; it is treated as UTC on the
            # assumption that the database stores UTC timestamps.
            expiry = expiry.replace(tzinfo=timezone.utc)

        cutoff = expiry - timedelta(minutes=buffer_minutes)
        return cutoff <= now


class S3Config(BaseModel):
region_code: NonEmptyStr
bucket: NonEmptyStr
Expand Down
81 changes: 81 additions & 0 deletions components/clp-py-utils/clp_py_utils/clp_metadata_db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

# Suffixes of the metadata database tables. The `get_*_table_name` helpers in this module combine
# a suffix with a table prefix (and, for some tables, a dataset) to form the full table name.
ARCHIVE_TAGS_TABLE_SUFFIX = "archive_tags"
ARCHIVES_TABLE_SUFFIX = "archives"
# Long-term AWS credentials and the temporary credentials (session tokens) cached for them.
AWS_CREDENTIALS_TABLE_SUFFIX = "aws_credentials"
AWS_TEMPORARY_CREDENTIALS_TABLE_SUFFIX = "aws_temporary_credentials"
COLUMN_METADATA_TABLE_SUFFIX = "column_metadata"
DATASETS_TABLE_SUFFIX = "datasets"
FILES_TABLE_SUFFIX = "files"
Expand All @@ -21,6 +23,8 @@
TABLE_SUFFIX_MAX_LEN = max(
len(ARCHIVE_TAGS_TABLE_SUFFIX),
len(ARCHIVES_TABLE_SUFFIX),
len(AWS_CREDENTIALS_TABLE_SUFFIX),
len(AWS_TEMPORARY_CREDENTIALS_TABLE_SUFFIX),
len(COLUMN_METADATA_TABLE_SUFFIX),
len(DATASETS_TABLE_SUFFIX),
len(FILES_TABLE_SUFFIX),
Expand Down Expand Up @@ -110,6 +114,47 @@ def _create_column_metadata_table(db_cursor, table_prefix: str, dataset: str) ->
)


def _create_aws_credentials_table(db_cursor, aws_credentials_table_name: str) -> None:
    """
    Creates the table that stores long-term AWS credentials, if it doesn't already exist.

    :param db_cursor: Database cursor used to execute the statement.
    :param aws_credentials_table_name: Name of the table to create.
    """
    # NOTE(review): this schema keeps the key pair in plain `VARCHAR` columns; whether values are
    # encrypted before insertion isn't visible from here -- confirm against the writer.
    create_table_statement = f"""
        CREATE TABLE IF NOT EXISTS `{aws_credentials_table_name}` (
            `id` INT NOT NULL AUTO_INCREMENT,
            `name` VARCHAR(255) NOT NULL UNIQUE,
            `access_key_id` VARCHAR(255) NOT NULL,
            `secret_access_key` VARCHAR(255) NOT NULL,
            `role_arn` VARCHAR(2048),
            `created_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
            `updated_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
            PRIMARY KEY (`id`)
        ) ROW_FORMAT=DYNAMIC
        """
    db_cursor.execute(create_table_statement)


def _create_aws_temporary_credentials_table(
    db_cursor, aws_temporary_credentials_table_name: str, aws_credentials_table_name: str
) -> None:
    """
    Creates the table that caches temporary AWS credentials (session tokens), if it doesn't
    already exist.

    Each row points at a long-term credential in `aws_credentials_table_name` through
    `long_term_key_id`. Deleting the referenced credential also deletes its cached rows
    (`ON DELETE CASCADE`). Because of the foreign key, the referenced table is expected to exist
    before this one is created.

    :param db_cursor: Database cursor used to execute the statement.
    :param aws_temporary_credentials_table_name: Name of the table to create.
    :param aws_credentials_table_name: Name of the long-term credentials table that
        `long_term_key_id` references.
    """
    # `session_token` is `TEXT` rather than a bounded `VARCHAR`: AWS STS doesn't fix the size of
    # the security tokens it returns and advises against assuming a maximum, so a length cap
    # could reject or truncate a valid token. The column isn't indexed, so `TEXT` has no effect
    # on the keys below.
    #
    # `source` is indexed through a 512-character prefix, presumably to keep the composite key
    # within the storage engine's index key-length limit.
    db_cursor.execute(
        f"""
        CREATE TABLE IF NOT EXISTS `{aws_temporary_credentials_table_name}` (
            `id` INT NOT NULL AUTO_INCREMENT,
            `long_term_key_id` INT NOT NULL,
            `access_key_id` VARCHAR(255) NOT NULL,
            `secret_access_key` VARCHAR(255) NOT NULL,
            `session_token` TEXT NOT NULL,
            `source` VARCHAR(2048) NOT NULL,
            `expires_at` DATETIME NOT NULL,
            `created_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (`id`),
            KEY `long_term_key_expires` (`long_term_key_id`, `expires_at`),
            KEY `source_expires` (`source`(512), `expires_at`),
            FOREIGN KEY (`long_term_key_id`) REFERENCES `{aws_credentials_table_name}` (`id`)
                ON DELETE CASCADE
        ) ROW_FORMAT=DYNAMIC
        """
    )


def _get_table_name(prefix: str, suffix: str, dataset: str | None) -> str:
"""
:param prefix:
Expand Down Expand Up @@ -145,6 +190,34 @@ def create_datasets_table(db_cursor, table_prefix: str) -> None:
)


def create_aws_credentials_table(db_cursor, table_prefix: str) -> None:
    """
    Creates the table that holds user-managed, static AWS credentials.

    The table name is derived from `table_prefix`; creation is delegated to
    `_create_aws_credentials_table`.

    :param db_cursor: The database cursor to execute the table creation.
    :param table_prefix: A string to prepend to the table name.
    """
    _create_aws_credentials_table(db_cursor, get_aws_credentials_table_name(table_prefix))


def create_aws_temporary_credentials_table(db_cursor, table_prefix: str) -> None:
    """
    Creates the table that caches AWS session tokens (temporary credentials).

    Cached tokens may come from several sources (user-provided, role assumption, etc.) and are
    kept so that credentials can be reused. The table holds a foreign key into the AWS
    credentials table that shares the same `table_prefix`.

    :param db_cursor: The database cursor to execute the table creation.
    :param table_prefix: A string to prepend to the table name.
    """
    long_term_table = get_aws_credentials_table_name(table_prefix)
    temporary_table = get_aws_temporary_credentials_table_name(table_prefix)
    _create_aws_temporary_credentials_table(db_cursor, temporary_table, long_term_table)


def add_dataset(
db_conn,
db_cursor,
Expand Down Expand Up @@ -300,6 +373,14 @@ def get_archives_table_name(table_prefix: str, dataset: str | None) -> str:
return _get_table_name(table_prefix, ARCHIVES_TABLE_SUFFIX, dataset)


def get_aws_credentials_table_name(table_prefix: str) -> str:
    """
    :param table_prefix: A string to prepend to the table name.
    :return: The name of the AWS credentials table; no dataset is involved in the name.
    """
    return _get_table_name(
        prefix=table_prefix,
        suffix=AWS_CREDENTIALS_TABLE_SUFFIX,
        dataset=None,
    )


def get_aws_temporary_credentials_table_name(table_prefix: str) -> str:
    """
    :param table_prefix: A string to prepend to the table name.
    :return: The name of the AWS temporary credentials table; no dataset is involved in the
        name.
    """
    return _get_table_name(
        prefix=table_prefix,
        suffix=AWS_TEMPORARY_CREDENTIALS_TABLE_SUFFIX,
        dataset=None,
    )


def get_column_metadata_table_name(table_prefix: str, dataset: str | None) -> str:
    """
    :param table_prefix: A string to prepend to the table name.
    :param dataset: The dataset the table belongs to, or `None`.
    :return: The name of the column metadata table for the given prefix and dataset.
    """
    return _get_table_name(
        prefix=table_prefix,
        suffix=COLUMN_METADATA_TABLE_SUFFIX,
        dataset=dataset,
    )

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
StorageEngine,
)
from clp_py_utils.clp_metadata_db_utils import (
create_aws_credentials_table,
create_aws_temporary_credentials_table,
create_datasets_table,
create_metadata_db_tables,
)
Expand Down Expand Up @@ -61,6 +63,9 @@ def main(argv):
with closing(sql_adapter.create_connection(True)) as metadata_db, closing(
metadata_db.cursor(dictionary=True)
) as metadata_db_cursor:
create_aws_credentials_table(metadata_db_cursor, table_prefix)
create_aws_temporary_credentials_table(metadata_db_cursor, table_prefix)

if StorageEngine.CLP_S == storage_engine:
create_datasets_table(metadata_db_cursor, table_prefix)
else:
Expand Down
Loading
Loading