diff --git a/components/renku_data_services/connected_services/api.spec.yaml b/components/renku_data_services/connected_services/api.spec.yaml index d2c5463c0..fa90810db 100644 --- a/components/renku_data_services/connected_services/api.spec.yaml +++ b/components/renku_data_services/connected_services/api.spec.yaml @@ -109,6 +109,35 @@ paths: $ref: "#/components/responses/Error" tags: - oauth2 + /oauth2/providers/{provider_id}/simple_connect: + post: + summary: Authorize using a given token set + description: | + Provide a pre-defined token set to store and use for + connecting to an external service. + parameters: + - in: path + name: provider_id + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/SimpleConnect" + responses: + "201": + description: The connection was created + content: + application/json: + schema: + $ref: "#/components/schemas/Connection" + default: + $ref: "#/components/responses/Error" + tags: + - oauth2 /oauth2/providers/{provider_id}/authorize: get: summary: Authorize an OAuth2 Client @@ -373,6 +402,15 @@ components: - id - provider_id - status + SimpleConnect: + type: object + properties: + access_token: + type: string + username: + type: string + required: + - access_token ConnectedAccount: type: object additionalProperties: false @@ -430,6 +468,7 @@ components: - "onedrive" - "dropbox" - "generic_oidc" + - "dockerhub" example: "gitlab" ApplicationSlug: description: | diff --git a/components/renku_data_services/connected_services/apispec.py b/components/renku_data_services/connected_services/apispec.py index f6f9c5417..b80c97699 100644 --- a/components/renku_data_services/connected_services/apispec.py +++ b/components/renku_data_services/connected_services/apispec.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: api.spec.yaml -# timestamp: 2025-09-05T11:16:18+00:00 +# timestamp: 2025-09-25T15:11:50+00:00 from __future__ import annotations @@ -12,6 +12,11 @@ from renku_data_services.connected_services.apispec_base import BaseAPISpec +class SimpleConnect(BaseAPISpec): + access_token: str + username: Optional[str] = None + + class RepositorySelection(Enum): all = "all" selected = "selected" @@ -35,6 +40,7 @@ class ProviderKind(Enum): onedrive = "onedrive" dropbox = "dropbox" generic_oidc = "generic_oidc" + dockerhub = "dockerhub" class ConnectionStatus(Enum): diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 0a33c3225..84de5b21c 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -129,6 +129,24 @@ async def _callback(request: Request) -> HTTPResponse: return "/oauth2/callback", ["GET"], _callback + def custom_connect(self) -> BlueprintFactoryResponse: + """Custom connection.""" + + @authenticate(self.authenticator) + @validate(json=apispec.SimpleConnect) + async def _custom_connect( + _: Request, user: base_models.APIUser, provider_id: str, body: apispec.SimpleConnect + ) -> JSONResponse: + provider_id = unquote(provider_id) + token_set = body.model_dump() + conn_id = await self.connected_services_repo.custom_connect(user, provider_id, token_set) + conn = apispec.Connection( + id=str(conn_id), provider_id=provider_id, status=apispec.ConnectionStatus.connected + ) + return validated_json(apispec.Connection, conn) + + return "/oauth2/providers//simple_connect", ["POST"], _custom_connect + def _get_callback_url(self, request: Request) -> str: callback_url = request.url_for(f"{self.name}.{self.authorize_callback.__name__}") # TODO: configure the server to trust the reverse proxy so that the request scheme is always "https". diff --git a/components/renku_data_services/connected_services/db.py b/components/renku_data_services/connected_services/db.py index 2902ac0b7..7482aaf21 100644 --- a/components/renku_data_services/connected_services/db.py +++ b/components/renku_data_services/connected_services/db.py @@ -44,7 +44,11 @@ def __init__( self.session_maker = session_maker self.encryption_key = encryption_key self.async_oauth2_client_class = async_oauth2_client_class - self.supported_image_registry_providers = {models.ProviderKind.gitlab, models.ProviderKind.github} + self.supported_image_registry_providers = { + models.ProviderKind.gitlab, + models.ProviderKind.github, + models.ProviderKind.dockerhub, + } async def get_oauth2_clients( self, @@ -247,6 +251,40 @@ async def authorize_client( return url + async def custom_connect(self, user: APIUser, client_id: str, token: dict[str, Any]) -> ULID: + """Adds a custom connection using the opaque token as given.""" + if not user.is_authenticated or user.id is None: + raise errors.ForbiddenError(message="You do not have the required permissions for this operation.") + + if client_id == "" or token == {} or token.get("access_token") is None: + raise errors.ValidationError(message="Client id and token are mandatory") + + token_set = self._encrypt_token_set(token=token, user_id=user.id) + supported_providers = {models.ProviderKind.dockerhub} + async with self.session_maker() as session, session.begin(): + result = await session.scalars( + select(schemas.OAuth2ClientORM) + .where(schemas.OAuth2ClientORM.id == client_id) + .where(schemas.OAuth2ClientORM.kind.in_(supported_providers)) + ) + client = result.one_or_none() + if client is None: + raise errors.MissingResourceError( + message=f"OAuth2 Client with id '{client_id}' does not exist or doesn't support direct connections." + ) + + conn_orm = schemas.OAuth2ConnectionORM( + user_id=user.id, + client_id=client_id, + token=token_set, + state=None, + status=models.ConnectionStatus.connected, + code_verifier=None, + next_url=None, + ) + session.add(conn_orm) + return conn_orm.id + async def authorize_callback(self, state: str, raw_url: str, callback_url: str) -> str | None: """Performs the OAuth2 authorization callback. @@ -381,11 +419,22 @@ async def get_oauth2_connected_account( raise errors.UnauthorizedError(message="OAuth2 token for connected service invalid or expired.") from e if response.status_code > 200: - raise errors.UnauthorizedError(message=f"Could not get account information.{response.json()}") + raise errors.UnauthorizedError(message=f"Could not get account information.{response.text}") account = adapter.api_validate_account_response(response) return account + async def get_non_oauth2_token(self, connection_id: ULID, user: base_models.APIUser) -> models.OAuth2TokenSet: + """Return the connection token.""" + if not user.is_authenticated or user.id is None: + raise errors.MissingResourceError( + message=f"OAuth2 connection with id '{connection_id}' does not exist or you do not have access to it." + ) + + connection = await self._get_valid_connection(connection_id, user) + token = self._decrypt_token_set(token=connection.token or {}, user_id=user.id) + return token + async def get_oauth2_connection_token( self, connection_id: ULID, user: base_models.APIUser ) -> models.OAuth2TokenSet: @@ -445,11 +494,16 @@ async def get_image_repo_client(self, image_provider: models.ImageProvider) -> I assert image_provider.connected_user is not None user = image_provider.connected_user.user conn = image_provider.connected_user.connection - token_set = await self.get_oauth2_connection_token(conn.id, user) + if image_provider.provider.kind == models.ProviderKind.dockerhub: + token_set = await self.get_non_oauth2_token(conn.id, user) + else: + token_set = await self.get_oauth2_connection_token(conn.id, user) access_token = token_set.access_token if access_token: - logger.debug(f"Use connection {conn.id} to {image_provider.provider.id} for user {user.id}") - repo_api = repo_api.with_oauth2_token(access_token) + logger.debug( + f"Use connection {conn.id} to {image_provider.provider.id} for user {user.id}/{token_set.username}" + ) + repo_api = repo_api.with_oauth2_token(access_token, token_set.username) return repo_api async def get_oauth2_app_installations( @@ -483,16 +537,10 @@ async def get_oauth2_app_installations( return models.AppInstallationList(total_count=0, installations=[]) - @asynccontextmanager - async def get_async_oauth2_client( + async def _get_valid_connection( self, connection_id: ULID, user: base_models.APIUser - ) -> AsyncGenerator[tuple[AsyncOAuth2Client, schemas.OAuth2ConnectionORM, ProviderAdapter], None]: - """Get the AsyncOAuth2Client for the given connection_id and user.""" - if not user.is_authenticated or user.id is None: - raise errors.MissingResourceError( - message=f"OAuth2 connection with id '{connection_id}' does not exist or you do not have access to it." - ) - + ) -> schemas.OAuth2ConnectionORM: + """Return a valid, connected connection.""" async with self.session_maker() as session: result = await session.scalars( select(schemas.OAuth2ConnectionORM) @@ -508,9 +556,20 @@ async def get_async_oauth2_client( if connection.status != models.ConnectionStatus.connected or connection.token is None: raise errors.UnauthorizedError(message=f"OAuth2 connection with id '{connection_id}' is not valid.") + return connection - client = connection.client - token = self._decrypt_token_set(token=connection.token, user_id=user.id) + @asynccontextmanager + async def get_async_oauth2_client( + self, connection_id: ULID, user: base_models.APIUser + ) -> AsyncGenerator[tuple[AsyncOAuth2Client, schemas.OAuth2ConnectionORM, ProviderAdapter], None]: + """Get the AsyncOAuth2Client for the given connection_id and user.""" + if not user.is_authenticated or user.id is None: + raise errors.MissingResourceError( + message=f"OAuth2 connection with id '{connection_id}' does not exist or you do not have access to it." + ) + connection = await self._get_valid_connection(connection_id, user) + client = connection.client + token = self._decrypt_token_set(token=connection.token or {}, user_id=user.id) async def update_token(token: dict[str, Any], refresh_token: str | None = None) -> None: if refresh_token is None: diff --git a/components/renku_data_services/connected_services/models.py b/components/renku_data_services/connected_services/models.py index acd9db813..e4fce5a09 100644 --- a/components/renku_data_services/connected_services/models.py +++ b/components/renku_data_services/connected_services/models.py @@ -19,6 +19,7 @@ class ProviderKind(StrEnum): onedrive = "onedrive" dropbox = "dropbox" generic_oidc = "generic_oidc" + dockerhub = "dockerhub" class ConnectionStatus(StrEnum): @@ -115,6 +116,11 @@ def dump_for_api(self) -> dict[str, Any]: data["expires_at_iso"] = self.expires_at_iso return data + @property + def username(self) -> str | None: + """Return the username property.""" + return self.get("username") + @property def access_token(self) -> str | None: """Returns the access token.""" diff --git a/components/renku_data_services/migrations/helper.py b/components/renku_data_services/migrations/helper.py new file mode 100644 index 000000000..50d8ad557 --- /dev/null +++ b/components/renku_data_services/migrations/helper.py @@ -0,0 +1,22 @@ +"""Helper functions for writing migrations.""" + +from alembic import op +from psycopg import sql + + +def get_enum_values(enum_type: str) -> list[str]: + """Return all values for the given enum type.""" + connection = op.get_bind() + ident = sql.Identifier(enum_type) + statement = ( + sql.SQL("select unnest(enum_range(null::{}))").format(ident).as_string(connection) # type: ignore[arg-type] + ) + result = connection.exec_driver_sql(statement) + rows = result.all() + return [v[0] for v in rows] + + +def create_enum_type(enum_type: str, values: list[str]) -> None: + """Creates a new enum type.""" + value_list = ", ".join([f"'{e}'" for e in values]) + op.execute(f"CREATE TYPE {enum_type} AS ENUM ({value_list})") diff --git a/components/renku_data_services/migrations/versions/c7d32e8b52cf_add_dockerhub_provider_kind.py b/components/renku_data_services/migrations/versions/c7d32e8b52cf_add_dockerhub_provider_kind.py new file mode 100644 index 000000000..7aef0294f --- /dev/null +++ b/components/renku_data_services/migrations/versions/c7d32e8b52cf_add_dockerhub_provider_kind.py @@ -0,0 +1,36 @@ +"""add dockerhub provider kind + +Revision ID: c7d32e8b52cf +Revises: 8365db35dc76 +Create Date: 2025-09-25 14:05:55.447619 + +""" + +from alembic import op + +import renku_data_services.migrations.helper as helper + +# revision identifiers, used by Alembic. +revision = "c7d32e8b52cf" +down_revision = "8365db35dc76" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute("ALTER TYPE providerkind ADD VALUE 'dockerhub'") + + +def downgrade() -> None: + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'dockerhub'") + + current_values = helper.get_enum_values("providerkind") + current_values.remove("dockerhub") + + op.execute("ALTER TYPE providerkind RENAME TO providerkind_old;") + helper.create_enum_type("providerkind", current_values) + op.execute( + "ALTER TABLE connected_services.oauth2_clients ALTER COLUMN kind SET DATA TYPE providerkind USING kind::text::providerkind" + ) + + op.execute("DROP TYPE providerkind_old CASCADE") diff --git a/components/renku_data_services/notebooks/api/classes/image.py b/components/renku_data_services/notebooks/api/classes/image.py index a8170a6a0..8ea4dd9f6 100644 --- a/components/renku_data_services/notebooks/api/classes/image.py +++ b/components/renku_data_services/notebooks/api/classes/image.py @@ -40,6 +40,7 @@ class ImageRepoDockerAPI: hostname: str oauth2_token: Optional[str] = field(default=None, repr=False) + username: str = "oauth2" # NOTE: We need to follow redirects so that we can authenticate with the image repositories properly. # NOTE: If we do not use default_factory to create the client here requests will fail because it can happen @@ -74,9 +75,11 @@ async def _get_docker_token(self, image: Image) -> Optional[str]: return None headers = {"Accept": "application/json"} if self.oauth2_token: - creds = base64.b64encode(f"oauth2:{self.oauth2_token}".encode()).decode() + logger.debug(f"Use credentials for user: {self.username}") + creds = base64.b64encode(f"{self.username}:{self.oauth2_token}".encode()).decode() headers["Authorization"] = f"Basic {creds}" token_req = await self.client.get(realm, params=params, headers=headers) + logger.debug(f"Docker token response for {self.username}: {token_req.status_code}") return str(token_req.json().get("token")) async def get_image_manifest( @@ -192,9 +195,17 @@ async def image_workdir(self, image: Image) -> Optional[PurePosixPath]: workdir = "/" return PurePosixPath(workdir) - def with_oauth2_token(self, oauth2_token: str) -> ImageRepoDockerAPI: + def with_oauth2_token(self, oauth2_token: str, user: str | None = None) -> ImageRepoDockerAPI: """Return a docker API instance with the token as authentication.""" - return ImageRepoDockerAPI(hostname=self.hostname, scheme=self.scheme, oauth2_token=oauth2_token) + return ImageRepoDockerAPI( + hostname=self.hostname, scheme=self.scheme, oauth2_token=oauth2_token, username=user or self.username + ) + + def with_user_name(self, user: str) -> ImageRepoDockerAPI: + """Return a docker api instance with the given user set.""" + return ImageRepoDockerAPI( + hostname=self.hostname, scheme=self.scheme, oauth2_token=self.oauth2_token, username=user + ) def maybe_with_oauth2_token(self, token_hostname: str | None, oauth2_token: str | None) -> ImageRepoDockerAPI: """Return a docker API instance with the token as authentication. diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index bd9103968..1a355972d 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -23,6 +23,7 @@ from renku_data_services.base_models import AnonymousAPIUser, APIUser, AuthenticatedAPIUser from renku_data_services.base_models.metrics import MetricsService from renku_data_services.connected_services.db import ConnectedServicesRepository +from renku_data_services.connected_services.models import ProviderKind from renku_data_services.crc.db import ClusterRepository, ResourcePoolRepository from renku_data_services.crc.models import GpuKind, ResourceClass, ResourcePool from renku_data_services.data_connectors.db import ( @@ -531,10 +532,8 @@ async def __requires_image_pull_secret(nb_config: NotebooksConfig, image: str, i return False -def __format_image_pull_secret(secret_name: str, access_token: str, registry_domain: str) -> ExtraSecret: - registry_secret = { - "auths": {registry_domain: {"auth": base64.b64encode(f"oauth2:{access_token}".encode()).decode()}} - } +def __format_image_pull_secret(secret_name: str, token: str, registry_domain: str) -> ExtraSecret: + registry_secret = {"auths": {registry_domain: {"auth": token}}} registry_secret = json.dumps(registry_secret) registry_secret = base64.b64encode(registry_secret.encode()).decode() return ExtraSecret( @@ -559,10 +558,14 @@ async def __get_gitlab_image_pull_secret_v2( if not image_check_result.image_provider: return None + registry_url = image_check_result.image_provider.registry_url + if image_check_result.image_provider.provider.kind == ProviderKind.dockerhub: + registry_url = "https://index.docker.io/v1/" + return __format_image_pull_secret( secret_name=secret_name, - access_token=image_check_result.token, - registry_domain=image_check_result.image_provider.registry_url, + token=image_check_result.token, + registry_domain=registry_url, ) diff --git a/components/renku_data_services/notebooks/image_check.py b/components/renku_data_services/notebooks/image_check.py index 5415bcce5..7b5ba3c7c 100644 --- a/components/renku_data_services/notebooks/image_check.py +++ b/components/renku_data_services/notebooks/image_check.py @@ -14,6 +14,7 @@ from __future__ import annotations +import base64 from dataclasses import dataclass, field from typing import final @@ -111,10 +112,13 @@ async def check_image(image: Image, user: APIUser, connected_services: Connected logger.info(f"Error getting connected account: {e}") unauth_error = e + token = f"{reg_api.username}:{reg_api.oauth2_token}" + token = base64.b64encode(token.encode()).decode() + return CheckResult( accessible=result == 200, response_code=result, image_provider=image_provider, - token=reg_api.oauth2_token, + token=token, error=unauth_error, )