diff --git a/Makefile b/Makefile index 0fd2eed62..c78ff9e65 100644 --- a/Makefile +++ b/Makefile @@ -40,10 +40,12 @@ components/renku_data_services/platform/apispec.py: components/renku_data_servic poetry run datamodel-codegen --input components/renku_data_services/platform/api.spec.yaml --output components/renku_data_services/platform/apispec.py --base-class renku_data_services.platform.apispec_base.BaseAPISpec $(codegen_params) components/renku_data_services/message_queue/apispec.py: components/renku_data_services/message_queue/api.spec.yaml poetry run datamodel-codegen --input components/renku_data_services/message_queue/api.spec.yaml --output components/renku_data_services/message_queue/apispec.py --base-class renku_data_services.message_queue.apispec_base.BaseAPISpec $(codegen_params) +components/renku_data_services/data_connectors/apispec.py: components/renku_data_services/data_connectors/api.spec.yaml + poetry run datamodel-codegen --input components/renku_data_services/data_connectors/api.spec.yaml --output components/renku_data_services/data_connectors/apispec.py --base-class renku_data_services.data_connectors.apispec_base.BaseAPISpec $(codegen_params) ##@ Apispec -schemas: components/renku_data_services/crc/apispec.py components/renku_data_services/storage/apispec.py components/renku_data_services/users/apispec.py components/renku_data_services/project/apispec.py components/renku_data_services/namespace/apispec.py components/renku_data_services/secrets/apispec.py components/renku_data_services/connected_services/apispec.py components/renku_data_services/repositories/apispec.py components/renku_data_services/notebooks/apispec.py components/renku_data_services/platform/apispec.py components/renku_data_services/message_queue/apispec.py ## Generate pydantic classes from apispec yaml files +schemas: components/renku_data_services/crc/apispec.py components/renku_data_services/storage/apispec.py components/renku_data_services/users/apispec.py components/renku_data_services/project/apispec.py components/renku_data_services/namespace/apispec.py components/renku_data_services/secrets/apispec.py components/renku_data_services/connected_services/apispec.py components/renku_data_services/repositories/apispec.py components/renku_data_services/notebooks/apispec.py components/renku_data_services/platform/apispec.py components/renku_data_services/message_queue/apispec.py components/renku_data_services/data_connectors/apispec.py ## Generate pydantic classes from apispec yaml files @echo "generated classes based on ApiSpec" ##@ Avro schemas diff --git a/bases/renku_data_services/background_jobs/config.py b/bases/renku_data_services/background_jobs/config.py index 8befedee1..dc3d7986e 100644 --- a/bases/renku_data_services/background_jobs/config.py +++ b/bases/renku_data_services/background_jobs/config.py @@ -9,6 +9,8 @@ from renku_data_services.authz.authz import Authz from renku_data_services.authz.config import AuthzConfig +from renku_data_services.data_connectors.db import DataConnectorProjectLinkRepository, DataConnectorRepository +from renku_data_services.data_connectors.migration_utils import DataConnectorMigrationTool from renku_data_services.errors import errors from renku_data_services.message_queue.config import RedisConfig from renku_data_services.message_queue.db import EventRepository @@ -29,6 +31,7 @@ class SyncConfig: group_repo: GroupRepository event_repo: EventRepository project_repo: ProjectRepository + data_connector_migration_tool: DataConnectorMigrationTool session_maker: 
Callable[..., AsyncSession] @classmethod @@ -67,7 +70,21 @@ def from_env(cls, prefix: str = "") -> "SyncConfig": group_repo=group_repo, authz=Authz(authz_config), ) - + data_connector_repo = DataConnectorRepository( + session_maker=session_maker, + authz=Authz(authz_config), + ) + data_connector_project_link_repo = DataConnectorProjectLinkRepository( + session_maker=session_maker, + authz=Authz(authz_config), + ) + data_connector_migration_tool = DataConnectorMigrationTool( + session_maker=session_maker, + data_connector_repo=data_connector_repo, + data_connector_project_link_repo=data_connector_project_link_repo, + project_repo=project_repo, + authz=Authz(authz_config), + ) user_repo = UserRepo( session_maker=session_maker, message_queue=message_queue, @@ -89,4 +106,13 @@ def from_env(cls, prefix: str = "") -> "SyncConfig": client_secret = os.environ[f"{prefix}KEYCLOAK_CLIENT_SECRET"] realm = os.environ.get(f"{prefix}KEYCLOAK_REALM", "Renku") kc_api = KeycloakAPI(keycloak_url=keycloak_url, client_id=client_id, client_secret=client_secret, realm=realm) - return cls(syncer, kc_api, authz_config, group_repo, event_repo, project_repo, session_maker) + return cls( + syncer, + kc_api, + authz_config, + group_repo, + event_repo, + project_repo, + data_connector_migration_tool, + session_maker, + ) diff --git a/bases/renku_data_services/background_jobs/core.py b/bases/renku_data_services/background_jobs/core.py index 289fbbbee..d3acc5a1f 100644 --- a/bases/renku_data_services/background_jobs/core.py +++ b/bases/renku_data_services/background_jobs/core.py @@ -264,3 +264,33 @@ async def migrate_user_namespaces_make_all_public(config: SyncConfig) -> None: ) await authz.client.WriteRelationships(authz_change) logger.info(f"Made user namespace {ns_id} public") + + +async def migrate_storages_v2_to_data_connectors(config: SyncConfig) -> None: + """Move storages_v2 to data_connectors.""" + logger = logging.getLogger("background_jobs").getChild(migrate_storages_v2_to_data_connectors.__name__) + + api_user = InternalServiceAdmin(id=ServiceAdminId.migrations) + storages_v2 = await config.data_connector_migration_tool.get_storages_v2(requested_by=api_user) + + if not storages_v2: + logger.info("Nothing to do.") + return + + logger.info(f"Migrating {len(storages_v2)} cloud storage v2 items to data connectors.") + failed_storages: list[str] = [] + for storage in storages_v2: + try: + data_connector = await config.data_connector_migration_tool.migrate_storage_v2( + requested_by=api_user, storage=storage + ) + logger.info(f"Migrated {storage.name} to {data_connector.namespace.slug}/{data_connector.slug}.") + logger.info(f"Deleted storage_v2: {storage.storage_id}") + except Exception as err: + logger.error(f"Failed to migrate {storage.name}.") + logger.error(err) + failed_storages.append(str(storage.storage_id)) + + logger.info(f"Migrated {len(storages_v2)-len(failed_storages)}/{len(storages_v2)} data connectors.") + if failed_storages: + logger.error(f"Migration failed for storages: {failed_storages}.") diff --git a/bases/renku_data_services/background_jobs/main.py b/bases/renku_data_services/background_jobs/main.py index 32bb56c07..27eb51f65 100644 --- a/bases/renku_data_services/background_jobs/main.py +++ b/bases/renku_data_services/background_jobs/main.py @@ -11,6 +11,7 @@ bootstrap_user_namespaces, fix_mismatched_project_namespace_ids, migrate_groups_make_all_public, + migrate_storages_v2_to_data_connectors, migrate_user_namespaces_make_all_public, ) from renku_data_services.migrations.core import 
run_migrations_for_app @@ -28,6 +29,7 @@ async def short_period_sync() -> None: await fix_mismatched_project_namespace_ids(config) await migrate_groups_make_all_public(config) await migrate_user_namespaces_make_all_public(config) + await migrate_storages_v2_to_data_connectors(config) async def long_period_sync() -> None: diff --git a/bases/renku_data_services/data_api/app.py b/bases/renku_data_services/data_api/app.py index 6352052bb..f50a124df 100644 --- a/bases/renku_data_services/data_api/app.py +++ b/bases/renku_data_services/data_api/app.py @@ -15,13 +15,14 @@ ResourcePoolUsersBP, UserResourcePoolsBP, ) +from renku_data_services.data_connectors.blueprints import DataConnectorsBP from renku_data_services.message_queue.blueprints import SearchBP from renku_data_services.namespace.blueprints import GroupsBP from renku_data_services.platform.blueprints import PlatformConfigBP from renku_data_services.project.blueprints import ProjectsBP from renku_data_services.repositories.blueprints import RepositoriesBP from renku_data_services.session.blueprints import EnvironmentsBP, SessionLaunchersBP -from renku_data_services.storage.blueprints import StorageBP, StorageSchemaBP, StoragesV2BP +from renku_data_services.storage.blueprints import StorageBP, StorageSchemaBP from renku_data_services.users.blueprints import KCUsersBP, UserPreferencesBP, UserSecretsBP @@ -75,12 +76,6 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic: storage_repo=config.storage_repo, authenticator=config.gitlab_authenticator, ) - storages_v2 = StoragesV2BP( - name="storages_v2", - url_prefix=url_prefix, - storage_v2_repo=config.storage_v2_repo, - authenticator=config.authenticator, - ) storage_schema = StorageSchemaBP(name="storage_schema", url_prefix=url_prefix) user_preferences = UserPreferencesBP( name="user_preferences", @@ -153,6 +148,14 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic: project_repo=config.project_repo, authz=config.authz, ) + data_connectors = DataConnectorsBP( + name="data_connectors", + url_prefix=url_prefix, + data_connector_repo=config.data_connector_repo, + data_connector_to_project_link_repo=config.data_connector_to_project_link_repo, + data_connector_secret_repo=config.data_connector_secret_repo, + authenticator=config.authenticator, + ) app.blueprint( [ resource_pools.blueprint(), @@ -163,7 +166,6 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic: user_secrets.blueprint(), user_resource_pools.blueprint(), storage.blueprint(), - storages_v2.blueprint(), storage_schema.blueprint(), user_preferences.blueprint(), misc.blueprint(), @@ -176,6 +178,7 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic: repositories.blueprint(), platform_config.blueprint(), search.blueprint(), + data_connectors.blueprint(), ] ) diff --git a/components/renku_data_services/app_config/config.py b/components/renku_data_services/app_config/config.py index 787199d22..ebd99feea 100644 --- a/components/renku_data_services/app_config/config.py +++ b/components/renku_data_services/app_config/config.py @@ -25,6 +25,7 @@ import renku_data_services.base_models as base_models import renku_data_services.connected_services import renku_data_services.crc +import renku_data_services.data_connectors import renku_data_services.platform import renku_data_services.repositories import renku_data_services.storage @@ -43,6 +44,11 @@ ServerOptionsDefaults, generate_default_resource_pool, ) +from renku_data_services.data_connectors.db import ( + DataConnectorProjectLinkRepository, 
+ DataConnectorRepository, + DataConnectorSecretRepository, +) from renku_data_services.db_config import DBConfig from renku_data_services.git.gitlab import DummyGitlabAPI, GitlabAPI from renku_data_services.k8s.clients import DummyCoreClient, DummySchedulingClient, K8sCoreClient, K8sSchedulingClient @@ -57,7 +63,7 @@ from renku_data_services.repositories.db import GitRepositoriesRepository from renku_data_services.secrets.db import UserSecretsRepo from renku_data_services.session.db import SessionRepository -from renku_data_services.storage.db import StorageRepository, StorageV2Repository +from renku_data_services.storage.db import StorageRepository from renku_data_services.users.config import UserPreferencesConfig from renku_data_services.users.db import UserPreferencesRepository from renku_data_services.users.db import UserRepo as KcUserRepo @@ -162,7 +168,6 @@ class Config: _user_repo: UserRepository | None = field(default=None, repr=False, init=False) _rp_repo: ResourcePoolRepository | None = field(default=None, repr=False, init=False) _storage_repo: StorageRepository | None = field(default=None, repr=False, init=False) - _storage_v2_repo: StorageV2Repository | None = field(default=None, repr=False, init=False) _project_repo: ProjectRepository | None = field(default=None, repr=False, init=False) _group_repo: GroupRepository | None = field(default=None, repr=False, init=False) _event_repo: EventRepository | None = field(default=None, repr=False, init=False) @@ -175,6 +180,11 @@ class Config: _connected_services_repo: ConnectedServicesRepository | None = field(default=None, repr=False, init=False) _git_repositories_repo: GitRepositoriesRepository | None = field(default=None, repr=False, init=False) _platform_repo: PlatformRepository | None = field(default=None, repr=False, init=False) + _data_connector_repo: DataConnectorRepository | None = field(default=None, repr=False, init=False) + _data_connector_to_project_link_repo: DataConnectorProjectLinkRepository | None = field( + default=None, repr=False, init=False + ) + _data_connector_secret_repo: DataConnectorSecretRepository | None = field(default=None, repr=False, init=False) def __post_init__(self) -> None: # NOTE: Read spec files required for Swagger @@ -218,6 +228,10 @@ def __post_init__(self) -> None: with open(spec_file) as f: search = safe_load(f) + spec_file = Path(renku_data_services.data_connectors.__file__).resolve().parent / "api.spec.yaml" + with open(spec_file) as f: + data_connectors = safe_load(f) + self.spec = merge_api_specs( crc_spec, storage_spec, @@ -229,6 +243,7 @@ def __post_init__(self) -> None: repositories, platform, search, + data_connectors, ) if self.default_resource_pool_file is not None: @@ -273,18 +288,6 @@ def storage_repo(self) -> StorageRepository: ) return self._storage_repo - @property - def storage_v2_repo(self) -> StorageV2Repository: - """The DB adapter for V2 cloud storage configs.""" - if not self._storage_v2_repo: - self._storage_v2_repo = StorageV2Repository( - session_maker=self.db.async_session_maker, - project_authz=self.authz, - user_repo=self.kc_user_repo, - secret_service_public_key=self.secrets_service_public_key, - ) - return self._storage_v2_repo - @property def event_repo(self) -> EventRepository: """The DB adapter for cloud event configs.""" @@ -412,6 +415,36 @@ def platform_repo(self) -> PlatformRepository: ) return self._platform_repo + @property + def data_connector_repo(self) -> DataConnectorRepository: + """The DB adapter for data connectors.""" + if not 
self._data_connector_repo: + self._data_connector_repo = DataConnectorRepository( + session_maker=self.db.async_session_maker, authz=self.authz + ) + return self._data_connector_repo + + @property + def data_connector_to_project_link_repo(self) -> DataConnectorProjectLinkRepository: + """The DB adapter for data connector to project links.""" + if not self._data_connector_to_project_link_repo: + self._data_connector_to_project_link_repo = DataConnectorProjectLinkRepository( + session_maker=self.db.async_session_maker, authz=self.authz + ) + return self._data_connector_to_project_link_repo + + @property + def data_connector_secret_repo(self) -> DataConnectorSecretRepository: + """The DB adapter for data connector secrets.""" + if not self._data_connector_secret_repo: + self._data_connector_secret_repo = DataConnectorSecretRepository( + session_maker=self.db.async_session_maker, + data_connector_repo=self.data_connector_repo, + user_repo=self.kc_user_repo, + secret_service_public_key=self.secrets_service_public_key, + ) + return self._data_connector_secret_repo + @classmethod def from_env(cls, prefix: str = "") -> "Config": """Create a config from environment variables.""" diff --git a/components/renku_data_services/authz/authz.py b/components/renku_data_services/authz/authz.py index 5ae0237b1..ace5ec026 100644 --- a/components/renku_data_services/authz/authz.py +++ b/components/renku_data_services/authz/authz.py @@ -32,6 +32,7 @@ from renku_data_services.authz.config import AuthzConfig from renku_data_services.authz.models import Change, Member, MembershipChange, Role, Scope, Visibility from renku_data_services.base_models.core import InternalServiceAdmin +from renku_data_services.data_connectors.models import DataConnector, DataConnectorToProjectLink, DataConnectorUpdate from renku_data_services.errors import errors from renku_data_services.namespace.models import Group, GroupUpdate, Namespace, NamespaceKind, NamespaceUpdate from renku_data_services.project.models import Project, ProjectUpdate @@ -51,7 +52,16 @@ def authz(self) -> "Authz": _AuthzChangeFuncResult = TypeVar( "_AuthzChangeFuncResult", - bound=Project | ProjectUpdate | Group | UserInfoUpdate | list[UserInfo] | UserInfo | None, + bound=Project + | ProjectUpdate + | Group + | UserInfoUpdate + | list[UserInfo] + | UserInfo + | DataConnector + | DataConnectorUpdate + | DataConnectorToProjectLink + | None, ) _T = TypeVar("_T") _WithAuthz = TypeVar("_WithAuthz", bound=WithAuthz) @@ -87,6 +97,9 @@ class _Relation(StrEnum): group_platform: str = "group_platform" user_namespace_platform: str = "user_namespace_platform" project_namespace: str = "project_namespace" + data_connector_platform: str = "data_connector_platform" + data_connector_namespace: str = "data_connector_namespace" + linked_to: str = "linked_to" @classmethod def from_role(cls, role: Role) -> "_Relation": @@ -119,6 +132,7 @@ class ResourceType(StrEnum): platform: str = "platform" group: str = "group" user_namespace: str = "user_namespace" + data_connector: str = "data_connector" class AuthzOperation(StrEnum): @@ -129,6 +143,8 @@ class AuthzOperation(StrEnum): update: str = "update" update_or_insert: str = "update_or_insert" insert_many: str = "insert_many" + create_link: str = "create_link" + delete_link: str = "delete_link" class _AuthzConverter: @@ -170,6 +186,10 @@ def group(id: ULID) -> ObjectReference: def user_namespace(id: ULID) -> ObjectReference: return ObjectReference(object_type=ResourceType.user_namespace, object_id=str(id)) + @staticmethod + def 
data_connector(id: ULID) -> ObjectReference: + return ObjectReference(object_type=ResourceType.data_connector.value, object_id=str(id)) + @staticmethod def to_object(resource_type: ResourceType, resource_id: str | ULID | int) -> ObjectReference: match (resource_type, resource_id): @@ -183,6 +203,8 @@ def to_object(resource_type: ResourceType, resource_id: str | ULID | int) -> Obj return _AuthzConverter.user_namespace(rid) case (ResourceType.group, rid) if isinstance(rid, ULID): return _AuthzConverter.group(rid) + case (ResourceType.data_connector, dcid) if isinstance(dcid, ULID): + return _AuthzConverter.data_connector(dcid) case (ResourceType.platform, _): return _AuthzConverter.platform() raise errors.ProgrammingError( @@ -217,7 +239,7 @@ async def decorated_function( message="The authorization decorator needs to have at least one positional argument after 'user'" ) potential_resource = args[0] - resource: Project | Group | Namespace | None = None + resource: Project | Group | Namespace | DataConnector | None = None match resource_type: case ResourceType.project if isinstance(potential_resource, Project): resource = potential_resource @@ -225,6 +247,8 @@ async def decorated_function( resource = potential_resource case ResourceType.user_namespace if isinstance(potential_resource, Namespace): resource = potential_resource + case ResourceType.data_connector if isinstance(potential_resource, DataConnector): + resource = potential_resource case _: raise errors.ProgrammingError( message="The decorator for checking permissions for authorization database operations " @@ -569,11 +593,40 @@ async def _get_authz_change( f"database updates for inserting namespaces but found {type(res)}" ) authz_change.extend(db_repo.authz._add_user_namespace(res.namespace)) + case AuthzOperation.create, ResourceType.data_connector if isinstance(result, DataConnector): + authz_change = db_repo.authz._add_data_connector(result) + case AuthzOperation.delete, ResourceType.data_connector if result is None: + # NOTE: This means that the data connector does not exist in the first place so nothing was deleted + pass + case AuthzOperation.delete, ResourceType.data_connector if isinstance(result, DataConnector): + user = _extract_user_from_args(*func_args, **func_kwargs) + authz_change = await db_repo.authz._remove_data_connector(user, result) + case AuthzOperation.update, ResourceType.data_connector if isinstance(result, DataConnectorUpdate): + authz_change = _AuthzChange() + if result.old.visibility != result.new.visibility: + user = _extract_user_from_args(*func_args, **func_kwargs) + authz_change.extend(await db_repo.authz._update_data_connector_visibility(user, result.new)) + if result.old.namespace.id != result.new.namespace.id: + user = _extract_user_from_args(*func_args, **func_kwargs) + authz_change.extend(await db_repo.authz._update_data_connector_namespace(user, result.new)) + case AuthzOperation.create_link, ResourceType.data_connector if isinstance( + result, DataConnectorToProjectLink + ): + user = _extract_user_from_args(*func_args, **func_kwargs) + authz_change = await db_repo.authz._add_data_connector_to_project_link(user, result) + case AuthzOperation.delete_link, ResourceType.data_connector if result is None: + # NOTE: This means that the link does not exist in the first place so nothing was deleted + pass + case AuthzOperation.delete_link, ResourceType.data_connector if isinstance( + result, DataConnectorToProjectLink + ): + user = _extract_user_from_args(*func_args, **func_kwargs) + authz_change = await 
db_repo.authz._remove_data_connector_to_project_link(user, result) case _: resource_id: str | ULID | None = "unknown" - if isinstance(result, (Project, Namespace, Group)): + if isinstance(result, (Project, Namespace, Group, DataConnector)): resource_id = result.id - elif isinstance(result, (ProjectUpdate, NamespaceUpdate, GroupUpdate)): + elif isinstance(result, (ProjectUpdate, NamespaceUpdate, GroupUpdate, DataConnectorUpdate)): resource_id = result.new.id raise errors.ProgrammingError( message=f"Encountered an unknown authorization operation {op} on resource {resource} " @@ -662,12 +715,12 @@ def _add_project(self, project: Project) -> _AuthzChange: if project.visibility == Visibility.PUBLIC: all_users_are_viewers = Relationship( resource=project_res, - relation=_Relation.viewer.value, + relation=_Relation.public_viewer.value, subject=all_users, ) all_anon_users_are_viewers = Relationship( resource=project_res, - relation=_Relation.viewer.value, + relation=_Relation.public_viewer.value, subject=all_anon_users, ) relationships.extend([all_users_are_viewers, all_anon_users_are_viewers]) @@ -694,6 +747,17 @@ async def _remove_project( ReadRelationshipsRequest(consistency=consistency, relationship_filter=rel_filter) ) rels: list[Relationship] = [] + async for response in responses: + rels.append(response.relationship) + # Project is also a subject for "linked_to" relations + rel_filter = RelationshipFilter( + optional_subject_filter=SubjectFilter( + subject_type=ResourceType.project.value, optional_subject_id=str(project.id) + ) + ) + responses: AsyncIterable[ReadRelationshipsResponse] = self.client.ReadRelationships( + ReadRelationshipsRequest(consistency=consistency, relationship_filter=rel_filter) + ) async for response in responses: rels.append(response.relationship) apply = WriteRelationshipsRequest( @@ -717,12 +781,12 @@ async def _update_project_visibility( anon_users_sub = SubjectReference(object=_AuthzConverter.anonymous_users()) all_users_are_viewers = Relationship( resource=project_res, - relation=_Relation.viewer.value, + relation=_Relation.public_viewer.value, subject=all_users_sub, ) anon_users_are_viewers = Relationship( resource=project_res, - relation=_Relation.viewer.value, + relation=_Relation.public_viewer.value, subject=anon_users_sub, ) make_public = WriteRelationshipsRequest( @@ -1424,6 +1488,74 @@ async def _remove_user_namespace(self, user_id: str, zed_token: ZedToken | None ) return _AuthzChange(apply=apply, undo=undo) + def _add_data_connector(self, data_connector: DataConnector) -> _AuthzChange: + """Create the new data connector and associated resources and relations in the DB.""" + creator = SubjectReference(object=_AuthzConverter.user(data_connector.created_by)) + data_connector_res = _AuthzConverter.data_connector(data_connector.id) + creator_is_owner = Relationship(resource=data_connector_res, relation=_Relation.owner.value, subject=creator) + all_users = SubjectReference(object=_AuthzConverter.all_users()) + all_anon_users = SubjectReference(object=_AuthzConverter.anonymous_users()) + data_connector_namespace = SubjectReference( + object=_AuthzConverter.user_namespace(data_connector.namespace.id) + if data_connector.namespace.kind == NamespaceKind.user + else _AuthzConverter.group(cast(ULID, data_connector.namespace.underlying_resource_id)) + ) + data_connector_in_platform = Relationship( + resource=data_connector_res, + relation=_Relation.data_connector_platform, + subject=SubjectReference(object=self._platform), + ) + data_connector_in_namespace = 
Relationship( + resource=data_connector_res, relation=_Relation.data_connector_namespace, subject=data_connector_namespace + ) + relationships = [creator_is_owner, data_connector_in_platform, data_connector_in_namespace] + if data_connector.visibility == Visibility.PUBLIC: + all_users_are_viewers = Relationship( + resource=data_connector_res, + relation=_Relation.public_viewer.value, + subject=all_users, + ) + all_anon_users_are_viewers = Relationship( + resource=data_connector_res, + relation=_Relation.public_viewer.value, + subject=all_anon_users, + ) + relationships.extend([all_users_are_viewers, all_anon_users_are_viewers]) + apply = WriteRelationshipsRequest( + updates=[ + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=i) for i in relationships + ] + ) + undo = WriteRelationshipsRequest( + updates=[ + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_DELETE, relationship=i) for i in relationships + ] + ) + return _AuthzChange(apply=apply, undo=undo) + + @_is_allowed_on_resource(Scope.DELETE, ResourceType.data_connector) + async def _remove_data_connector( + self, user: base_models.APIUser, data_connector: DataConnector, *, zed_token: ZedToken | None = None + ) -> _AuthzChange: + """Remove the relationships associated with the data connector.""" + consistency = Consistency(at_least_as_fresh=zed_token) if zed_token else Consistency(fully_consistent=True) + rel_filter = RelationshipFilter( + resource_type=ResourceType.data_connector.value, optional_resource_id=str(data_connector.id) + ) + responses: AsyncIterable[ReadRelationshipsResponse] = self.client.ReadRelationships( + ReadRelationshipsRequest(consistency=consistency, relationship_filter=rel_filter) + ) + rels: list[Relationship] = [] + async for response in responses: + rels.append(response.relationship) + apply = WriteRelationshipsRequest( + updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_DELETE, relationship=i) for i in rels] + ) + undo = WriteRelationshipsRequest( + updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=i) for i in rels] + ) + return _AuthzChange(apply=apply, undo=undo) + async def _remove_user( self, requested_by: base_models.APIUser, @@ -1455,3 +1587,238 @@ async def _remove_user( updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=i) for i in rels] ) return _AuthzChange(apply=apply, undo=undo) + + # NOTE changing visibility is the same access level as removal + @_is_allowed_on_resource(Scope.DELETE, ResourceType.data_connector) + async def _update_data_connector_visibility( + self, user: base_models.APIUser, data_connector: DataConnector, *, zed_token: ZedToken | None = None + ) -> _AuthzChange: + """Update the visibility of the data connector in the authorization database.""" + data_connector_id_str = str(data_connector.id) + consistency = Consistency(at_least_as_fresh=zed_token) if zed_token else Consistency(fully_consistent=True) + data_connector_res = _AuthzConverter.data_connector(data_connector.id) + all_users_sub = SubjectReference(object=_AuthzConverter.all_users()) + anon_users_sub = SubjectReference(object=_AuthzConverter.anonymous_users()) + all_users_are_viewers = Relationship( + resource=data_connector_res, + relation=_Relation.public_viewer.value, + subject=all_users_sub, + ) + anon_users_are_viewers = Relationship( + resource=data_connector_res, + relation=_Relation.public_viewer.value, + subject=anon_users_sub, + ) + make_public = WriteRelationshipsRequest( + updates=[ + 
RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=all_users_are_viewers), + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=anon_users_are_viewers), + ] + ) + make_private = WriteRelationshipsRequest( + updates=[ + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_DELETE, relationship=all_users_are_viewers), + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_DELETE, relationship=anon_users_are_viewers), + ] + ) + rel_filter = RelationshipFilter( + resource_type=ResourceType.data_connector.value, + optional_resource_id=data_connector_id_str, + optional_subject_filter=SubjectFilter( + subject_type=ResourceType.user.value, optional_subject_id=all_users_sub.object.object_id + ), + ) + current_relation_users: ReadRelationshipsResponse | None = await anext( + aiter( + self.client.ReadRelationships( + ReadRelationshipsRequest(consistency=consistency, relationship_filter=rel_filter) + ) + ), + None, + ) + rel_filter = RelationshipFilter( + resource_type=ResourceType.data_connector.value, + optional_resource_id=data_connector_id_str, + optional_subject_filter=SubjectFilter( + subject_type=ResourceType.anonymous_user.value, + optional_subject_id=anon_users_sub.object.object_id, + ), + ) + current_relation_anon_users: ReadRelationshipsResponse | None = await anext( + aiter( + self.client.ReadRelationships( + ReadRelationshipsRequest(consistency=consistency, relationship_filter=rel_filter) + ) + ), + None, + ) + data_connector_is_public_for_users = ( + current_relation_users is not None + and current_relation_users.relationship.subject.object.object_type == ResourceType.user.value + and current_relation_users.relationship.subject.object.object_id == all_users_sub.object.object_id + ) + data_connector_is_public_for_anon_users = ( + current_relation_anon_users is not None + and current_relation_anon_users.relationship.subject.object.object_type == ResourceType.anonymous_user.value + and current_relation_anon_users.relationship.subject.object.object_id == anon_users_sub.object.object_id + ) + data_connector_already_public = data_connector_is_public_for_users and data_connector_is_public_for_anon_users + data_connector_already_private = not data_connector_already_public + match data_connector.visibility: + case Visibility.PUBLIC: + if data_connector_already_public: + return _AuthzChange(apply=WriteRelationshipsRequest(), undo=WriteRelationshipsRequest()) + return _AuthzChange(apply=make_public, undo=make_private) + case Visibility.PRIVATE: + if data_connector_already_private: + return _AuthzChange(apply=WriteRelationshipsRequest(), undo=WriteRelationshipsRequest()) + return _AuthzChange(apply=make_private, undo=make_public) + raise errors.ProgrammingError( + message=f"Encountered unknown data connector visibility {data_connector.visibility} when trying to " + f"make a visibility change for data connector with ID {data_connector.id}", + ) + + # NOTE changing namespace is the same access level as removal + @_is_allowed_on_resource(Scope.DELETE, ResourceType.data_connector) + async def _update_data_connector_namespace( + self, user: base_models.APIUser, data_connector: DataConnector, *, zed_token: ZedToken | None = None + ) -> _AuthzChange: + """Update the namespace of the data connector in the authorization database.""" + consistency = Consistency(at_least_as_fresh=zed_token) if zed_token else Consistency(fully_consistent=True) + data_connector_res = _AuthzConverter.data_connector(data_connector.id) + data_connector_filter = 
RelationshipFilter( + resource_type=ResourceType.data_connector.value, + optional_resource_id=str(data_connector.id), + optional_relation=_Relation.data_connector_namespace.value, + ) + current_namespace: ReadRelationshipsResponse | None = await anext( + aiter( + self.client.ReadRelationships( + ReadRelationshipsRequest(relationship_filter=data_connector_filter, consistency=consistency) + ) + ), + None, + ) + if not current_namespace: + raise errors.ProgrammingError( + message=f"The data connector with ID {data_connector.id} whose namespace is being updated " + "does not currently have a namespace." + ) + if current_namespace.relationship.subject.object.object_id == data_connector.namespace.id: + return _AuthzChange() + new_namespace_sub = ( + SubjectReference(object=_AuthzConverter.group(data_connector.namespace.id)) + if data_connector.namespace.kind == NamespaceKind.group + else SubjectReference(object=_AuthzConverter.user_namespace(data_connector.namespace.id)) + ) + old_namespace_sub = ( + SubjectReference( + object=_AuthzConverter.group(ULID.from_str(current_namespace.relationship.subject.object.object_id)) + ) + if current_namespace.relationship.subject.object.object_type == ResourceType.group.value + else SubjectReference( + object=_AuthzConverter.user_namespace( + ULID.from_str(current_namespace.relationship.subject.object.object_id) + ) + ) + ) + new_namespace = Relationship( + resource=data_connector_res, + relation=_Relation.data_connector_namespace.value, + subject=new_namespace_sub, + ) + old_namespace = Relationship( + resource=data_connector_res, + relation=_Relation.data_connector_namespace.value, + subject=old_namespace_sub, + ) + apply_change = WriteRelationshipsRequest( + updates=[ + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=new_namespace), + ] + ) + undo_change = WriteRelationshipsRequest( + updates=[ + RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=old_namespace), + ] + ) + return _AuthzChange(apply=apply_change, undo=undo_change) + + async def _add_data_connector_to_project_link( + self, user: base_models.APIUser, link: DataConnectorToProjectLink + ) -> _AuthzChange: + """Links a data connector to a project.""" + # NOTE: we manually check for permissions here since it is not trivially expressed through decorators + allowed_from = await self.has_permission( + user, ResourceType.data_connector, link.data_connector_id, Scope.ADD_LINK + ) + if not allowed_from: + raise errors.MissingResourceError( + message=f"The user with ID {user.id} cannot perform operation {Scope.ADD_LINK} " + f"on {ResourceType.data_connector.value} " + f"with ID {link.data_connector_id} or the resource does not exist." + ) + allowed_to = await self.has_permission(user, ResourceType.project, link.project_id, Scope.WRITE) + if not allowed_to: + raise errors.MissingResourceError( + message=f"The user with ID {user.id} cannot perform operation {Scope.WRITE} " + f"on {ResourceType.project.value} " + f"with ID {link.project_id} or the resource does not exist." 
+ ) + + data_connector_res = _AuthzConverter.data_connector(link.data_connector_id) + project_subject = SubjectReference(object=_AuthzConverter.project(link.project_id)) + relationship = Relationship( + resource=data_connector_res, + relation=_Relation.linked_to.value, + subject=project_subject, + ) + apply = WriteRelationshipsRequest( + updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=relationship)] + ) + undo = WriteRelationshipsRequest( + updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_DELETE, relationship=relationship)] + ) + change = _AuthzChange( + apply=apply, + undo=undo, + ) + return change + + async def _remove_data_connector_to_project_link( + self, user: base_models.APIUser, link: DataConnectorToProjectLink + ) -> _AuthzChange: + """Remove the relationships associated with the link from a data connector to a project.""" + # NOTE: we manually check for permissions here since it is not trivially expressed through decorators + allowed_from = await self.has_permission( + user, ResourceType.data_connector, link.data_connector_id, Scope.DELETE + ) + allowed_to, zed_token = await self._has_permission(user, ResourceType.project, link.project_id, Scope.WRITE) + allowed = allowed_from or allowed_to + if not allowed: + raise errors.MissingResourceError( + message=f"The user with ID {user.id} cannot perform operation {AuthzOperation.delete_link} " + f"on the data connector to project link with ID {link.id} or the resource does not exist." + ) + consistency = Consistency(at_least_as_fresh=zed_token) if zed_token else Consistency(fully_consistent=True) + rel_filter = RelationshipFilter( + resource_type=ResourceType.data_connector.value, + optional_resource_id=str(link.data_connector_id), + optional_relation=_Relation.linked_to.value, + optional_subject_filter=SubjectFilter( + subject_type=ResourceType.project.value, optional_subject_id=str(link.project_id) + ), + ) + responses: AsyncIterable[ReadRelationshipsResponse] = self.client.ReadRelationships( + ReadRelationshipsRequest(consistency=consistency, relationship_filter=rel_filter) + ) + rels: list[Relationship] = [] + async for response in responses: + rels.append(response.relationship) + apply = WriteRelationshipsRequest( + updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_DELETE, relationship=i) for i in rels] + ) + undo = WriteRelationshipsRequest( + updates=[RelationshipUpdate(operation=RelationshipUpdate.OPERATION_TOUCH, relationship=i) for i in rels] + ) + return _AuthzChange(apply=apply, undo=undo) diff --git a/components/renku_data_services/authz/models.py b/components/renku_data_services/authz/models.py index 9e7879e17..6ee61124a 100644 --- a/components/renku_data_services/authz/models.py +++ b/components/renku_data_services/authz/models.py @@ -50,6 +50,7 @@ class Scope(Enum): DELETE: str = "delete" CHANGE_MEMBERSHIP: str = "change_membership" READ_CHILDREN: str = "read_children" + ADD_LINK: str = "add_link" IS_ADMIN: str = "is_admin" diff --git a/components/renku_data_services/authz/schemas.py b/components/renku_data_services/authz/schemas.py index e43cd1b6e..fa24f219e 100644 --- a/components/renku_data_services/authz/schemas.py +++ b/components/renku_data_services/authz/schemas.py @@ -3,10 +3,12 @@ These are applied through alembic migrations in the common migrations folder. 
""" +from collections.abc import Iterable from dataclasses import dataclass +from typing import cast from authzed.api.v1 import SyncClient -from authzed.api.v1.core_pb2 import SubjectReference +from authzed.api.v1.core_pb2 import Relationship, RelationshipUpdate, SubjectReference from authzed.api.v1.permission_service_pb2 import ( DeleteRelationshipsRequest, DeleteRelationshipsResponse, @@ -15,6 +17,7 @@ WriteRelationshipsRequest, ) from authzed.api.v1.schema_service_pb2 import WriteSchemaRequest, WriteSchemaResponse +from ulid import ULID from renku_data_services.authz.authz import ResourceType, _AuthzConverter, _Relation from renku_data_services.errors import errors @@ -281,3 +284,156 @@ def downgrade( WriteSchemaRequest(schema=_v2), ], ) + +_v4: str = """\ +definition user {} + +definition group { + relation group_platform: platform + relation owner: user + relation editor: user + relation viewer: user + relation public_viewer: user:* | anonymous_user:* + permission read = public_viewer + read_children + permission read_children = viewer + write + permission write = editor + delete + permission change_membership = delete + permission delete = owner + group_platform->is_admin +} + +definition user_namespace { + relation user_namespace_platform: platform + relation owner: user + relation public_viewer: user:* | anonymous_user:* + permission read = public_viewer + read_children + permission read_children = delete + permission write = delete + permission delete = owner + user_namespace_platform->is_admin +} + +definition anonymous_user {} + +definition platform { + relation admin: user + permission is_admin = admin +} + +definition project { + relation project_platform: platform + relation project_namespace: user_namespace | group + relation owner: user + relation editor: user + relation viewer: user + relation public_viewer: user:* | anonymous_user:* + permission read = public_viewer + viewer + write + project_namespace->read_children + permission read_linked_resources = viewer + editor + owner + project_platform->is_admin + permission write = editor + delete + project_namespace->write + permission change_membership = delete + permission delete = owner + project_platform->is_admin + project_namespace->delete +} + +definition data_connector { + relation data_connector_platform: platform + relation data_connector_namespace: user_namespace | group + relation linked_to: project + relation owner: user + relation editor: user + relation viewer: user + relation public_viewer: user:* | anonymous_user:* + permission read = public_viewer + viewer + write + \ + data_connector_namespace->read_children + read_from_linked_resource + permission read_from_linked_resource = linked_to->read_linked_resources + permission write = editor + delete + data_connector_namespace->write + permission change_membership = delete + permission delete = owner + data_connector_platform->is_admin + data_connector_namespace->delete + permission add_link = write + public_viewer +}""" + + +def generate_v4(public_project_ids: Iterable[str]) -> AuthzSchemaMigration: + """Creates the v4 schema migration.""" + up: list[WriteRelationshipsRequest | DeleteRelationshipsRequest | WriteSchemaRequest] = [ + DeleteRelationshipsRequest( + relationship_filter=RelationshipFilter( + resource_type=ResourceType.project.value, + optional_relation=_Relation.viewer.value, + optional_subject_filter=SubjectFilter( + subject_type=ResourceType.user.value, + optional_subject_id=SubjectReference(object=_AuthzConverter.all_users()).object.object_id, + ), 
+ ) + ), + DeleteRelationshipsRequest( + relationship_filter=RelationshipFilter( + resource_type=ResourceType.project.value, + optional_relation=_Relation.viewer.value, + optional_subject_filter=SubjectFilter( + subject_type=ResourceType.anonymous_user.value, + optional_subject_id=SubjectReference(object=_AuthzConverter.anonymous_users()).object.object_id, + ), + ) + ), + WriteSchemaRequest(schema=_v4), + ] + down: list[WriteRelationshipsRequest | DeleteRelationshipsRequest | WriteSchemaRequest] = [ + DeleteRelationshipsRequest( + relationship_filter=RelationshipFilter( + resource_type=ResourceType.project.value, optional_relation=_Relation.public_viewer.value + ) + ), + DeleteRelationshipsRequest( + relationship_filter=RelationshipFilter(resource_type=ResourceType.data_connector.value) + ), + WriteSchemaRequest(schema=_v3), + ] + + all_users_sub = SubjectReference(object=_AuthzConverter.all_users()) + anon_users_sub = SubjectReference(object=_AuthzConverter.anonymous_users()) + for project_id in public_project_ids: + project_res = _AuthzConverter.project(cast(ULID, ULID.from_str(project_id))) + all_users_are_viewers = Relationship( + resource=project_res, + relation=_Relation.public_viewer.value, + subject=all_users_sub, + ) + anon_users_are_viewers = Relationship( + resource=project_res, + relation=_Relation.public_viewer.value, + subject=anon_users_sub, + ) + down_all_users_are_viewers = Relationship( + resource=project_res, + relation=_Relation.viewer.value, + subject=all_users_sub, + ) + down_anon_users_are_viewers = Relationship( + resource=project_res, + relation=_Relation.viewer.value, + subject=anon_users_sub, + ) + up.append( + WriteRelationshipsRequest( + updates=[ + RelationshipUpdate( + operation=RelationshipUpdate.OPERATION_TOUCH, relationship=all_users_are_viewers + ), + RelationshipUpdate( + operation=RelationshipUpdate.OPERATION_TOUCH, relationship=anon_users_are_viewers + ), + ], + ) + ) + down.append( + WriteRelationshipsRequest( + updates=[ + RelationshipUpdate( + operation=RelationshipUpdate.OPERATION_TOUCH, relationship=down_all_users_are_viewers + ), + RelationshipUpdate( + operation=RelationshipUpdate.OPERATION_TOUCH, relationship=down_anon_users_are_viewers + ), + ], + ) + ) + + return AuthzSchemaMigration(up=up, down=down) diff --git a/components/renku_data_services/data_connectors/__init__.py b/components/renku_data_services/data_connectors/__init__.py new file mode 100644 index 000000000..9a7285ee0 --- /dev/null +++ b/components/renku_data_services/data_connectors/__init__.py @@ -0,0 +1 @@ +"""Data connectors module.""" diff --git a/components/renku_data_services/data_connectors/api.spec.yaml b/components/renku_data_services/data_connectors/api.spec.yaml new file mode 100644 index 000000000..85c763b89 --- /dev/null +++ b/components/renku_data_services/data_connectors/api.spec.yaml @@ -0,0 +1,747 @@ +openapi: 3.0.2 +info: + title: Renku Data Services API + description: | + This service is the main backend for Renku. It provides information about users, projects, + cloud storage, access to compute resources and many other things. 
version: v1 +servers: + - url: /api/data + - url: /ui-server/api/data +paths: + /data_connectors: + get: + summary: Get all data connectors + parameters: + - in: query + description: query parameters + name: params + style: form + explode: true + schema: + $ref: "#/components/schemas/DataConnectorsGetQuery" + responses: + "200": + description: List of data connectors + content: + "application/json": + schema: + $ref: "#/components/schemas/DataConnectorsList" + headers: + page: + description: The index of the current page (starting at 1). + required: true + schema: + type: integer + per-page: + description: The number of items per page. + required: true + schema: + type: integer + total: + description: The total number of items. + required: true + schema: + type: integer + total-pages: + description: The total number of pages. + required: true + schema: + type: integer + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + post: + summary: Create a new data connector + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorPost" + responses: + "201": + description: The data connector was created + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnector" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + /data_connectors/{data_connector_id}: + parameters: + - in: path + name: data_connector_id + required: true + schema: + $ref: "#/components/schemas/Ulid" + description: the ID of the data connector + get: + summary: Get data connector details + responses: + "200": + description: The data connector + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnector" + "404": + description: The data connector does not exist + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + patch: + summary: Update specific fields of an existing data connector + parameters: + - $ref: "#/components/parameters/If-Match" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorPatch" + responses: + "200": + description: The patched data connector + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnector" + "404": + description: The data connector does not exist + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + delete: + summary: Remove a data connector + responses: + "204": + description: The data connector was removed or did not exist in the first place + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + /namespaces/{namespace}/data_connectors/{slug}: + parameters: + - in: path + name: namespace + required: true + schema: + type: string + - in: path + name: slug + required: true + schema: + type: string + get: + summary: Get a data connector by namespace and slug + responses: + "200": + description: The data connector + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnector" + "404": + description: The data connector does not exist + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + /data_connectors/{data_connector_id}/project_links: + parameters: + - in: path + name: 
data_connector_id + required: true + schema: + $ref: "#/components/schemas/Ulid" + description: the ID of the data connector + get: + summary: Get all links from a given data connector to projects + responses: + "200": + description: List of data connector to project links + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorToProjectLinksList" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + post: + summary: Create a new link from a data connector to a project + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorToProjectLinkPost" + responses: + "201": + description: The data connector was connected to a project + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorToProjectLink" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + /data_connectors/{data_connector_id}/project_links/{link_id}: + parameters: + - in: path + name: data_connector_id + required: true + schema: + $ref: "#/components/schemas/Ulid" + description: the ID of the data connector + - in: path + name: link_id + required: true + schema: + $ref: "#/components/schemas/Ulid" + description: the ID of the link between a data connector and a project + delete: + summary: Remove a link from a data connector to a project + responses: + "204": + description: The link was removed or did not exist in the first place + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + /data_connectors/{data_connector_id}/secrets: + parameters: + - in: path + name: data_connector_id + required: true + schema: + $ref: "#/components/schemas/Ulid" + description: the ID of the data connector + get: + summary: Get all saved secrets for a data connector + responses: + "200": + description: The saved data connector secrets + content: + "application/json": + schema: + $ref: "#/components/schemas/DataConnectorSecretsList" + "404": + description: The data connector was not found + content: + "application/json": + schema: + $ref: "#/components/schemas/ErrorResponse" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + patch: + summary: Save secrets for a data connector + description: New secrets will be added and existing secrets will have their value updated. Using `null` as a value will remove the corresponding secret. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorSecretPatchList" + responses: + "201": + description: The data connector secrets were saved + content: + "application/json": + schema: + $ref: "#/components/schemas/DataConnectorSecretsList" + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors + delete: + summary: Remove all saved secrets for a data connector + responses: + "204": + description: The secrets were removed or did not exist in the first place or the data connector doesn't exist + default: + $ref: "#/components/responses/Error" + tags: + - data_connectors +components: + schemas: + DataConnectorsList: + description: A list of data connectors + type: array + items: + $ref: "#/components/schemas/DataConnector" + DataConnector: + description: | + A data connector for Renku 2.0 for mounting remote data storage + type: object + additionalProperties: false + properties: + id: + $ref: "#/components/schemas/Ulid" + name: + $ref: "#/components/schemas/DataConnectorName" + namespace: + $ref: "#/components/schemas/Slug" + slug: + $ref: "#/components/schemas/Slug" + storage: + $ref: "#/components/schemas/CloudStorageCore" + creation_date: + $ref: "#/components/schemas/CreationDate" + created_by: + $ref: "#/components/schemas/UserId" + visibility: + $ref: "#/components/schemas/Visibility" + description: + $ref: "#/components/schemas/Description" + etag: + $ref: "#/components/schemas/ETag" + keywords: + $ref: "#/components/schemas/KeywordsList" + required: + - id + - name + - namespace + - slug + - storage + - creation_date + - created_by + - visibility + - etag + DataConnectorPost: + description: | + A data connector to be created in Renku 2.0 + type: object + additionalProperties: false + properties: + name: + $ref: "#/components/schemas/DataConnectorName" + namespace: + $ref: "#/components/schemas/Slug" + slug: + $ref: "#/components/schemas/Slug" + storage: + oneOf: + - $ref: "#/components/schemas/CloudStorageCorePost" + - $ref: "#/components/schemas/CloudStorageUrlV2" + visibility: + $ref: "#/components/schemas/Visibility" # Visibility is ``private`` if not passed at this point + default: "private" + description: + $ref: "#/components/schemas/Description" + keywords: + $ref: "#/components/schemas/KeywordsList" + required: + - name + - namespace + - storage + DataConnectorPatch: + description: | + Patch of a data connector + type: object + additionalProperties: false + properties: + name: + $ref: "#/components/schemas/DataConnectorName" + namespace: + $ref: "#/components/schemas/Slug" + slug: + $ref: "#/components/schemas/Slug" + storage: + $ref: "#/components/schemas/CloudStorageCorePatch" + visibility: + $ref: "#/components/schemas/Visibility" + description: + $ref: "#/components/schemas/Description" + keywords: + $ref: "#/components/schemas/KeywordsList" + CloudStorageCore: + description: Represents the configuration used to mount remote data storage + type: object + additionalProperties: false + properties: + storage_type: + $ref: "#/components/schemas/StorageType" + configuration: + $ref: "#/components/schemas/RCloneConfig" + source_path: + $ref: "#/components/schemas/SourcePath" + target_path: + $ref: "#/components/schemas/TargetPath" + readonly: + $ref: "#/components/schemas/StorageReadOnly" + sensitive_fields: + type: array + items: + $ref: "#/components/schemas/RCloneOption" + required: + - storage_type + - configuration + - source_path + - target_path + - readonly + - sensitive_fields + 
CloudStorageCorePost: + type: object + additionalProperties: false + properties: + storage_type: + $ref: "#/components/schemas/StorageType" + configuration: + $ref: "#/components/schemas/RCloneConfig" + source_path: + $ref: "#/components/schemas/SourcePath" + target_path: + $ref: "#/components/schemas/TargetPath" + readonly: + $ref: "#/components/schemas/StorageReadOnly" + default: true + required: + - configuration + - source_path + - target_path + CloudStorageCorePatch: + type: object + additionalProperties: false + properties: + storage_type: + $ref: "#/components/schemas/StorageType" + configuration: + $ref: "#/components/schemas/RCloneConfig" + source_path: + $ref: "#/components/schemas/SourcePath" + target_path: + $ref: "#/components/schemas/TargetPath" + readonly: + $ref: "#/components/schemas/StorageReadOnly" + RCloneConfig: + type: object + description: Dictionary of rclone key:value pairs (based on schema from '/storage_schema') + additionalProperties: + oneOf: + - type: integer + - type: string + nullable: true + - type: boolean + - type: object + CloudStorageUrlV2: + type: object + properties: + storage_url: + type: string + target_path: + $ref: "#/components/schemas/TargetPath" + readonly: + $ref: "#/components/schemas/StorageReadOnly" + default: true + required: + - storage_url + - target_path + example: + storage_url: s3://giab + DataConnectorToProjectLinksList: + description: A list of links from a data connector to a project + type: array + items: + $ref: "#/components/schemas/DataConnectorToProjectLink" + DataConnectorToProjectLink: + description: A link from a data connector to a project in Renku 2.0 + type: object + additionalProperties: false + properties: + id: + $ref: "#/components/schemas/Ulid" + data_connector_id: + $ref: "#/components/schemas/Ulid" + project_id: + $ref: "#/components/schemas/Ulid" + creation_date: + $ref: "#/components/schemas/CreationDate" + created_by: + $ref: "#/components/schemas/UserId" + required: + - id + - data_connector_id + - project_id + - creation_date + - created_by + DataConnectorToProjectLinkPost: + description: A link to be created from a data connector to a project in Renku 2.0 + type: object + additionalProperties: false + properties: + project_id: + $ref: "#/components/schemas/Ulid" + required: + - project_id + DataConnectorSecretsList: + description: A list of data connector secrets + type: array + items: + $ref: "#/components/schemas/DataConnectorSecret" + DataConnectorSecret: + description: Information about a credential saved for a data connector + type: object + properties: + name: + $ref: "#/components/schemas/DataConnectorSecretFieldName" + secret_id: + $ref: "#/components/schemas/Ulid" + required: + - name + - secret_id + DataConnectorSecretPatchList: + description: List of secrets to be saved for a data connector + type: array + items: + $ref: "#/components/schemas/DataConnectorSecretPatch" + DataConnectorSecretPatch: + description: Information about a credential to save for a data connector + properties: + name: + $ref: "#/components/schemas/DataConnectorSecretFieldName" + value: + $ref: "#/components/schemas/SecretValueNullable" + required: + - name + - value + DataConnectorSecretFieldName: + description: Name of the credential field + type: string + minLength: 1 + maxLength: 99 + example: "secret_key" + SecretValueNullable: + description: Secret value that can be any text + type: string + minLength: 1 + maxLength: 5000 + nullable: true + RCloneOption: + type: object + description: Single field on an RClone storage, like 
"remote" or "access_key_id" + properties: + name: + type: string + description: name of the option + help: + type: string + description: help text for the option + provider: + type: string + description: The cloud provider the option is for (See 'provider' RCloneOption in the schema for potential values) + example: AWS + default: + oneOf: + - type: number + - type: string + - type: boolean + - type: object + - type: array + description: default value for the option + default_str: + type: string + description: string representation of the default value + examples: + description: "These list potential values for this option, like an enum. With `exclusive: true`, only a value from the list is allowed." + type: array + items: + type: object + properties: + value: + type: string + description: a potential value for the option (think enum) + help: + type: string + description: help text for the value + provider: + type: string + description: The provider this value is applicable for. Empty if valid for all providers. + required: + type: boolean + description: whether the option is required or not + ispassword: + type: boolean + description: whether the field is a password (use **** for display) + sensitive: + type: boolean + description: whether the value is sensitive (not stored in the service). Do not send this in requests to the service. + advanced: + type: boolean + description: whether this is an advanced config option (probably don't show these to users) + exclusive: + type: boolean + description: if true, only values from 'examples' can be used + datatype: + type: string + description: data type of option value. RClone has more options but they map to the ones listed here. + enum: ["int", "bool", "string", "Time"] + Ulid: + description: ULID identifier + type: string + minLength: 26 + maxLength: 26 + pattern: "^[0-7][0-9A-HJKMNP-TV-Z]{25}$" # This is case-insensitive + Slug: + description: A command-line/url friendly name for a namespace + type: string + minLength: 1 + maxLength: 99 + # Slug regex rules + # from https://docs.gitlab.com/ee/user/reserved_names.html#limitations-on-usernames-project-and-group-names + # - cannot end in .git + # - cannot end in .atom + # - cannot contain any combination of two or more consecutive -._ + # - has to start with letter or number + pattern: '^(?!.*\.git$|.*\.atom$|.*[\-._][\-._].*)[a-zA-Z0-9][a-zA-Z0-9\-_.]*$' + example: "a-slug-example" + CreationDate: + description: The date and time the resource was created (in UTC and ISO-8601 format) + type: string + format: date-time + example: "2023-11-01T17:32:28Z" + UserId: + type: string + description: Keycloak user ID + example: f74a228b-1790-4276-af5f-25c2424e9b0c + pattern: "^[A-Za-z0-9]{1}[A-Za-z0-9-]+$" + Visibility: + description: Project's visibility levels + type: string + enum: + - private + - public + Description: + description: A description for the resource + type: string + maxLength: 500 + KeywordsList: + description: Project keywords + type: array + items: + $ref: "#/components/schemas/Keyword" + minItems: 0 + example: + - "project" + - "keywords" + Keyword: + description: A single keyword + type: string + minLength: 1 + maxLength: 99 + pattern: '^[A-Za-z0-9\s\-_.]*$' + DataConnectorName: + description: Renku data connector name + type: string + minLength: 1 + maxLength: 99 + example: "My Remote Data :)" + SourcePath: + description: the source path to mount, usually starts with bucket/container name + type: string + example: bucket/my/storage/folder/ + TargetPath: + description: the target 
path relative to the working directory where the storage should be mounted + type: string + example: my/project/folder + StorageType: + description: same as rclone prefix/ rclone config type. Ignored in requests, but returned in responses for convenience. + type: string + readOnly: true + StorageReadOnly: + description: Whether this storage should be mounted readonly or not + type: boolean + default: true + ETag: + type: string + description: Entity Tag + example: "9EE498F9D565D0C41E511377425F32F3" + DataConnectorsGetQuery: + description: Query params for data connectors get request + allOf: + - $ref: "#/components/schemas/PaginationRequest" + - properties: + namespace: + description: A namespace, used as a filter. + type: string + default: "" + PaginationRequest: + type: object + additionalProperties: false + properties: + page: + description: Result's page number starting from 1 + type: integer + minimum: 1 + default: 1 + per_page: + description: The number of results per page + type: integer + minimum: 1 + maximum: 100 + default: 20 + ErrorResponse: + type: object + properties: + error: + type: object + properties: + code: + type: integer + minimum: 0 + exclusiveMinimum: true + example: 1404 + detail: + type: string + example: "A more detailed optional message showing what the problem was" + message: + type: string + example: "Something went wrong - please try again later" + required: ["code", "message"] + required: ["error"] + + responses: + Error: + description: The schema for all 4xx and 5xx responses + content: + "application/json": + schema: + $ref: "#/components/schemas/ErrorResponse" + + parameters: + If-Match: + in: header + name: If-Match + description: If-Match header, for avoiding mid-air collisions + required: true + schema: + $ref: "#/components/schemas/ETag" + + securitySchemes: + oidc: + type: openIdConnect + openIdConnectUrl: /auth/realms/Renku/.well-known/openid-configuration +security: + - oidc: + - openid diff --git a/components/renku_data_services/data_connectors/apispec.py b/components/renku_data_services/data_connectors/apispec.py new file mode 100644 index 000000000..41931cc73 --- /dev/null +++ b/components/renku_data_services/data_connectors/apispec.py @@ -0,0 +1,450 @@ +# generated by datamodel-codegen: +# filename: api.spec.yaml +# timestamp: 2024-10-03T13:29:06+00:00 + +from __future__ import annotations + +from datetime import datetime +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from pydantic import ConfigDict, Field, RootModel +from renku_data_services.data_connectors.apispec_base import BaseAPISpec + + +class Example(BaseAPISpec): + value: Optional[str] = Field( + None, description="a potential value for the option (think enum)" + ) + help: Optional[str] = Field(None, description="help text for the value") + provider: Optional[str] = Field( + None, + description="The provider this value is applicable for. 
Empty if valid for all providers.", + ) + + +class Datatype(Enum): + int = "int" + bool = "bool" + string = "string" + Time = "Time" + + +class RCloneOption(BaseAPISpec): + name: Optional[str] = Field(None, description="name of the option") + help: Optional[str] = Field(None, description="help text for the option") + provider: Optional[str] = Field( + None, + description="The cloud provider the option is for (See 'provider' RCloneOption in the schema for potential values)", + example="AWS", + ) + default: Optional[Union[float, str, bool, Dict[str, Any], List]] = Field( + None, description="default value for the option" + ) + default_str: Optional[str] = Field( + None, description="string representation of the default value" + ) + examples: Optional[List[Example]] = Field( + None, + description="These list potential values for this option, like an enum. With `exclusive: true`, only a value from the list is allowed.", + ) + required: Optional[bool] = Field( + None, description="whether the option is required or not" + ) + ispassword: Optional[bool] = Field( + None, description="whether the field is a password (use **** for display)" + ) + sensitive: Optional[bool] = Field( + None, + description="whether the value is sensitive (not stored in the service). Do not send this in requests to the service.", + ) + advanced: Optional[bool] = Field( + None, + description="whether this is an advanced config option (probably don't show these to users)", + ) + exclusive: Optional[bool] = Field( + None, description="if true, only values from 'examples' can be used" + ) + datatype: Optional[Datatype] = Field( + None, + description="data type of option value. RClone has more options but they map to the ones listed here.", + ) + + +class Visibility(Enum): + private = "private" + public = "public" + + +class Keyword(RootModel[str]): + root: str = Field( + ..., + description="A single keyword", + max_length=99, + min_length=1, + pattern="^[A-Za-z0-9\\s\\-_.]*$", + ) + + +class PaginationRequest(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + page: int = Field(1, description="Result's page number starting from 1", ge=1) + per_page: int = Field( + 20, description="The number of results per page", ge=1, le=100 + ) + + +class Error(BaseAPISpec): + code: int = Field(..., example=1404, gt=0) + detail: Optional[str] = Field( + None, example="A more detailed optional message showing what the problem was" + ) + message: str = Field(..., example="Something went wrong - please try again later") + + +class ErrorResponse(BaseAPISpec): + error: Error + + +class CloudStorageCore(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + storage_type: str = Field( + ..., + description="same as rclone prefix/ rclone config type. 
Ignored in requests, but returned in responses for convenience.", + ) + configuration: Dict[str, Union[int, Optional[str], bool, Dict[str, Any]]] + source_path: str = Field( + ..., + description="the source path to mount, usually starts with bucket/container name", + example="bucket/my/storage/folder/", + ) + target_path: str = Field( + ..., + description="the target path relative to the working directory where the storage should be mounted", + example="my/project/folder", + ) + readonly: bool = Field( + ..., description="Whether this storage should be mounted readonly or not" + ) + sensitive_fields: List[RCloneOption] + + +class CloudStorageCorePost(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + storage_type: Optional[str] = Field( + None, + description="same as rclone prefix/ rclone config type. Ignored in requests, but returned in responses for convenience.", + ) + configuration: Dict[str, Union[int, Optional[str], bool, Dict[str, Any]]] + source_path: str = Field( + ..., + description="the source path to mount, usually starts with bucket/container name", + example="bucket/my/storage/folder/", + ) + target_path: str = Field( + ..., + description="the target path relative to the working directory where the storage should be mounted", + example="my/project/folder", + ) + readonly: bool = Field( + True, description="Whether this storage should be mounted readonly or not" + ) + + +class CloudStorageCorePatch(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + storage_type: Optional[str] = Field( + None, + description="same as rclone prefix/ rclone config type. Ignored in requests, but returned in responses for convenience.", + ) + configuration: Optional[ + Dict[str, Union[int, Optional[str], bool, Dict[str, Any]]] + ] = None + source_path: Optional[str] = Field( + None, + description="the source path to mount, usually starts with bucket/container name", + example="bucket/my/storage/folder/", + ) + target_path: Optional[str] = Field( + None, + description="the target path relative to the working directory where the storage should be mounted", + example="my/project/folder", + ) + readonly: Optional[bool] = Field( + None, description="Whether this storage should be mounted readonly or not" + ) + + +class CloudStorageUrlV2(BaseAPISpec): + storage_url: str + target_path: str = Field( + ..., + description="the target path relative to the working directory where the storage should be mounted", + example="my/project/folder", + ) + readonly: bool = Field( + True, description="Whether this storage should be mounted readonly or not" + ) + + +class DataConnectorToProjectLink(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + data_connector_id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + project_id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + creation_date: datetime = Field( + ..., + description="The date and time the resource was created (in UTC and ISO-8601 format)", + example="2023-11-01T17:32:28Z", + ) + created_by: str = Field( + ..., + description="Keycloak user ID", + example="f74a228b-1790-4276-af5f-25c2424e9b0c", + pattern="^[A-Za-z0-9]{1}[A-Za-z0-9-]+$", + ) + + +class DataConnectorToProjectLinkPost(BaseAPISpec): + 
model_config = ConfigDict( + extra="forbid", + ) + project_id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + + +class DataConnectorSecret(BaseAPISpec): + name: str = Field( + ..., + description="Name of the credential field", + example="secret_key", + max_length=99, + min_length=1, + ) + secret_id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + + +class DataConnectorSecretPatch(BaseAPISpec): + name: str = Field( + ..., + description="Name of the credential field", + example="secret_key", + max_length=99, + min_length=1, + ) + value: Optional[str] = Field( + ..., + description="Secret value that can be any text", + max_length=5000, + min_length=1, + ) + + +class DataConnectorsGetQuery(PaginationRequest): + namespace: str = Field("", description="A namespace, used as a filter.") + + +class DataConnectorsGetParametersQuery(BaseAPISpec): + params: Optional[DataConnectorsGetQuery] = None + + +class DataConnector(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + name: str = Field( + ..., + description="Renku data connector name", + example="My Remote Data :)", + max_length=99, + min_length=1, + ) + namespace: str = Field( + ..., + description="A command-line/url friendly name for a namespace", + example="a-slug-example", + max_length=99, + min_length=1, + pattern="^(?!.*\\.git$|.*\\.atom$|.*[\\-._][\\-._].*)[a-zA-Z0-9][a-zA-Z0-9\\-_.]*$", + ) + slug: str = Field( + ..., + description="A command-line/url friendly name for a namespace", + example="a-slug-example", + max_length=99, + min_length=1, + pattern="^(?!.*\\.git$|.*\\.atom$|.*[\\-._][\\-._].*)[a-zA-Z0-9][a-zA-Z0-9\\-_.]*$", + ) + storage: CloudStorageCore + creation_date: datetime = Field( + ..., + description="The date and time the resource was created (in UTC and ISO-8601 format)", + example="2023-11-01T17:32:28Z", + ) + created_by: str = Field( + ..., + description="Keycloak user ID", + example="f74a228b-1790-4276-af5f-25c2424e9b0c", + pattern="^[A-Za-z0-9]{1}[A-Za-z0-9-]+$", + ) + visibility: Visibility + description: Optional[str] = Field( + None, description="A description for the resource", max_length=500 + ) + etag: str = Field( + ..., description="Entity Tag", example="9EE498F9D565D0C41E511377425F32F3" + ) + keywords: Optional[List[Keyword]] = Field( + None, + description="Project keywords", + example=["project", "keywords"], + min_length=0, + ) + + +class DataConnectorPost(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + name: str = Field( + ..., + description="Renku data connector name", + example="My Remote Data :)", + max_length=99, + min_length=1, + ) + namespace: str = Field( + ..., + description="A command-line/url friendly name for a namespace", + example="a-slug-example", + max_length=99, + min_length=1, + pattern="^(?!.*\\.git$|.*\\.atom$|.*[\\-._][\\-._].*)[a-zA-Z0-9][a-zA-Z0-9\\-_.]*$", + ) + slug: Optional[str] = Field( + None, + description="A command-line/url friendly name for a namespace", + example="a-slug-example", + max_length=99, + min_length=1, + pattern="^(?!.*\\.git$|.*\\.atom$|.*[\\-._][\\-._].*)[a-zA-Z0-9][a-zA-Z0-9\\-_.]*$", + ) + storage: Union[CloudStorageCorePost, CloudStorageUrlV2] + visibility: Visibility = Visibility.private + description: 
Optional[str] = Field(
+        None, description="A description for the resource", max_length=500
+    )
+    keywords: Optional[List[Keyword]] = Field(
+        None,
+        description="Project keywords",
+        example=["project", "keywords"],
+        min_length=0,
+    )
+
+
+class DataConnectorPatch(BaseAPISpec):
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+    name: Optional[str] = Field(
+        None,
+        description="Renku data connector name",
+        example="My Remote Data :)",
+        max_length=99,
+        min_length=1,
+    )
+    namespace: Optional[str] = Field(
+        None,
+        description="A command-line/url friendly name for a namespace",
+        example="a-slug-example",
+        max_length=99,
+        min_length=1,
+        pattern="^(?!.*\\.git$|.*\\.atom$|.*[\\-._][\\-._].*)[a-zA-Z0-9][a-zA-Z0-9\\-_.]*$",
+    )
+    slug: Optional[str] = Field(
+        None,
+        description="A command-line/url friendly name for a namespace",
+        example="a-slug-example",
+        max_length=99,
+        min_length=1,
+        pattern="^(?!.*\\.git$|.*\\.atom$|.*[\\-._][\\-._].*)[a-zA-Z0-9][a-zA-Z0-9\\-_.]*$",
+    )
+    storage: Optional[CloudStorageCorePatch] = None
+    visibility: Optional[Visibility] = None
+    description: Optional[str] = Field(
+        None, description="A description for the resource", max_length=500
+    )
+    keywords: Optional[List[Keyword]] = Field(
+        None,
+        description="Project keywords",
+        example=["project", "keywords"],
+        min_length=0,
+    )
+
+
+class DataConnectorToProjectLinksList(RootModel[List[DataConnectorToProjectLink]]):
+    root: List[DataConnectorToProjectLink] = Field(
+        ..., description="A list of links from a data connector to a project"
+    )
+
+
+class DataConnectorSecretsList(RootModel[List[DataConnectorSecret]]):
+    root: List[DataConnectorSecret] = Field(
+        ..., description="A list of data connector secrets"
+    )
+
+
+class DataConnectorSecretPatchList(RootModel[List[DataConnectorSecretPatch]]):
+    root: List[DataConnectorSecretPatch] = Field(
+        ..., description="List of secrets to be saved for a data connector"
+    )
+
+
+class DataConnectorsList(RootModel[List[DataConnector]]):
+    root: List[DataConnector] = Field(..., description="A list of data connectors")
diff --git a/components/renku_data_services/data_connectors/apispec_base.py b/components/renku_data_services/data_connectors/apispec_base.py
new file mode 100644
index 000000000..476c07927
--- /dev/null
+++ b/components/renku_data_services/data_connectors/apispec_base.py
@@ -0,0 +1,22 @@
+"""Base models for API specifications."""
+
+from pydantic import BaseModel, field_validator
+from ulid import ULID
+
+
+class BaseAPISpec(BaseModel):
+    """Base API specification."""
+
+    class Config:
+        """Enables orm mode for pydantic."""
+
+        from_attributes = True
+        # NOTE: By default the pydantic library does not use python for regex but a rust crate
+        # this rust crate does not support lookahead regex syntax but we need it in this component
+        regex_engine = "python-re"
+
+    @field_validator("id", mode="before", check_fields=False)
+    @classmethod
+    def serialize_id(cls, id: str | ULID) -> str:
+        """Custom serializer that can handle ULIDs."""
+        return str(id)
diff --git a/components/renku_data_services/data_connectors/blueprints.py b/components/renku_data_services/data_connectors/blueprints.py
new file mode 100644
index 000000000..745474f58
--- /dev/null
+++ b/components/renku_data_services/data_connectors/blueprints.py
@@ -0,0 +1,365 @@
+"""Data connectors blueprint."""
+
+from dataclasses import dataclass
+from typing import Any
+
+from sanic import Request
+from sanic.response import HTTPResponse, JSONResponse
+from sanic_ext import validate
+from ulid import
ULID + +from renku_data_services import base_models +from renku_data_services.base_api.auth import ( + authenticate, + only_authenticated, +) +from renku_data_services.base_api.blueprint import BlueprintFactoryResponse, CustomBlueprint +from renku_data_services.base_api.etag import extract_if_none_match, if_match_required +from renku_data_services.base_api.misc import validate_body_root_model, validate_query +from renku_data_services.base_api.pagination import PaginationRequest, paginate +from renku_data_services.base_models.validation import validate_and_dump, validated_json +from renku_data_services.data_connectors import apispec, models +from renku_data_services.data_connectors.core import ( + dump_storage_with_sensitive_fields, + validate_data_connector_patch, + validate_data_connector_secrets_patch, + validate_unsaved_data_connector, +) +from renku_data_services.data_connectors.db import ( + DataConnectorProjectLinkRepository, + DataConnectorRepository, + DataConnectorSecretRepository, +) +from renku_data_services.storage.rclone import RCloneValidator + + +@dataclass(kw_only=True) +class DataConnectorsBP(CustomBlueprint): + """Handlers for manipulating data connectors.""" + + data_connector_repo: DataConnectorRepository + data_connector_to_project_link_repo: DataConnectorProjectLinkRepository + data_connector_secret_repo: DataConnectorSecretRepository + authenticator: base_models.Authenticator + + def get_all(self) -> BlueprintFactoryResponse: + """List data connectors.""" + + @authenticate(self.authenticator) + @validate_query(query=apispec.DataConnectorsGetQuery) + @paginate + async def _get_all( + _: Request, + user: base_models.APIUser, + pagination: PaginationRequest, + query: apispec.DataConnectorsGetQuery, + validator: RCloneValidator, + ) -> tuple[list[dict[str, Any]], int]: + data_connectors, total_num = await self.data_connector_repo.get_data_connectors( + user=user, + pagination=pagination, + namespace=query.namespace, + ) + return [ + validate_and_dump( + apispec.DataConnector, + self._dump_data_connector(dc, validator=validator), + ) + for dc in data_connectors + ], total_num + + return "/data_connectors", ["GET"], _get_all + + def post(self) -> BlueprintFactoryResponse: + """Create a new data connector.""" + + @authenticate(self.authenticator) + @only_authenticated + @validate(json=apispec.DataConnectorPost) + async def _post( + _: Request, user: base_models.APIUser, body: apispec.DataConnectorPost, validator: RCloneValidator + ) -> JSONResponse: + data_connector = validate_unsaved_data_connector(body, validator=validator) + result = await self.data_connector_repo.insert_data_connector(user=user, data_connector=data_connector) + return validated_json( + apispec.DataConnector, + self._dump_data_connector(result, validator=validator), + status=201, + ) + + return "/data_connectors", ["POST"], _post + + def get_one(self) -> BlueprintFactoryResponse: + """Get a specific data connector.""" + + @authenticate(self.authenticator) + @extract_if_none_match + async def _get_one( + _: Request, user: base_models.APIUser, data_connector_id: ULID, etag: str | None, validator: RCloneValidator + ) -> HTTPResponse: + data_connector = await self.data_connector_repo.get_data_connector( + user=user, data_connector_id=data_connector_id + ) + + if data_connector.etag == etag: + return HTTPResponse(status=304) + + headers = {"ETag": data_connector.etag} + return validated_json( + apispec.DataConnector, + self._dump_data_connector(data_connector, validator=validator), + headers=headers, + ) + + 
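+        # NOTE: `extract_if_none_match` above reads the If-None-Match request header
+        # into `etag`; when it equals the stored entity tag, the handler returns the
+        # 304 above instead of re-serializing the data connector.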
return "/data_connectors/", ["GET"], _get_one + + def get_one_by_slug(self) -> BlueprintFactoryResponse: + """Get a specific data connector by namespace/entity slug.""" + + @authenticate(self.authenticator) + @extract_if_none_match + async def _get_one_by_slug( + _: Request, + user: base_models.APIUser, + namespace: str, + slug: str, + etag: str | None, + validator: RCloneValidator, + ) -> HTTPResponse: + data_connector = await self.data_connector_repo.get_data_connector_by_slug( + user=user, namespace=namespace, slug=slug + ) + + if data_connector.etag == etag: + return HTTPResponse(status=304) + + headers = {"ETag": data_connector.etag} + return validated_json( + apispec.DataConnector, + self._dump_data_connector(data_connector, validator=validator), + headers=headers, + ) + + return "/namespaces//data_connectors/", ["GET"], _get_one_by_slug + + def patch(self) -> BlueprintFactoryResponse: + """Partially update a data connector.""" + + @authenticate(self.authenticator) + @only_authenticated + @if_match_required + @validate(json=apispec.DataConnectorPatch) + async def _patch( + _: Request, + user: base_models.APIUser, + data_connector_id: ULID, + body: apispec.DataConnectorPatch, + etag: str, + validator: RCloneValidator, + ) -> JSONResponse: + existing_dc = await self.data_connector_repo.get_data_connector( + user=user, data_connector_id=data_connector_id + ) + dc_patch = validate_data_connector_patch(existing_dc, body, validator=validator) + data_connector_update = await self.data_connector_repo.update_data_connector( + user=user, data_connector_id=data_connector_id, patch=dc_patch, etag=etag + ) + + return validated_json( + apispec.DataConnector, + self._dump_data_connector(data_connector_update.new, validator=validator), + ) + + return "/data_connectors/", ["PATCH"], _patch + + def delete(self) -> BlueprintFactoryResponse: + """Delete a data connector.""" + + @authenticate(self.authenticator) + @only_authenticated + async def _delete( + _: Request, + user: base_models.APIUser, + data_connector_id: ULID, + ) -> HTTPResponse: + await self.data_connector_repo.delete_data_connector(user=user, data_connector_id=data_connector_id) + return HTTPResponse(status=204) + + return "/data_connectors/", ["DELETE"], _delete + + def get_all_project_links(self) -> BlueprintFactoryResponse: + """List all links from a given data connector to projects.""" + + @authenticate(self.authenticator) + async def _get_all_project_links( + _: Request, + user: base_models.APIUser, + data_connector_id: ULID, + ) -> JSONResponse: + links = await self.data_connector_to_project_link_repo.get_links_from( + user=user, data_connector_id=data_connector_id + ) + return validated_json( + apispec.DataConnectorToProjectLinksList, + [self._dump_data_connector_to_project_link(link) for link in links], + ) + + return "/data_connectors//project_links", ["GET"], _get_all_project_links + + def post_project_link(self) -> BlueprintFactoryResponse: + """Create a new link from a data connector to a project.""" + + @authenticate(self.authenticator) + @only_authenticated + @validate(json=apispec.DataConnectorToProjectLinkPost) + async def _post_project_link( + _: Request, + user: base_models.APIUser, + data_connector_id: ULID, + body: apispec.DataConnectorToProjectLinkPost, + ) -> JSONResponse: + unsaved_link = models.UnsavedDataConnectorToProjectLink( + data_connector_id=data_connector_id, + project_id=ULID.from_str(body.project_id), + ) + link = await self.data_connector_to_project_link_repo.insert_link(user=user, link=unsaved_link) + 
+    def post_project_link(self) -> BlueprintFactoryResponse:
+        """Create a new link from a data connector to a project."""
+
+        @authenticate(self.authenticator)
+        @only_authenticated
+        @validate(json=apispec.DataConnectorToProjectLinkPost)
+        async def _post_project_link(
+            _: Request,
+            user: base_models.APIUser,
+            data_connector_id: ULID,
+            body: apispec.DataConnectorToProjectLinkPost,
+        ) -> JSONResponse:
+            unsaved_link = models.UnsavedDataConnectorToProjectLink(
+                data_connector_id=data_connector_id,
+                project_id=ULID.from_str(body.project_id),
+            )
+            link = await self.data_connector_to_project_link_repo.insert_link(user=user, link=unsaved_link)
+            return validated_json(
+                apispec.DataConnectorToProjectLink, self._dump_data_connector_to_project_link(link), status=201
+            )
+
+        return "/data_connectors/<data_connector_id:ulid>/project_links", ["POST"], _post_project_link
+
+    def delete_project_link(self) -> BlueprintFactoryResponse:
+        """Delete a link from a data connector to a project."""
+
+        @authenticate(self.authenticator)
+        @only_authenticated
+        async def _delete_project_link(
+            _: Request,
+            user: base_models.APIUser,
+            data_connector_id: ULID,
+            link_id: ULID,
+        ) -> HTTPResponse:
+            await self.data_connector_to_project_link_repo.delete_link(
+                user=user, data_connector_id=data_connector_id, link_id=link_id
+            )
+            return HTTPResponse(status=204)
+
+        return (
+            "/data_connectors/<data_connector_id:ulid>/project_links/<link_id:ulid>",
+            ["DELETE"],
+            _delete_project_link,
+        )
+
+    def get_all_data_connectors_links_to_project(self) -> BlueprintFactoryResponse:
+        """List all links from data connectors to a given project."""
+
+        @authenticate(self.authenticator)
+        async def _get_all_data_connectors_links_to_project(
+            _: Request,
+            user: base_models.APIUser,
+            project_id: ULID,
+        ) -> JSONResponse:
+            links = await self.data_connector_to_project_link_repo.get_links_to(user=user, project_id=project_id)
+            return validated_json(
+                apispec.DataConnectorToProjectLinksList,
+                [self._dump_data_connector_to_project_link(link) for link in links],
+            )
+
+        return "/projects/<project_id:ulid>/data_connector_links", ["GET"], _get_all_data_connectors_links_to_project
+
+    def get_secrets(self) -> BlueprintFactoryResponse:
+        """List all saved secrets for a data connector."""
+
+        @authenticate(self.authenticator)
+        @only_authenticated
+        async def _get_secrets(
+            _: Request,
+            user: base_models.APIUser,
+            data_connector_id: ULID,
+        ) -> JSONResponse:
+            secrets = await self.data_connector_secret_repo.get_data_connector_secrets(
+                user=user, data_connector_id=data_connector_id
+            )
+            return validated_json(
+                apispec.DataConnectorSecretsList, [self._dump_data_connector_secret(secret) for secret in secrets]
+            )
+
+        return "/data_connectors/<data_connector_id:ulid>/secrets", ["GET"], _get_secrets
+
+    def patch_secrets(self) -> BlueprintFactoryResponse:
+        """Create, update or delete saved secrets for a data connector."""
+
+        @authenticate(self.authenticator)
+        @only_authenticated
+        @validate_body_root_model(json=apispec.DataConnectorSecretPatchList)
+        async def _patch_secrets(
+            _: Request,
+            user: base_models.APIUser,
+            data_connector_id: ULID,
+            body: apispec.DataConnectorSecretPatchList,
+        ) -> JSONResponse:
+            unsaved_secrets = validate_data_connector_secrets_patch(put=body)
+            secrets = await self.data_connector_secret_repo.patch_data_connector_secrets(
+                user=user, data_connector_id=data_connector_id, secrets=unsaved_secrets
+            )
+            return validated_json(
+                apispec.DataConnectorSecretsList, [self._dump_data_connector_secret(secret) for secret in secrets]
+            )
+
+        return "/data_connectors/<data_connector_id:ulid>/secrets", ["PATCH"], _patch_secrets
+
+    def delete_secrets(self) -> BlueprintFactoryResponse:
+        """Delete all saved secrets for a data connector."""
+
+        @authenticate(self.authenticator)
+        @only_authenticated
+        async def _delete_secrets(
+            _: Request,
+            user: base_models.APIUser,
+            data_connector_id: ULID,
+        ) -> HTTPResponse:
+            await self.data_connector_secret_repo.delete_data_connector_secrets(
+                user=user, data_connector_id=data_connector_id
+            )
+            return HTTPResponse(status=204)
+
+        return "/data_connectors/<data_connector_id:ulid>/secrets", ["DELETE"], _delete_secrets
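Because DataConnectorSecretPatch allows a null value, a single PATCH body can create, update, and delete secrets in one call. A sketch of such a payload (the field names and values are hypothetical and depend on the storage provider):

    secrets_patch = [
        {"name": "access_key_id", "value": "AKIA..."},  # created or updated
        {"name": "secret_access_key", "value": None},   # null value: remove this saved secret
    ]
    # Sending this JSON body via PATCH to /data_connectors/{dc_id}/secrets returns
    # the resulting DataConnectorSecretsList for the calling user.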
"""Dumps a data connector for API responses.""" + storage = dump_storage_with_sensitive_fields(data_connector.storage, validator=validator) + return dict( + id=str(data_connector.id), + name=data_connector.name, + namespace=data_connector.namespace.slug, + slug=data_connector.slug, + storage=storage, + # secrets=, + creation_date=data_connector.creation_date, + created_by=data_connector.created_by, + visibility=data_connector.visibility.value, + description=data_connector.description, + etag=data_connector.etag, + keywords=data_connector.keywords or [], + ) + + @staticmethod + def _dump_data_connector_to_project_link(link: models.DataConnectorToProjectLink) -> dict[str, Any]: + """Dumps a link from a data connector to a project for API responses.""" + return dict( + id=str(link.id), + data_connector_id=str(link.data_connector_id), + project_id=str(link.project_id), + creation_date=link.creation_date, + created_by=link.created_by, + ) + + @staticmethod + def _dump_data_connector_secret(secret: models.DataConnectorSecret) -> dict[str, Any]: + """Dumps a data connector secret for API responses.""" + return dict( + name=secret.name, + secret_id=str(secret.secret_id), + ) diff --git a/components/renku_data_services/data_connectors/core.py b/components/renku_data_services/data_connectors/core.py new file mode 100644 index 000000000..64a65ccdb --- /dev/null +++ b/components/renku_data_services/data_connectors/core.py @@ -0,0 +1,140 @@ +"""Business logic for data connectors.""" + +from dataclasses import asdict +from typing import Any + +from renku_data_services import base_models, errors +from renku_data_services.authz.models import Visibility +from renku_data_services.data_connectors import apispec, models +from renku_data_services.storage import models as storage_models +from renku_data_services.storage.rclone import RCloneValidator + + +def dump_storage_with_sensitive_fields( + storage: models.CloudStorageCore, validator: RCloneValidator +) -> models.CloudStorageCoreWithSensitiveFields: + """Add sensitive fields to a storage configuration.""" + return models.CloudStorageCoreWithSensitiveFields( + sensitive_fields=list(validator.get_private_fields(storage.configuration)), **asdict(storage) + ) + + +def validate_unsaved_storage( + storage: apispec.CloudStorageCorePost | apispec.CloudStorageUrlV2, validator: RCloneValidator +) -> models.CloudStorageCore: + """Validate the storage configuration of an unsaved data connector.""" + + configuration: dict[str, Any] + source_path: str + + if isinstance(storage, apispec.CloudStorageUrlV2): + cloud_storage = storage_models.UnsavedCloudStorage.from_url( + project_id="FAKEPROJECTID", + name="fake-storage-name", + storage_url=storage.storage_url, + target_path=storage.target_path, + readonly=storage.readonly, + ) + configuration = cloud_storage.configuration.config + source_path = cloud_storage.source_path + else: + configuration = storage.configuration + source_path = storage.source_path + + validator.validate(configuration) + + return models.CloudStorageCore( + storage_type=configuration["type"], + configuration=configuration, + source_path=source_path, + target_path=storage.target_path, + readonly=storage.readonly, + ) + + +def validate_unsaved_data_connector( + body: apispec.DataConnectorPost, validator: RCloneValidator +) -> models.UnsavedDataConnector: + """Validate an unsaved data connector.""" + + keywords = [kw.root for kw in body.keywords] if body.keywords is not None else [] + storage = validate_unsaved_storage(body.storage, validator=validator) + 
+ return models.UnsavedDataConnector( + name=body.name, + namespace=body.namespace, + slug=body.slug or base_models.Slug.from_name(body.name).value, + visibility=Visibility(body.visibility.value), + created_by="", + storage=storage, + description=body.description, + keywords=keywords, + ) + + +def validate_storage_patch( + storage: models.CloudStorageCore, patch: apispec.CloudStorageCorePatch, validator: RCloneValidator +) -> models.CloudStorageCorePatch: + """Validate the update to a data connector's storage.""" + + if patch.configuration is not None: + # we need to apply the patch to the existing storage to properly validate it + patch.configuration = {**storage.configuration, **patch.configuration} + dict_items = list(patch.configuration.items()) + for k, v in dict_items: + if v is None: + # delete fields that were unset + del patch.configuration[k] + validator.validate(patch.configuration) + + return models.CloudStorageCorePatch( + storage_type=patch.storage_type, + configuration=patch.configuration, + source_path=patch.source_path, + target_path=patch.target_path, + readonly=patch.readonly, + ) + + +def validate_data_connector_patch( + data_connector: models.DataConnector, + patch: apispec.DataConnectorPatch, + validator: RCloneValidator, +) -> models.DataConnectorPatch: + """Validate the update to a data connector.""" + + keywords = [kw.root for kw in patch.keywords] if patch.keywords is not None else None + storage = ( + validate_storage_patch(data_connector.storage, patch.storage, validator=validator) + if patch.storage is not None + else None + ) + + return models.DataConnectorPatch( + name=patch.name, + namespace=patch.namespace, + slug=patch.slug, + visibility=Visibility(patch.visibility.value) if patch.visibility is not None else None, + description=patch.description, + keywords=keywords, + storage=storage, + ) + + +def validate_data_connector_secrets_patch( + put: apispec.DataConnectorSecretPatchList, +) -> list[models.DataConnectorSecretUpdate]: + """Validate the update to a data connector's secrets.""" + seen_names: set[str] = set() + for secret in put.root: + if secret.name in seen_names: + raise errors.ValidationError(message=f"Found duplicate name '{secret.name}' in the list of secrets.") + seen_names.add(secret.name) + + return [ + models.DataConnectorSecretUpdate( + name=secret.name, + value=secret.value, + ) + for secret in put.root + ] diff --git a/components/renku_data_services/data_connectors/db.py b/components/renku_data_services/data_connectors/db.py new file mode 100644 index 000000000..6adef7993 --- /dev/null +++ b/components/renku_data_services/data_connectors/db.py @@ -0,0 +1,575 @@ +"""Adapters for data connectors database classes.""" + +from collections.abc import Callable +from typing import TypeVar + +from cryptography.hazmat.primitives.asymmetric import rsa +from sqlalchemy import Select, delete, func, select +from sqlalchemy.ext.asyncio import AsyncSession +from ulid import ULID + +from renku_data_services import base_models, errors +from renku_data_services.authz.authz import Authz, AuthzOperation, ResourceType +from renku_data_services.authz.models import Scope +from renku_data_services.base_api.pagination import PaginationRequest +from renku_data_services.data_connectors import apispec, models +from renku_data_services.data_connectors import orm as schemas +from renku_data_services.namespace import orm as ns_schemas +from renku_data_services.secrets import orm as secrets_schemas +from renku_data_services.secrets.core import encrypt_user_secret +from 
renku_data_services.secrets.models import SecretKind +from renku_data_services.users.db import UserRepo +from renku_data_services.utils.core import with_db_transaction + + +class DataConnectorRepository: + """Repository for data connectors.""" + + def __init__( + self, + session_maker: Callable[..., AsyncSession], + authz: Authz, + ) -> None: + self.session_maker = session_maker + self.authz = authz + + async def get_data_connectors( + self, user: base_models.APIUser, pagination: PaginationRequest, namespace: str | None = None + ) -> tuple[list[models.DataConnector], int]: + """Get multiple data connectors from the database.""" + data_connector_ids = await self.authz.resources_with_permission( + user, user.id, ResourceType.data_connector, Scope.READ + ) + + async with self.session_maker() as session: + stmt = select(schemas.DataConnectorORM).where(schemas.DataConnectorORM.id.in_(data_connector_ids)) + if namespace: + stmt = _filter_by_namespace_slug(stmt, namespace) + stmt = stmt.limit(pagination.per_page).offset(pagination.offset) + stmt = stmt.order_by(schemas.DataConnectorORM.id.desc()) + stmt_count = ( + select(func.count()) + .select_from(schemas.DataConnectorORM) + .where(schemas.DataConnectorORM.id.in_(data_connector_ids)) + ) + if namespace: + stmt_count = _filter_by_namespace_slug(stmt_count, namespace) + results = await session.scalars(stmt), await session.scalar(stmt_count) + data_connectors = results[0].all() + total_elements = results[1] or 0 + return [dc.dump() for dc in data_connectors], total_elements + + async def get_data_connector( + self, + user: base_models.APIUser, + data_connector_id: ULID, + ) -> models.DataConnector: + """Get one data connector from the database.""" + not_found_msg = f"Data connector with id '{data_connector_id}' does not exist or you do not have access to it." + + authorized = await self.authz.has_permission(user, ResourceType.data_connector, data_connector_id, Scope.READ) + if not authorized: + raise errors.MissingResourceError(message=not_found_msg) + + async with self.session_maker() as session: + result = await session.scalars( + select(schemas.DataConnectorORM).where(schemas.DataConnectorORM.id == data_connector_id) + ) + data_connector = result.one_or_none() + if data_connector is None: + raise errors.MissingResourceError(message=not_found_msg) + return data_connector.dump() + + async def get_data_connector_by_slug( + self, user: base_models.APIUser, namespace: str, slug: str + ) -> models.DataConnector: + """Get one data connector from the database by slug.""" + not_found_msg = ( + f"Data connector with identifier '{namespace}/{slug}' does not exist or you do not have access to it." 
+ ) + + async with self.session_maker() as session: + stmt = select(schemas.DataConnectorORM) + stmt = _filter_by_namespace_slug(stmt, namespace) + stmt = stmt.where(ns_schemas.EntitySlugORM.slug == slug.lower()) + result = await session.scalars(stmt) + data_connector = result.one_or_none() + if data_connector is None: + raise errors.MissingResourceError(message=not_found_msg) + + authorized = await self.authz.has_permission( + user=user, + resource_type=ResourceType.data_connector, + resource_id=data_connector.id, + scope=Scope.READ, + ) + if not authorized: + raise errors.MissingResourceError(message=not_found_msg) + + return data_connector.dump() + + @with_db_transaction + @Authz.authz_change(AuthzOperation.create, ResourceType.data_connector) + async def insert_data_connector( + self, + user: base_models.APIUser, + data_connector: models.UnsavedDataConnector, + *, + session: AsyncSession | None = None, + ) -> models.DataConnector: + """Insert a new data connector entry.""" + if not session: + raise errors.ProgrammingError(message="A database session is required.") + ns = await session.scalar( + select(ns_schemas.NamespaceORM).where(ns_schemas.NamespaceORM.slug == data_connector.namespace.lower()) + ) + if not ns: + raise errors.MissingResourceError( + message=f"The data connector cannot be created because the namespace {data_connector.namespace} does not exist." # noqa E501 + ) + if not ns.group_id and not ns.user_id: + raise errors.ProgrammingError(message="Found a namespace that has no group or user associated with it.") + + if user.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + + resource_type, resource_id = ( + (ResourceType.group, ns.group_id) if ns.group and ns.group_id else (ResourceType.user_namespace, ns.id) + ) + has_permission = await self.authz.has_permission(user, resource_type, resource_id, Scope.WRITE) + if not has_permission: + raise errors.ForbiddenError( + message=f"The data connector cannot be created because you do not have sufficient permissions with the namespace {data_connector.namespace}" # noqa: E501 + ) + + slug = data_connector.slug or base_models.Slug.from_name(data_connector.name).value + + existing_slug = await session.scalar( + select(ns_schemas.EntitySlugORM) + .where(ns_schemas.EntitySlugORM.namespace_id == ns.id) + .where(ns_schemas.EntitySlugORM.slug == slug) + ) + if existing_slug is not None: + raise errors.ConflictError(message=f"An entity with the slug '{ns.slug}/{slug}' already exists.") + + visibility_orm = ( + apispec.Visibility(data_connector.visibility) + if isinstance(data_connector.visibility, str) + else apispec.Visibility(data_connector.visibility.value) + ) + data_connector_orm = schemas.DataConnectorORM( + name=data_connector.name, + visibility=visibility_orm, + storage_type=data_connector.storage.storage_type, + configuration=data_connector.storage.configuration, + source_path=data_connector.storage.source_path, + target_path=data_connector.storage.target_path, + readonly=data_connector.storage.readonly, + created_by_id=user.id, + description=data_connector.description, + keywords=data_connector.keywords, + ) + data_connector_slug = ns_schemas.EntitySlugORM.create_data_connector_slug( + slug, data_connector_id=data_connector_orm.id, namespace_id=ns.id + ) + + session.add(data_connector_orm) + session.add(data_connector_slug) + await session.flush() + await session.refresh(data_connector_orm) + + return data_connector_orm.dump() + + @with_db_transaction + 
@Authz.authz_change(AuthzOperation.update, ResourceType.data_connector) + async def update_data_connector( + self, + user: base_models.APIUser, + data_connector_id: ULID, + patch: models.DataConnectorPatch, + etag: str, + *, + session: AsyncSession | None = None, + ) -> models.DataConnectorUpdate: + """Update a data connector entry.""" + not_found_msg = f"Data connector with id '{data_connector_id}' does not exist or you do not have access to it." + + if not session: + raise errors.ProgrammingError(message="A database session is required.") + result = await session.scalars( + select(schemas.DataConnectorORM).where(schemas.DataConnectorORM.id == data_connector_id) + ) + data_connector = result.one_or_none() + if data_connector is None: + raise errors.MissingResourceError(message=not_found_msg) + old_data_connector = data_connector.dump() + + required_scope = Scope.WRITE + if patch.visibility is not None and patch.visibility != old_data_connector.visibility: + # NOTE: changing the visibility requires the user to be owner which means they should have DELETE permission + required_scope = Scope.DELETE + if patch.namespace is not None and patch.namespace != old_data_connector.namespace.slug: + # NOTE: changing the namespace requires the user to be owner which means they should have DELETE permission # noqa E501 + required_scope = Scope.DELETE + authorized = await self.authz.has_permission( + user, ResourceType.data_connector, data_connector_id, required_scope + ) + if not authorized: + raise errors.MissingResourceError(message=not_found_msg) + + current_etag = data_connector.dump().etag + if current_etag != etag: + raise errors.ConflictError(message=f"Current ETag is {current_etag}, not {etag}.") + + # TODO: handle slug update + if patch.name is not None: + data_connector.name = patch.name + if patch.visibility is not None: + visibility_orm = ( + apispec.Visibility(patch.visibility) + if isinstance(patch.visibility, str) + else apispec.Visibility(patch.visibility.value) + ) + data_connector.visibility = visibility_orm + if patch.namespace is not None: + ns = await session.scalar( + select(ns_schemas.NamespaceORM).where(ns_schemas.NamespaceORM.slug == patch.namespace.lower()) + ) + if not ns: + raise errors.MissingResourceError(message=f"The namespace with slug {patch.namespace} does not exist.") + if not ns.group_id and not ns.user_id: + raise errors.ProgrammingError(message="Found a namespace that has no group or user associated with it.") + resource_type, resource_id = ( + (ResourceType.group, ns.group_id) if ns.group and ns.group_id else (ResourceType.user_namespace, ns.id) + ) + has_permission = await self.authz.has_permission(user, resource_type, resource_id, Scope.WRITE) + if not has_permission: + raise errors.ForbiddenError( + message=f"The data connector cannot be moved because you do not have sufficient permissions with the namespace {patch.namespace}." 
# noqa: E501 + ) + data_connector.slug.namespace_id = ns.id + if patch.description is not None: + data_connector.description = patch.description if patch.description else None + if patch.keywords is not None: + data_connector.keywords = patch.keywords if patch.keywords else None + if patch.storage is not None: + if patch.storage.configuration is not None: + data_connector.configuration = patch.storage.configuration + data_connector.storage_type = data_connector.configuration["type"] + if patch.storage.source_path is not None: + data_connector.source_path = patch.storage.source_path + if patch.storage.target_path is not None: + data_connector.target_path = patch.storage.target_path + if patch.storage.readonly is not None: + data_connector.readonly = patch.storage.readonly + + await session.flush() + await session.refresh(data_connector) + + return models.DataConnectorUpdate( + old=old_data_connector, + new=data_connector.dump(), + ) + + @with_db_transaction + @Authz.authz_change(AuthzOperation.delete, ResourceType.data_connector) + async def delete_data_connector( + self, + user: base_models.APIUser, + data_connector_id: ULID, + *, + session: AsyncSession | None = None, + ) -> models.DataConnector | None: + """Delete a data connector.""" + if not session: + raise errors.ProgrammingError(message="A database session is required.") + authorized = await self.authz.has_permission(user, ResourceType.data_connector, data_connector_id, Scope.DELETE) + if not authorized: + raise errors.MissingResourceError( + message=f"Data connector with id '{data_connector_id}' does not exist or you do not have access to it." + ) + + result = await session.scalars( + select(schemas.DataConnectorORM).where(schemas.DataConnectorORM.id == data_connector_id) + ) + data_connector_orm = result.one_or_none() + if data_connector_orm is None: + return None + + data_connector = data_connector_orm.dump() + await session.delete(data_connector_orm) + return data_connector + + +class DataConnectorProjectLinkRepository: + """Repository for links from data connectors to projects.""" + + def __init__( + self, + session_maker: Callable[..., AsyncSession], + authz: Authz, + ) -> None: + self.session_maker = session_maker + self.authz = authz + + async def get_links_from( + self, user: base_models.APIUser, data_connector_id: ULID + ) -> list[models.DataConnectorToProjectLink]: + """Get links from a given data connector.""" + authorized = await self.authz.has_permission(user, ResourceType.data_connector, data_connector_id, Scope.READ) + if not authorized: + raise errors.MissingResourceError( + message=f"Data connector with id '{data_connector_id}' does not exist or you do not have access to it." 
+ ) + + project_ids = await self.authz.resources_with_permission(user, user.id, ResourceType.project, Scope.READ) + + async with self.session_maker() as session: + stmt = ( + select(schemas.DataConnectorToProjectLinkORM) + .where(schemas.DataConnectorToProjectLinkORM.data_connector_id == data_connector_id) + .where(schemas.DataConnectorToProjectLinkORM.project_id.in_(project_ids)) + ) + result = await session.scalars(stmt) + links_orm = result.all() + return [link.dump() for link in links_orm] + + async def get_links_to( + self, user: base_models.APIUser, project_id: ULID + ) -> list[models.DataConnectorToProjectLink]: + """Get links to a given project.""" + authorized = await self.authz.has_permission(user, ResourceType.project, project_id, Scope.READ) + if not authorized: + raise errors.MissingResourceError( + message=f"Project with id '{project_id}' does not exist or you do not have access to it." + ) + + data_connector_ids = await self.authz.resources_with_permission( + user, user.id, ResourceType.data_connector, Scope.READ + ) + + async with self.session_maker() as session: + stmt = ( + select(schemas.DataConnectorToProjectLinkORM) + .where(schemas.DataConnectorToProjectLinkORM.project_id == project_id) + .where(schemas.DataConnectorToProjectLinkORM.data_connector_id.in_(data_connector_ids)) + ) + result = await session.scalars(stmt) + links_orm = result.all() + return [link.dump() for link in links_orm] + + @with_db_transaction + @Authz.authz_change(AuthzOperation.create_link, ResourceType.data_connector) + async def insert_link( + self, + user: base_models.APIUser, + link: models.UnsavedDataConnectorToProjectLink, + *, + session: AsyncSession | None = None, + ) -> models.DataConnectorToProjectLink: + """Insert a new link from a data connector to a project.""" + if not session: + raise errors.ProgrammingError(message="A database session is required.") + + if user.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + + data_connector = ( + await session.scalars( + select(schemas.DataConnectorORM).where(schemas.DataConnectorORM.id == link.data_connector_id) + ) + ).one_or_none() + if data_connector is None: + raise errors.MissingResourceError( + message=f"Data connector with id '{link.data_connector_id}' does not exist or you do not have access to it." # noqa E501 + ) + + project = ( + await session.scalars(select(schemas.ProjectORM).where(schemas.ProjectORM.id == link.project_id)) + ).one_or_none() + if project is None: + raise errors.MissingResourceError( + message=f"Project with id '{link.project_id}' does not exist or you do not have access to it." + ) + + existing_link = await session.scalar( + select(schemas.DataConnectorToProjectLinkORM) + .where(schemas.DataConnectorToProjectLinkORM.data_connector_id == link.data_connector_id) + .where(schemas.DataConnectorToProjectLinkORM.project_id == link.project_id) + ) + if existing_link is not None: + raise errors.ConflictError( + message=f"A link from data connector {link.data_connector_id} to project {link.project_id} already exists." 
# noqa E501 + ) + + link_orm = schemas.DataConnectorToProjectLinkORM( + data_connector_id=link.data_connector_id, + project_id=link.project_id, + created_by_id=user.id, + ) + + session.add(link_orm) + await session.flush() + await session.refresh(link_orm) + + return link_orm.dump() + + @with_db_transaction + @Authz.authz_change(AuthzOperation.delete_link, ResourceType.data_connector) + async def delete_link( + self, + user: base_models.APIUser, + data_connector_id: ULID, + link_id: ULID, + *, + session: AsyncSession | None = None, + ) -> models.DataConnectorToProjectLink | None: + """Delete a link from a data connector to a project.""" + if not session: + raise errors.ProgrammingError(message="A database session is required.") + + link_orm = ( + await session.scalars( + select(schemas.DataConnectorToProjectLinkORM) + .where(schemas.DataConnectorToProjectLinkORM.id == link_id) + .where(schemas.DataConnectorToProjectLinkORM.data_connector_id == data_connector_id) + ) + ).one_or_none() + if link_orm is None: + return None + + link = link_orm.dump() + await session.delete(link_orm) + return link + + +class DataConnectorSecretRepository: + """Repository for data connector secrets.""" + + def __init__( + self, + session_maker: Callable[..., AsyncSession], + data_connector_repo: DataConnectorRepository, + user_repo: UserRepo, + secret_service_public_key: rsa.RSAPublicKey, + ) -> None: + self.session_maker = session_maker + self.data_connector_repo = data_connector_repo + self.user_repo = user_repo + self.secret_service_public_key = secret_service_public_key + + async def get_data_connector_secrets( + self, + user: base_models.APIUser, + data_connector_id: ULID, + ) -> list[models.DataConnectorSecret]: + """Get data connectors secrets from the database.""" + if user.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + + async with self.session_maker() as session: + stmt = ( + select(schemas.DataConnectorSecretORM) + .where(schemas.DataConnectorSecretORM.user_id == user.id) + .where(schemas.DataConnectorSecretORM.data_connector_id == data_connector_id) + .where(schemas.DataConnectorSecretORM.secret_id == secrets_schemas.SecretORM.id) + .where(secrets_schemas.SecretORM.user_id == user.id) + ) + results = await session.scalars(stmt) + secrets = results.all() + + return [secret.dump() for secret in secrets] + + async def patch_data_connector_secrets( + self, user: base_models.APIUser, data_connector_id: ULID, secrets: list[models.DataConnectorSecretUpdate] + ) -> list[models.DataConnectorSecret]: + """Create, update or remove data connector secrets.""" + if user.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + + # NOTE: check that the user can access the data connector + await self.data_connector_repo.get_data_connector(user=user, data_connector_id=data_connector_id) + + secrets_as_dict = {s.name: s.value for s in secrets} + + async with self.session_maker() as session, session.begin(): + stmt = ( + select(schemas.DataConnectorSecretORM) + .where(schemas.DataConnectorSecretORM.user_id == user.id) + .where(schemas.DataConnectorSecretORM.data_connector_id == data_connector_id) + .where(schemas.DataConnectorSecretORM.secret_id == secrets_schemas.SecretORM.id) + .where(secrets_schemas.SecretORM.user_id == user.id) + ) + result = await session.scalars(stmt) + existing_secrets = result.all() + existing_secrets_as_dict = {s.name: s for s in existing_secrets} + + all_secrets 
= [] + + for name, value in secrets_as_dict.items(): + if value is None: + # Remove the secret + data_connector_secret_orm = existing_secrets_as_dict.get(name) + if data_connector_secret_orm is None: + continue + await session.delete(data_connector_secret_orm.secret) + del existing_secrets_as_dict[name] + continue + + encrypted_value, encrypted_key = await encrypt_user_secret( + user_repo=self.user_repo, + requested_by=user, + secret_service_public_key=self.secret_service_public_key, + secret_value=value, + ) + + if data_connector_secret_orm := existing_secrets_as_dict.get(name): + data_connector_secret_orm.secret.update( + encrypted_value=encrypted_value, encrypted_key=encrypted_key + ) + else: + secret_orm = secrets_schemas.SecretORM( + name=f"{data_connector_id}-{name}", + user_id=user.id, + encrypted_value=encrypted_value, + encrypted_key=encrypted_key, + kind=SecretKind.storage, + ) + data_connector_secret_orm = schemas.DataConnectorSecretORM( + name=name, + user_id=user.id, + data_connector_id=data_connector_id, + secret_id=secret_orm.id, + ) + session.add(secret_orm) + session.add(data_connector_secret_orm) + + all_secrets.append(data_connector_secret_orm.dump()) + + return all_secrets + + async def delete_data_connector_secrets(self, user: base_models.APIUser, data_connector_id: ULID) -> None: + """Delete data connector secrets.""" + if user.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + + async with self.session_maker() as session, session.begin(): + stmt = ( + delete(secrets_schemas.SecretORM) + .where(secrets_schemas.SecretORM.user_id == user.id) + .where(secrets_schemas.SecretORM.id == schemas.DataConnectorSecretORM.secret_id) + .where(schemas.DataConnectorSecretORM.data_connector_id == data_connector_id) + ) + await session.execute(stmt) + + +_T = TypeVar("_T") + + +def _filter_by_namespace_slug(statement: Select[tuple[_T]], namespace: str) -> Select[tuple[_T]]: + """Filters a select query on data connectors to a given namespace.""" + return ( + statement.where(ns_schemas.NamespaceORM.slug == namespace.lower()) + .where(ns_schemas.EntitySlugORM.namespace_id == ns_schemas.NamespaceORM.id) + .where(schemas.DataConnectorORM.id == ns_schemas.EntitySlugORM.data_connector_id) + ) diff --git a/components/renku_data_services/data_connectors/migration_utils.py b/components/renku_data_services/data_connectors/migration_utils.py new file mode 100644 index 000000000..62b965161 --- /dev/null +++ b/components/renku_data_services/data_connectors/migration_utils.py @@ -0,0 +1,214 @@ +"""Utilities to migrate storages_v2 to data_connectors.""" + +import random +import string +from collections.abc import Callable + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import load_only +from ulid import ULID + +from renku_data_services import base_models, errors +from renku_data_services.authz.authz import Authz, ResourceType, Role +from renku_data_services.authz.models import Scope +from renku_data_services.base_models.core import Slug +from renku_data_services.data_connectors import models +from renku_data_services.data_connectors.db import DataConnectorProjectLinkRepository, DataConnectorRepository +from renku_data_services.namespace.models import NamespaceKind +from renku_data_services.project import models as projects_models +from renku_data_services.project import orm as projects_schemas +from renku_data_services.project.db import ProjectRepository +from 
renku_data_services.storage import models as storage_models +from renku_data_services.storage import orm as storage_schemas + + +class DataConnectorMigrationTool: + """Tool to help migrate storages_v2 to data_connectors.""" + + def __init__( + self, + session_maker: Callable[..., AsyncSession], + data_connector_repo: DataConnectorRepository, + data_connector_project_link_repo: DataConnectorProjectLinkRepository, + project_repo: ProjectRepository, + authz: Authz, + ) -> None: + self.session_maker = session_maker + self.data_connector_repo = data_connector_repo + self.data_connector_project_link_repo = data_connector_project_link_repo + self.project_repo = project_repo + self.authz = authz + + async def migrate_storage_v2( + self, requested_by: base_models.APIUser, storage: storage_models.CloudStorage + ) -> models.DataConnector: + """Move a storage_v2 entity to the data connectors table.""" + if requested_by.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + if not requested_by.is_admin: + raise errors.ForbiddenError(message="Only admins can perform this operation.") + + project_id = ULID.from_str(storage.project_id) + project = await self.project_repo.get_project(user=requested_by, project_id=project_id) + + # Find an owner + data_connector_owner = await self._find_owner(requested_by=requested_by, project=project) + if data_connector_owner is None: + raise errors.ProgrammingError( + message=f"Could not find an owner for storage {storage.name} with project {project_id}." + ) + + # Try to create the data connector with the default slug first + try: + data_connector = await self._insert_data_connector( + user=data_connector_owner, storage=storage, project=project + ) + except errors.ConflictError: + # Retry with a random suffix in the slug + suffix = "".join([random.choice(string.ascii_lowercase + string.digits) for _ in range(8)]) # nosec B311 + data_connector_slug = Slug.from_name(storage.name).value + data_connector_slug = f"{data_connector_slug}-{suffix}" + data_connector = await self._insert_data_connector( + user=data_connector_owner, storage=storage, project=project, data_connector_slug=data_connector_slug + ) + + # Link the data connector to the project + unsaved_link = models.UnsavedDataConnectorToProjectLink( + data_connector_id=data_connector.id, + project_id=project_id, + ) + await self.data_connector_project_link_repo.insert_link(user=data_connector_owner, link=unsaved_link) + + # Remove the storage_v2 from the database + await self._delete_storage_v2(requested_by=requested_by, storage_id=storage.storage_id) + + return data_connector + + async def _find_owner( + self, requested_by: base_models.APIUser, project: projects_models.Project + ) -> base_models.APIUser | None: + """Find an owner from the project or its namespace.""" + if requested_by.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + if not requested_by.is_admin: + raise errors.ForbiddenError(message="Only admins can perform this operation.") + + # Use the corresponding user in the case of a user namespace + if project.namespace.kind == NamespaceKind.user: + user_id = str(project.namespace.underlying_resource_id) + return base_models.APIUser(is_admin=False, id=user_id) + + if not isinstance(project.namespace.underlying_resource_id, ULID): + raise errors.ProgrammingError( + message=f"Group namespace {project.namespace.slug} has an invalid underlying resource id 
{project.namespace.underlying_resource_id}." # noqa E501 + ) + + group_id = project.namespace.underlying_resource_id + project_members = await self.authz.members(requested_by, ResourceType.project, project.id) + + # Try with the project creator + project_creator = next(filter(lambda m: m.user_id == project.created_by, project_members), None) + if project_creator is not None: + project_creator_api_user = base_models.APIUser(is_admin=False, id=project_creator.user_id) + can_create_data_connector = await self.authz.has_permission( + project_creator_api_user, ResourceType.group, group_id, Scope.WRITE + ) + if can_create_data_connector: + return project_creator_api_user + + # Try to find a project owner which can create the data connector + for member in project_members: + if member.role != Role.OWNER: + continue + member_api_user = base_models.APIUser(is_admin=False, id=member.user_id) + can_create_data_connector = await self.authz.has_permission( + member_api_user, ResourceType.group, group_id, Scope.WRITE + ) + if can_create_data_connector: + return member_api_user + + # Use any group owner as a last resort + group_members = await self.authz.members(requested_by, ResourceType.group, group_id) + found_owner = next(filter(lambda m: m.role == Role.OWNER, group_members), None) + if found_owner is not None: + return base_models.APIUser(is_admin=False, id=found_owner.user_id) + return None + + async def _insert_data_connector( + self, + user: base_models.APIUser, + storage: storage_models.CloudStorage, + project: projects_models.Project, + data_connector_slug: str | None = None, + ) -> models.DataConnector: + """Attempts to save a data connector with the same properties as the given storage_v2.""" + data_connector_slug = data_connector_slug if data_connector_slug else Slug.from_name(storage.name).value + + unsaved_storage = models.CloudStorageCore( + storage_type=storage.storage_type, + configuration=storage.configuration.config, + source_path=storage.source_path, + target_path=storage.target_path, + readonly=storage.readonly, + ) + unsaved_data_connector = models.UnsavedDataConnector( + name=storage.name, + namespace=project.namespace.slug, + slug=data_connector_slug, + visibility=project.visibility, + created_by="", + storage=unsaved_storage, + ) + + data_connector = await self.data_connector_repo.insert_data_connector( + user=user, data_connector=unsaved_data_connector + ) + return data_connector + + async def get_storages_v2(self, requested_by: base_models.APIUser) -> list[storage_models.CloudStorage]: + """Get all the storages_v2 associated with Renku 2.0 projects.""" + if requested_by.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + if not requested_by.is_admin: + raise errors.ForbiddenError(message="Only admins can perform this operation.") + + all_project_ids = await self._get_all_project_ids(requested_by=requested_by) + all_project_ids_str = [str(pid) for pid in all_project_ids] + + async with self.session_maker() as session: + stmt = select(storage_schemas.CloudStorageORM).where( + storage_schemas.CloudStorageORM.project_id.in_(all_project_ids_str) + ) + result = await session.scalars(stmt) + storages = result.all() + return [storage.dump() for storage in storages] +
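For orientation, here is a minimal sketch of how an admin-scoped caller could drive the two public methods above end to end; it assumes an already-wired `DataConnectorMigrationTool`, and the import path for the service-admin helpers is an assumption:

```python
# Illustrative sketch only; not part of the patch. The import path for
# InternalServiceAdmin/ServiceAdminId is assumed, and `tool` must already
# be constructed with its repositories and Authz client.
from renku_data_services.base_models.core import InternalServiceAdmin, ServiceAdminId
from renku_data_services.data_connectors.migration_utils import DataConnectorMigrationTool


async def migrate_all(tool: DataConnectorMigrationTool) -> None:
    admin = InternalServiceAdmin(id=ServiceAdminId.migrations)
    for storage in await tool.get_storages_v2(requested_by=admin):
        # migrate_storage_v2 inserts the data connector, links it to the
        # project, and deletes the storage_v2 row.
        data_connector = await tool.migrate_storage_v2(requested_by=admin, storage=storage)
        print(f"{storage.name} -> {data_connector.namespace.slug}/{data_connector.slug}")
```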
+ async def _delete_storage_v2(self, requested_by: base_models.APIUser, storage_id: ULID) -> ULID | None: + """Delete a storage_v2 from the database.""" + if requested_by.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + if not requested_by.is_admin: + raise errors.ForbiddenError(message="Only admins can perform this operation.") + + async with self.session_maker() as session, session.begin(): + result = await session.scalars( + select(storage_schemas.CloudStorageORM).where(storage_schemas.CloudStorageORM.storage_id == storage_id) + ) + storage = result.one_or_none() + if storage is None: + return None + await session.delete(storage) + return storage_id + + async def _get_all_project_ids(self, requested_by: base_models.APIUser) -> list[ULID]: + """Get the IDs of all Renku 2.0 projects.""" + if requested_by.id is None: + raise errors.UnauthorizedError(message="You do not have the required permissions for this operation.") + if not requested_by.is_admin: + raise errors.ForbiddenError(message="Only admins can perform this operation.") + + async with self.session_maker() as session: + stmt = select(projects_schemas.ProjectORM).options(load_only(projects_schemas.ProjectORM.id)) + result = await session.scalars(stmt) + return [project.id for project in result] diff --git a/components/renku_data_services/data_connectors/models.py b/components/renku_data_services/data_connectors/models.py new file mode 100644 index 000000000..7245bf8e9 --- /dev/null +++ b/components/renku_data_services/data_connectors/models.py @@ -0,0 +1,136 @@ +"""Models for data connectors.""" + +from dataclasses import dataclass, field +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Any + +from ulid import ULID + +from renku_data_services.authz.models import Visibility +from renku_data_services.namespace.models import Namespace +from renku_data_services.utils.etag import compute_etag_from_timestamp + +if TYPE_CHECKING: + from renku_data_services.storage.rclone import RCloneOption + + +@dataclass(frozen=True, eq=True, kw_only=True) +class CloudStorageCore: + """Remote storage configuration model.""" + + storage_type: str + configuration: dict[str, Any] + source_path: str + target_path: str + readonly: bool + + +@dataclass(frozen=True, eq=True, kw_only=True) +class BaseDataConnector: + """Base data connector model.""" + + name: str + slug: str + visibility: Visibility + created_by: str + creation_date: datetime = field(default_factory=lambda: datetime.now(UTC)) + updated_at: datetime | None = field(default=None) + description: str | None = None + keywords: list[str] | None = None + storage: CloudStorageCore + + +@dataclass(frozen=True, eq=True, kw_only=True) +class DataConnector(BaseDataConnector): + """Data connector model.""" + + id: ULID + namespace: Namespace + updated_at: datetime + + @property + def etag(self) -> str: + """Entity tag value for this data connector object.""" + return compute_etag_from_timestamp(self.updated_at, include_quotes=True) + + +@dataclass(frozen=True, eq=True, kw_only=True) +class UnsavedDataConnector(BaseDataConnector): + """A data connector that hasn't been stored in the database.""" + + namespace: str + + +@dataclass(frozen=True, eq=True, kw_only=True) +class CloudStorageCorePatch: + """Model for changes requested on a remote storage configuration.""" + + storage_type: str | None + configuration: dict[str, Any] | None + source_path: str | None + target_path: str | None + readonly: bool | None + + +@dataclass(frozen=True, eq=True, kw_only=True) +class DataConnectorPatch: + """Model for changes requested on a data connector.""" + + name: str | None + namespace: str | None + slug: str | None + visibility: Visibility | None + description: str | None + keywords:
list[str] | None + storage: CloudStorageCorePatch | None + + +@dataclass(frozen=True, eq=True, kw_only=True) +class CloudStorageCoreWithSensitiveFields(CloudStorageCore): + """Remote storage configuration model with sensitive fields.""" + + sensitive_fields: list["RCloneOption"] + + +@dataclass(frozen=True, eq=True, kw_only=True) +class DataConnectorUpdate: + """Information about the update of a data connector.""" + + old: DataConnector + new: DataConnector + + +@dataclass(frozen=True, eq=True, kw_only=True) +class UnsavedDataConnectorToProjectLink: + """Base model for a link from a data connector to a project.""" + + data_connector_id: ULID + project_id: ULID + + +@dataclass(frozen=True, eq=True, kw_only=True) +class DataConnectorToProjectLink(UnsavedDataConnectorToProjectLink): + """A link from a data connector to a project.""" + + id: ULID + created_by: str + creation_date: datetime + updated_at: datetime + + +@dataclass(frozen=True, eq=True, kw_only=True) +class DataConnectorSecret: + """Data connector secret model.""" + + name: str + user_id: str + data_connector_id: ULID + secret_id: ULID + + +@dataclass(frozen=True, eq=True, kw_only=True) +class DataConnectorSecretUpdate: + """Secret to be saved for a data connector.""" + + name: str + value: str | None diff --git a/components/renku_data_services/data_connectors/orm.py b/components/renku_data_services/data_connectors/orm.py new file mode 100644 index 000000000..447168d34 --- /dev/null +++ b/components/renku_data_services/data_connectors/orm.py @@ -0,0 +1,198 @@ +"""SQLAlchemy schemas for the data connectors database.""" + +from datetime import datetime +from typing import TYPE_CHECKING, Any + +from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, MetaData, String, func +from sqlalchemy.dialects.postgresql import ARRAY, JSONB +from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship +from sqlalchemy.schema import Index, UniqueConstraint +from ulid import ULID + +from renku_data_services.authz import models as authz_models +from renku_data_services.base_orm.registry import COMMON_ORM_REGISTRY +from renku_data_services.data_connectors import models +from renku_data_services.data_connectors.apispec import Visibility +from renku_data_services.project.orm import ProjectORM +from renku_data_services.secrets.orm import SecretORM +from renku_data_services.users.orm import UserORM +from renku_data_services.utils.sqlalchemy import ULIDType + +if TYPE_CHECKING: + from renku_data_services.namespace.orm import EntitySlugORM + +JSONVariant = JSON().with_variant(JSONB(), "postgresql") + + +class BaseORM(MappedAsDataclass, DeclarativeBase): + """Base class for all ORM classes.""" + + metadata = MetaData(schema="storage") + registry = COMMON_ORM_REGISTRY + + +class DataConnectorORM(BaseORM): + """A data connector for Renku 2.0.""" + + __tablename__ = "data_connectors" + + id: Mapped[ULID] = mapped_column("id", ULIDType, primary_key=True, default_factory=lambda: str(ULID()), init=False) + """ID of this data connector.""" + + name: Mapped[str] = mapped_column("name", String(99)) + """Name of the data connector.""" + + visibility: Mapped[Visibility] + """Visibility of the data connector.""" + + storage_type: Mapped[str] = mapped_column("storage_type", String(20)) + """Type of storage (e.g. 
s3), read-only based on 'configuration'.""" + + configuration: Mapped[dict[str, Any]] = mapped_column("configuration", JSONVariant) + """RClone configuration dict.""" + + source_path: Mapped[str] = mapped_column("source_path", String()) + """Source path to mount from (e.g. bucket/folder for s3).""" + + target_path: Mapped[str] = mapped_column("target_path", String()) + """Target folder in the repository to mount to.""" + + created_by_id: Mapped[str] = mapped_column(ForeignKey(UserORM.keycloak_id), index=True, nullable=False) + """User ID of the creator of the data connector.""" + + description: Mapped[str | None] = mapped_column("description", String(500)) + """Human-readable description of the data connector.""" + + keywords: Mapped[list[str] | None] = mapped_column("keywords", ARRAY(String(99)), nullable=True) + """Keywords for the data connector.""" + + slug: Mapped["EntitySlugORM"] = relationship( + lazy="joined", init=False, repr=False, viewonly=True, back_populates="data_connector" + ) + """Slug of the data connector.""" + + readonly: Mapped[bool] = mapped_column("readonly", Boolean(), default=True) + """Whether this storage should be mounted readonly or not.""" + + creation_date: Mapped[datetime] = mapped_column( + "creation_date", DateTime(timezone=True), default=func.now(), nullable=False + ) + updated_at: Mapped[datetime] = mapped_column( + "updated_at", + DateTime(timezone=True), + default=None, + server_default=func.now(), + onupdate=func.now(), + nullable=False, + ) + + def dump(self) -> models.DataConnector: + """Create a data connector model from the DataConnectorORM.""" + return models.DataConnector( + id=self.id, + name=self.name, + slug=self.slug.slug, + namespace=self.slug.namespace.dump(), + visibility=self._dump_visibility(), + created_by=self.created_by_id, + creation_date=self.creation_date, + updated_at=self.updated_at, + storage=self._dump_storage(), + description=self.description, + keywords=self.keywords, + ) + + def _dump_visibility(self) -> authz_models.Visibility: + return ( + authz_models.Visibility.PUBLIC if self.visibility == Visibility.public else authz_models.Visibility.PRIVATE + ) + + def _dump_storage(self) -> models.CloudStorageCore: + return models.CloudStorageCore( + storage_type=self.storage_type, + configuration=self.configuration, + source_path=self.source_path, + target_path=self.target_path, + readonly=self.readonly, + ) + + +class DataConnectorToProjectLinkORM(BaseORM): + """A link from a data connector to a project in Renku 2.0.""" + + __tablename__ = "data_connector_to_project_links" + __table_args__ = ( + UniqueConstraint( + "data_connector_id", + "project_id", + name="_unique_data_connector_id_project_id_uc", + ), + ) + + id: Mapped[ULID] = mapped_column("id", ULIDType, primary_key=True, default_factory=lambda: str(ULID()), init=False) + """ID of this data connector to project link.""" + + data_connector_id: Mapped[ULID] = mapped_column( + ForeignKey(DataConnectorORM.id, ondelete="CASCADE"), index=True, nullable=False + ) + """ID of the data connector.""" + + project_id: Mapped[ULID] = mapped_column(ForeignKey(ProjectORM.id, ondelete="CASCADE"), index=True, nullable=False) + """ID of the project.""" + + created_by_id: Mapped[str] = mapped_column( + ForeignKey(UserORM.keycloak_id, ondelete="CASCADE"), index=True, nullable=False + ) + """User ID of the creator of the data connector to project link.""" + + creation_date: Mapped[datetime] = mapped_column( + "creation_date", DateTime(timezone=True), default=func.now(), nullable=False + ) + updated_at:
Mapped[datetime] = mapped_column( + "updated_at", + DateTime(timezone=True), + default=None, + server_default=func.now(), + onupdate=func.now(), + nullable=False, + ) + + def dump(self) -> models.DataConnectorToProjectLink: + """Create a link model from the DataConnectorToProjectLinkORM.""" + return models.DataConnectorToProjectLink( + id=self.id, + data_connector_id=self.data_connector_id, + project_id=self.project_id, + created_by=self.created_by_id, + creation_date=self.creation_date, + updated_at=self.updated_at, + ) + + +class DataConnectorSecretORM(BaseORM): + """Secrets for data connectors.""" + + __tablename__ = "data_connector_secrets" + __table_args__ = ( + Index("ix_storage_data_connector_secrets_user_id_data_connector_id", "user_id", "data_connector_id"), + ) + + user_id: Mapped[str] = mapped_column(ForeignKey(UserORM.keycloak_id, ondelete="CASCADE"), primary_key=True) + + data_connector_id: Mapped[ULID] = mapped_column( + ForeignKey(DataConnectorORM.id, ondelete="CASCADE"), primary_key=True + ) + + name: Mapped[str] = mapped_column("name", String(), primary_key=True) + + secret_id: Mapped[ULID] = mapped_column("secret_id", ForeignKey(SecretORM.id, ondelete="CASCADE")) + secret: Mapped[SecretORM] = relationship(init=False, repr=False, lazy="selectin") + + def dump(self) -> models.DataConnectorSecret: + """Create a data connector secret model from the DataConnectorSecretORM.""" + return models.DataConnectorSecret( + name=self.name, + user_id=self.user_id, + data_connector_id=self.data_connector_id, + secret_id=self.secret_id, + ) diff --git a/components/renku_data_services/migrations/env.py b/components/renku_data_services/migrations/env.py index 3f360a946..70370b961 100644 --- a/components/renku_data_services/migrations/env.py +++ b/components/renku_data_services/migrations/env.py @@ -5,6 +5,7 @@ from renku_data_services.authz.orm import BaseORM as authz from renku_data_services.connected_services.orm import BaseORM as connected_services from renku_data_services.crc.orm import BaseORM as crc +from renku_data_services.data_connectors.orm import BaseORM as data_connectors from renku_data_services.message_queue.orm import BaseORM as events from renku_data_services.migrations.utils import logging_config, run_migrations from renku_data_services.namespace.orm import BaseORM as namespaces @@ -23,6 +24,7 @@ authz.metadata, crc.metadata, connected_services.metadata, + data_connectors.metadata, events.metadata, namespaces.metadata, platform.metadata, diff --git a/components/renku_data_services/migrations/versions/3cf2adf9896b_add_data_connectors.py b/components/renku_data_services/migrations/versions/3cf2adf9896b_add_data_connectors.py new file mode 100644 index 000000000..d6f1d918b --- /dev/null +++ b/components/renku_data_services/migrations/versions/3cf2adf9896b_add_data_connectors.py @@ -0,0 +1,152 @@ +"""add data connectors + +Revision ID: 3cf2adf9896b +Revises: a11752a5afba +Create Date: 2024-09-05 14:20:47.006275 + +""" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "3cf2adf9896b" +down_revision = "a11752a5afba" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + visibility = postgresql.ENUM(name="visibility", create_type=False) + + # ### commands auto generated by Alembic - please adjust!
### + op.create_table( + "data_connectors", + sa.Column("id", sa.String(length=26), nullable=False), + sa.Column("name", sa.String(length=99), nullable=False), + sa.Column("visibility", visibility, nullable=False), + sa.Column("storage_type", sa.String(length=20), nullable=False), + sa.Column( + "configuration", + sa.JSON().with_variant(postgresql.JSONB(astext_type=sa.Text()), "postgresql"), + nullable=False, + ), + sa.Column("source_path", sa.String(), nullable=False), + sa.Column("target_path", sa.String(), nullable=False), + sa.Column("created_by_id", sa.String(length=36), nullable=False), + sa.Column("description", sa.String(length=500), nullable=True), + sa.Column("keywords", postgresql.ARRAY(sa.String(length=99)), nullable=True), + sa.Column("readonly", sa.Boolean(), nullable=False), + sa.Column("creation_date", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.ForeignKeyConstraint( + ["created_by_id"], + ["users.users.keycloak_id"], + ), + sa.PrimaryKeyConstraint("id"), + schema="storage", + ) + op.create_index( + op.f("ix_storage_data_connectors_created_by_id"), + "data_connectors", + ["created_by_id"], + unique=False, + schema="storage", + ) + op.add_column("entity_slugs", sa.Column("data_connector_id", sa.String(length=26), nullable=True), schema="common") + op.alter_column("entity_slugs", "project_id", existing_type=sa.String(length=26), nullable=True, schema="common") + op.create_index( + op.f("ix_common_entity_slugs_data_connector_id"), + "entity_slugs", + ["data_connector_id"], + unique=False, + schema="common", + ) + op.create_foreign_key( + "entity_slugs_data_connector_id_fk", + "entity_slugs", + "data_connectors", + ["data_connector_id"], + ["id"], + source_schema="common", + referent_schema="storage", + ondelete="CASCADE", + ) + op.create_check_constraint( + "either_project_id_or_data_connector_id_is_set", + "entity_slugs", + "CAST (project_id IS NOT NULL AS int) + CAST (data_connector_id IS NOT NULL AS int) BETWEEN 0 AND 1", + schema="common", + ) + op.create_table( + "data_connector_secrets", + sa.Column("user_id", sa.String(length=36), nullable=False), + sa.Column("data_connector_id", sa.String(length=26), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("secret_id", sa.String(length=26), nullable=False), + sa.ForeignKeyConstraint(["data_connector_id"], ["storage.data_connectors.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["secret_id"], ["secrets.secrets.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["user_id"], ["users.users.keycloak_id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("user_id", "data_connector_id", "name"), + schema="storage", + ) + op.create_index( + "ix_storage_data_connector_secrets_user_id_data_connector_id", + "data_connector_secrets", + ["user_id", "data_connector_id"], + unique=False, + schema="storage", + ) + op.drop_index( + "ix_storage_cloud_storage_secrets_user_id_storage_id", table_name="cloud_storage_secrets", schema="storage" + ) + op.drop_table("cloud_storage_secrets", schema="storage") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "cloud_storage_secrets", + sa.Column("user_id", sa.String(length=36), autoincrement=False, nullable=False), + sa.Column("storage_id", sa.String(length=26), autoincrement=False, nullable=False), + sa.Column("name", sa.String(length=99), autoincrement=False, nullable=False), + sa.Column("secret_id", sa.String(length=26), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint( + ["secret_id"], ["secrets.secrets.id"], name="cloud_storage_secrets_secret_id_fkey", ondelete="CASCADE" + ), + sa.ForeignKeyConstraint( + ["storage_id"], + ["storage.cloud_storage.storage_id"], + name="cloud_storage_secrets_storage_id_fkey", + ondelete="CASCADE", + ), + sa.ForeignKeyConstraint( + ["user_id"], ["users.users.keycloak_id"], name="cloud_storage_secrets_user_id_fkey", ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("user_id", "storage_id", "name", name="_unique_user_id_storage_id_name"), + schema="storage", + ) + op.create_index( + "ix_storage_cloud_storage_secrets_user_id_storage_id", + "cloud_storage_secrets", + ["user_id", "storage_id"], + unique=False, + schema="storage", + ) + op.drop_index( + "ix_storage_data_connector_secrets_user_id_data_connector_id", + table_name="data_connector_secrets", + schema="storage", + ) + op.drop_table("data_connector_secrets", schema="storage") + op.drop_constraint("either_project_id_or_data_connector_id_is_set", "entity_slugs", schema="common", type_="check") + op.execute("DELETE FROM common.entity_slugs WHERE entity_slugs.data_connector_id IS NOT NULL") + op.drop_constraint("entity_slugs_data_connector_id_fk", "entity_slugs", schema="common", type_="foreignkey") + op.drop_index(op.f("ix_common_entity_slugs_data_connector_id"), table_name="entity_slugs", schema="common") + op.alter_column("entity_slugs", "project_id", existing_type=sa.String(length=26), nullable=False, schema="common") + op.drop_column("entity_slugs", "data_connector_id", schema="common") + op.drop_index(op.f("ix_storage_data_connectors_created_by_id"), table_name="data_connectors", schema="storage") + op.drop_table("data_connectors", schema="storage") + # ### end Alembic commands ### diff --git a/components/renku_data_services/migrations/versions/5335b8548c79_add_authorization_for_data_connectors.py b/components/renku_data_services/migrations/versions/5335b8548c79_add_authorization_for_data_connectors.py new file mode 100644 index 000000000..4dbd9323e --- /dev/null +++ b/components/renku_data_services/migrations/versions/5335b8548c79_add_authorization_for_data_connectors.py @@ -0,0 +1,61 @@ +"""add authorization for data connectors + +Revision ID: 5335b8548c79 +Revises: 3cf2adf9896b +Create Date: 2024-09-12 13:11:11.087316 + +""" + +import logging + +import sqlalchemy as sa +from alembic import op + +from renku_data_services.authz.config import AuthzConfig +from renku_data_services.authz.schemas import generate_v4 + +# revision identifiers, used by Alembic. 
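The `either_project_id_or_data_connector_id_is_set` check created by 3cf2adf9896b above reads most naturally as arithmetic on NULL flags; a minimal Python paraphrase, where the helper itself is hypothetical and not part of the patch:

```python
# Python paraphrase of the SQL check constraint: CAST (x IS NOT NULL AS int)
# becomes int(x is not None). Hypothetical helper, for illustration only.
def entity_slug_row_is_valid(project_id: str | None, data_connector_id: str | None) -> bool:
    flags = int(project_id is not None) + int(data_connector_id is not None)
    return 0 <= flags <= 1  # BETWEEN 0 AND 1: at most one of the two is set


assert entity_slug_row_is_valid("01J00000000000000000000000", None)  # project slug
assert entity_slug_row_is_valid(None, "01J00000000000000000000000")  # data connector slug
assert not entity_slug_row_is_valid("01J00000000000000000000000", "01J00000000000000000000000")
```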
+revision = "5335b8548c79" +down_revision = "3cf2adf9896b" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + config = AuthzConfig.from_env() + client = config.authz_client() + connection = op.get_bind() + with connection.begin_nested() as tx: + op.execute(sa.text("LOCK TABLE projects.projects IN EXCLUSIVE MODE")) + stmt = ( + sa.select(sa.column("id", type_=sa.VARCHAR)) + .select_from(sa.table("projects", schema="projects")) + .where(sa.column("visibility") == sa.literal("public", type_=sa.Enum("visibility"))) + ) + project_ids = connection.scalars(stmt).all() + v4 = generate_v4(project_ids) + responses = v4.upgrade(client) + tx.commit() + logging.info( + f"Finished upgrading the Authz schema to version 4 in Alembic revision {revision}, response: {responses}" + ) + + +def downgrade() -> None: + config = AuthzConfig.from_env() + client = config.authz_client() + connection = op.get_bind() + with connection.begin_nested() as tx: + op.execute(sa.text("LOCK TABLE projects.projects IN EXCLUSIVE MODE")) + stmt = ( + sa.select(sa.column("id", type_=sa.VARCHAR)) + .select_from(sa.table("projects", schema="projects")) + .where(sa.column("visibility") == sa.literal("public", type_=sa.Enum("visibility"))) + ) + project_ids = connection.scalars(stmt).all() + v4 = generate_v4(project_ids) + responses = v4.downgrade(client) + tx.commit() + logging.info( + f"Finished downgrading the Authz schema from version 4 in Alembic revision {revision}, response: {responses}" + ) diff --git a/components/renku_data_services/migrations/versions/88af2fdd2cc7_add_links_from_data_connectors_to_.py b/components/renku_data_services/migrations/versions/88af2fdd2cc7_add_links_from_data_connectors_to_.py new file mode 100644 index 000000000..0bf5825c6 --- /dev/null +++ b/components/renku_data_services/migrations/versions/88af2fdd2cc7_add_links_from_data_connectors_to_.py @@ -0,0 +1,78 @@ +"""add links from data connectors to projects + +Revision ID: 88af2fdd2cc7 +Revises: 5335b8548c79 +Create Date: 2024-09-17 13:55:43.783482 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "88af2fdd2cc7" +down_revision = "5335b8548c79" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "data_connector_to_project_links", + sa.Column("id", sa.String(length=26), nullable=False), + sa.Column("data_connector_id", sa.String(length=26), nullable=False), + sa.Column("project_id", sa.String(length=26), nullable=False), + sa.Column("created_by_id", sa.String(length=36), nullable=False), + sa.Column("creation_date", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False), + sa.ForeignKeyConstraint(["created_by_id"], ["users.users.keycloak_id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["data_connector_id"], ["storage.data_connectors.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["project_id"], ["projects.projects.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("data_connector_id", "project_id", name="_unique_data_connector_id_project_id_uc"), + schema="storage", + ) + op.create_index( + op.f("ix_storage_data_connector_to_project_links_created_by_id"), + "data_connector_to_project_links", + ["created_by_id"], + unique=False, + schema="storage", + ) + op.create_index( + op.f("ix_storage_data_connector_to_project_links_data_connector_id"), + "data_connector_to_project_links", + ["data_connector_id"], + unique=False, + schema="storage", + ) + op.create_index( + op.f("ix_storage_data_connector_to_project_links_project_id"), + "data_connector_to_project_links", + ["project_id"], + unique=False, + schema="storage", + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index( + op.f("ix_storage_data_connector_to_project_links_project_id"), + table_name="data_connector_to_project_links", + schema="storage", + ) + op.drop_index( + op.f("ix_storage_data_connector_to_project_links_data_connector_id"), + table_name="data_connector_to_project_links", + schema="storage", + ) + op.drop_index( + op.f("ix_storage_data_connector_to_project_links_created_by_id"), + table_name="data_connector_to_project_links", + schema="storage", + ) + op.drop_table("data_connector_to_project_links", schema="storage") + # ### end Alembic commands ### diff --git a/components/renku_data_services/migrations/versions/a11752a5afba_migrate_to_entity_slugs.py b/components/renku_data_services/migrations/versions/a11752a5afba_migrate_to_entity_slugs.py new file mode 100644 index 000000000..a16c20456 --- /dev/null +++ b/components/renku_data_services/migrations/versions/a11752a5afba_migrate_to_entity_slugs.py @@ -0,0 +1,118 @@ +"""migrate to entity slugs + +Revision ID: a11752a5afba +Revises: 726d5d0e1f28 +Create Date: 2024-09-03 11:18:46.025525 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
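For reference, a small sketch of querying the link table created by 88af2fdd2cc7 above with SQLAlchemy Core; the ad-hoc table handle and connection wiring are assumptions, since application code goes through DataConnectorProjectLinkRepository instead:

```python
import sqlalchemy as sa

# Lightweight handle for storage.data_connector_to_project_links; only the
# columns used below are declared. Illustrative only, not part of the patch.
links = sa.table(
    "data_connector_to_project_links",
    sa.column("data_connector_id", sa.String),
    sa.column("project_id", sa.String),
    schema="storage",
)


def data_connector_ids_for_project(conn: sa.engine.Connection, project_id: str) -> list[str]:
    # This lookup is what ix_storage_data_connector_to_project_links_project_id serves.
    stmt = sa.select(links.c.data_connector_id).where(links.c.project_id == project_id)
    return list(conn.scalars(stmt))
```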
+revision = "a11752a5afba" +down_revision = "726d5d0e1f28" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + connection = op.get_bind() + + op.execute("ALTER TABLE projects.project_slugs SET SCHEMA common") + op.rename_table("project_slugs", "entity_slugs", schema="common") + op.execute("ALTER INDEX common.project_slugs_unique_slugs RENAME TO entity_slugs_unique_slugs") + op.execute( + "ALTER INDEX common.ix_projects_project_slugs_namespace_id RENAME TO ix_common_entity_slugs_namespace_id" + ) + op.execute("ALTER INDEX common.ix_projects_project_slugs_project_id RENAME TO ix_common_entity_slugs_project_id") + op.execute("ALTER INDEX common.ix_projects_project_slugs_slug RENAME TO ix_common_entity_slugs_slug") + op.execute("ALTER SEQUENCE common.project_slugs_id_seq RENAME TO entity_slugs_id_seq") + op.drop_constraint("project_slugs_project_id_fk", "entity_slugs", schema="common", type_="foreignkey") + op.create_foreign_key( + "entity_slugs_project_id_fk", + "entity_slugs", + "projects", + ["project_id"], + ["id"], + source_schema="common", + referent_schema="projects", + ondelete="CASCADE", + ) + + op.execute("ALTER TABLE projects.project_slugs_old SET SCHEMA common") + op.rename_table("project_slugs_old", "entity_slugs_old", schema="common") + op.execute( + "ALTER INDEX common.ix_projects_project_slugs_old_created_at RENAME TO ix_common_entity_slugs_old_created_at" + ) + op.execute( + "ALTER INDEX common.ix_projects_project_slugs_old_latest_slug_id RENAME TO ix_common_entity_slugs_old_latest_slug_id" + ) + op.execute("ALTER INDEX common.ix_projects_project_slugs_old_slug RENAME TO ix_common_entity_slugs_old_slug") + op.execute("ALTER SEQUENCE common.project_slugs_old_id_seq RENAME TO entity_slugs_old_id_seq") + + tables = ["entity_slugs", "entity_slugs_old"] + inspector = sa.inspect(op.get_bind()) + found_sequences = inspector.get_sequence_names("common") + for table in tables: + seq = f"{table}_id_seq" + if seq not in found_sequences: + continue + last_id_stmt = sa.select(sa.func.max(sa.column("id", type_=sa.INT))).select_from( + sa.table(table, schema="common") + ) + last_id = connection.scalars(last_id_stmt).one_or_none() + if last_id is None or last_id <= 0: + continue + op.execute(sa.text(f"ALTER SEQUENCE common.{seq} RESTART WITH {last_id + 1}")) + + +def downgrade() -> None: + connection = op.get_bind() + + op.drop_constraint("entity_slugs_project_id_fk", "entity_slugs", schema="common", type_="foreignkey") + op.create_foreign_key( + "project_slugs_project_id_fk", + "entity_slugs", + "projects", + ["project_id"], + ["id"], + source_schema="common", + referent_schema="projects", + ondelete="CASCADE", + ) + op.execute("ALTER SEQUENCE common.entity_slugs_id_seq RENAME TO project_slugs_id_seq") + op.execute("ALTER INDEX common.ix_common_entity_slugs_slug RENAME TO ix_projects_project_slugs_slug") + op.execute("ALTER INDEX common.ix_common_entity_slugs_project_id RENAME TO ix_projects_project_slugs_project_id") + op.execute( + "ALTER INDEX common.ix_common_entity_slugs_namespace_id RENAME TO ix_projects_project_slugs_namespace_id" + ) + op.execute("ALTER INDEX common.entity_slugs_unique_slugs RENAME TO project_slugs_unique_slugs") + op.rename_table("entity_slugs", "project_slugs", schema="common") + op.execute("ALTER TABLE common.project_slugs SET SCHEMA projects") + + op.execute("ALTER SEQUENCE common.entity_slugs_old_id_seq RENAME TO project_slugs_old_id_seq") + op.execute("ALTER INDEX common.ix_common_entity_slugs_old_slug RENAME TO 
ix_projects_project_slugs_old_slug") + op.execute( + "ALTER INDEX common.ix_common_entity_slugs_old_latest_slug_id RENAME TO ix_projects_project_slugs_old_latest_slug_id" + ) + op.execute( + "ALTER INDEX common.ix_common_entity_slugs_old_created_at RENAME TO ix_projects_project_slugs_old_created_at" + ) + op.rename_table("entity_slugs_old", "project_slugs_old", schema="common") + op.execute("ALTER TABLE common.project_slugs_old SET SCHEMA projects") + + tables = ["project_slugs", "project_slugs_old"] + inspector = sa.inspect(op.get_bind()) + found_sequences = inspector.get_sequence_names("projects") + for table in tables: + seq = f"{table}_id_seq" + if seq not in found_sequences: + continue + last_id_stmt = sa.select(sa.func.max(sa.column("id", type_=sa.INT))).select_from( + sa.table(table, schema="projects") + ) + last_id = connection.scalars(last_id_stmt).one_or_none() + if last_id is None or last_id <= 0: + continue + op.execute(sa.text(f"ALTER SEQUENCE projects.{seq} RESTART WITH {last_id + 1}")) diff --git a/components/renku_data_services/namespace/orm.py b/components/renku_data_services/namespace/orm.py index abc85a74c..d063662c9 100644 --- a/components/renku_data_services/namespace/orm.py +++ b/components/renku_data_services/namespace/orm.py @@ -3,14 +3,16 @@ from datetime import datetime from typing import Optional, Self, cast -from sqlalchemy import CheckConstraint, DateTime, MetaData, String, func +from sqlalchemy import CheckConstraint, DateTime, Index, MetaData, String, func from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship from sqlalchemy.schema import ForeignKey from ulid import ULID from renku_data_services.base_orm.registry import COMMON_ORM_REGISTRY +from renku_data_services.data_connectors.orm import DataConnectorORM from renku_data_services.errors import errors from renku_data_services.namespace import models +from renku_data_services.project.orm import ProjectORM from renku_data_services.users.models import UserInfo from renku_data_services.users.orm import UserORM from renku_data_services.utils.sqlalchemy import ULIDType @@ -177,3 +179,72 @@ def dump(self) -> models.Namespace: underlying_resource_id=self.latest_slug.user_id, name=name, ) + + +class EntitySlugORM(BaseORM): + """Entity slugs.""" + + __tablename__ = "entity_slugs" + __table_args__ = ( + Index("entity_slugs_unique_slugs", "namespace_id", "slug", unique=True), + CheckConstraint( + "CAST (project_id IS NOT NULL AS int) + CAST (data_connector_id IS NOT NULL AS int) BETWEEN 0 AND 1", + name="either_project_id_or_data_connector_id_is_set", + ), + ) + + id: Mapped[int] = mapped_column(primary_key=True, init=False) + slug: Mapped[str] = mapped_column(String(99), index=True, nullable=False) + project_id: Mapped[ULID | None] = mapped_column( + ForeignKey(ProjectORM.id, ondelete="CASCADE", name="entity_slugs_project_id_fk"), index=True, nullable=True + ) + project: Mapped[ProjectORM | None] = relationship(init=False, repr=False, back_populates="slug") + data_connector_id: Mapped[ULID | None] = mapped_column( + ForeignKey(DataConnectorORM.id, ondelete="CASCADE", name="entity_slugs_data_connector_id_fk"), + index=True, + nullable=True, + ) + data_connector: Mapped[DataConnectorORM | None] = relationship(init=False, repr=False, back_populates="slug") + namespace_id: Mapped[ULID] = mapped_column( + ForeignKey(NamespaceORM.id, ondelete="CASCADE", name="entity_slugs_namespace_id_fk"), index=True + ) + namespace: Mapped[NamespaceORM] = relationship(lazy="joined", init=False, 
repr=False, viewonly=True) + + @classmethod + def create_project_slug(cls, slug: str, project_id: ULID, namespace_id: ULID) -> "EntitySlugORM": + """Create an entity slug for a project.""" + return cls( + slug=slug, + project_id=project_id, + data_connector_id=None, + namespace_id=namespace_id, + ) + + @classmethod + def create_data_connector_slug(cls, slug: str, data_connector_id: ULID, namespace_id: ULID) -> "EntitySlugORM": + """Create an entity slug for a data connector.""" + return cls( + slug=slug, + project_id=None, + data_connector_id=data_connector_id, + namespace_id=namespace_id, + ) + + +class EntitySlugOldORM(BaseORM): + """Entity slugs history.""" + + __tablename__ = "entity_slugs_old" + + id: Mapped[int] = mapped_column(primary_key=True, init=False) + slug: Mapped[str] = mapped_column(String(99), index=True, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), nullable=False, index=True, init=False, server_default=func.now() + ) + latest_slug_id: Mapped[int] = mapped_column( + ForeignKey(EntitySlugORM.id, ondelete="CASCADE"), + nullable=False, + init=False, + index=True, + ) + latest_slug: Mapped[EntitySlugORM] = relationship(lazy="joined", repr=False, viewonly=True) diff --git a/components/renku_data_services/project/api.spec.yaml b/components/renku_data_services/project/api.spec.yaml index dfc385a3c..bd7d2471b 100644 --- a/components/renku_data_services/project/api.spec.yaml +++ b/components/renku_data_services/project/api.spec.yaml @@ -259,6 +259,27 @@ paths: $ref: "#/components/responses/Error" tags: - projects + /projects/{project_id}/data_connector_links: + parameters: + - in: path + name: project_id + required: true + schema: + $ref: "#/components/schemas/Ulid" + description: the ID of the project + get: + summary: Get all links from data connectors to a given project + responses: + "200": + description: List of data connector to project links + content: + application/json: + schema: + $ref: "#/components/schemas/DataConnectorToProjectLinksList" + default: + $ref: "#/components/responses/Error" + tags: + - projects components: schemas: ProjectsList: @@ -502,6 +523,32 @@ components: type: string description: Entity Tag example: "9EE498F9D565D0C41E511377425F32F3" + DataConnectorToProjectLinksList: + description: A list of links from a data connector to a project + type: array + items: + $ref: "#/components/schemas/DataConnectorToProjectLink" + DataConnectorToProjectLink: + description: A link from a data connector to a project in Renku 2.0 + type: object + additionalProperties: false + properties: + id: + $ref: "#/components/schemas/Ulid" + data_connector_id: + $ref: "#/components/schemas/Ulid" + project_id: + $ref: "#/components/schemas/Ulid" + creation_date: + $ref: "#/components/schemas/CreationDate" + created_by: + $ref: "#/components/schemas/UserId" + required: + - id + - data_connector_id + - project_id + - creation_date + - created_by ProjectGetQuery: description: Query params for project get request allOf: diff --git a/components/renku_data_services/project/apispec.py b/components/renku_data_services/project/apispec.py index af8b24edf..a964827c8 100644 --- a/components/renku_data_services/project/apispec.py +++ b/components/renku_data_services/project/apispec.py @@ -33,6 +33,44 @@ class Role(Enum): owner = "owner" +class DataConnectorToProjectLink(BaseAPISpec): + model_config = ConfigDict( + extra="forbid", + ) + id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + 
pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + data_connector_id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + project_id: str = Field( + ..., + description="ULID identifier", + max_length=26, + min_length=26, + pattern="^[0-7][0-9A-HJKMNP-TV-Z]{25}$", + ) + creation_date: datetime = Field( + ..., + description="The date and time the resource was created (in UTC and ISO-8601 format)", + example="2023-11-01T17:32:28Z", + ) + created_by: str = Field( + ..., + description="Keycloak user ID", + example="f74a228b-1790-4276-af5f-25c2424e9b0c", + pattern="^[A-Za-z0-9]{1}[A-Za-z0-9-]+$", + ) + + class PaginationRequest(BaseAPISpec): model_config = ConfigDict( extra="forbid", @@ -103,6 +141,12 @@ class ProjectMemberResponse(BaseAPISpec): role: Role +class DataConnectorToProjectLinksList(RootModel[List[DataConnectorToProjectLink]]): + root: List[DataConnectorToProjectLink] = Field( + ..., description="A list of links from a data connector to a project" + ) + + class ProjectGetQuery(PaginationRequest): namespace: str = Field("", description="A namespace, used as a filter.") direct_member: bool = Field( diff --git a/components/renku_data_services/project/db.py b/components/renku_data_services/project/db.py index ae5f427d8..ea78921df 100644 --- a/components/renku_data_services/project/db.py +++ b/components/renku_data_services/project/db.py @@ -22,6 +22,7 @@ from renku_data_services.message_queue.db import EventRepository from renku_data_services.message_queue.interface import IMessageQueue from renku_data_services.message_queue.redis_queue import dispatch_message +from renku_data_services.namespace import orm as ns_schemas from renku_data_services.namespace.db import GroupRepository from renku_data_services.project import apispec as project_apispec from renku_data_services.project import models @@ -117,7 +118,7 @@ async def get_project_by_namespace_slug( async with self.session_maker() as session: stmt = select(schemas.ProjectORM) stmt = _filter_by_namespace_slug(stmt, namespace) - stmt = stmt.where(schemas.ProjectSlug.slug == slug.lower()) + stmt = stmt.where(ns_schemas.EntitySlugORM.slug == slug.lower()) result = await session.execute(stmt) project_orm = result.scalars().first() @@ -153,7 +154,7 @@ async def insert_project( if not session: raise errors.ProgrammingError(message="A database session is required") ns = await session.scalar( - select(schemas.NamespaceORM).where(schemas.NamespaceORM.slug == project.namespace.lower()) + select(ns_schemas.NamespaceORM).where(ns_schemas.NamespaceORM.slug == project.namespace.lower()) ) if not ns: raise errors.MissingResourceError( @@ -189,10 +190,10 @@ async def insert_project( creation_date=datetime.now(UTC).replace(microsecond=0), keywords=project.keywords, ) - project_slug = schemas.ProjectSlug(slug, project_id=project_orm.id, namespace_id=ns.id) + project_slug = ns_schemas.EntitySlugORM.create_project_slug(slug, project_id=project_orm.id, namespace_id=ns.id) - session.add(project_slug) session.add(project_orm) + session.add(project_slug) await session.flush() await session.refresh(project_orm) @@ -259,7 +260,9 @@ async def update_project( if "namespace" in payload: ns_slug = payload["namespace"] - ns = await session.scalar(select(schemas.NamespaceORM).where(schemas.NamespaceORM.slug == ns_slug.lower())) + ns = await session.scalar( + select(ns_schemas.NamespaceORM).where(ns_schemas.NamespaceORM.slug == ns_slug.lower()) + ) if not ns: raise 
errors.MissingResourceError(message=f"The namespace with slug {ns_slug} does not exist") if not ns.group_id and not ns.user_id: @@ -319,9 +322,9 @@ async def delete_project( def _filter_by_namespace_slug(statement: Select[tuple[_T]], namespace: str) -> Select[tuple[_T]]: """Filters a select query on projects to a given namespace.""" return ( - statement.where(schemas.NamespaceORM.slug == namespace.lower()) - .where(schemas.ProjectSlug.namespace_id == schemas.NamespaceORM.id) - .where(schemas.ProjectORM.id == schemas.ProjectSlug.project_id) + statement.where(ns_schemas.NamespaceORM.slug == namespace.lower()) + .where(ns_schemas.EntitySlugORM.namespace_id == ns_schemas.NamespaceORM.id) + .where(schemas.ProjectORM.id == ns_schemas.EntitySlugORM.project_id) ) diff --git a/components/renku_data_services/project/orm.py b/components/renku_data_services/project/orm.py index b50232fe3..22b765d43 100644 --- a/components/renku_data_services/project/orm.py +++ b/components/renku_data_services/project/orm.py @@ -1,27 +1,29 @@ """SQLAlchemy's schemas for the projects database.""" from datetime import datetime -from typing import Optional +from typing import TYPE_CHECKING, Optional -from sqlalchemy import DateTime, Index, Integer, MetaData, String, func +from sqlalchemy import DateTime, Integer, MetaData, String, func from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship from sqlalchemy.schema import ForeignKey from ulid import ULID from renku_data_services.authz import models as authz_models -from renku_data_services.namespace.orm import NamespaceORM +from renku_data_services.base_orm.registry import COMMON_ORM_REGISTRY from renku_data_services.project import models from renku_data_services.project.apispec import Visibility from renku_data_services.utils.sqlalchemy import ULIDType -metadata_obj = MetaData(schema="projects") # Has to match alembic ini section name +if TYPE_CHECKING: + from renku_data_services.namespace.orm import EntitySlugORM class BaseORM(MappedAsDataclass, DeclarativeBase): """Base class for all ORM classes.""" - metadata = metadata_obj + metadata = MetaData(schema="projects") + registry = COMMON_ORM_REGISTRY class ProjectORM(BaseORM): @@ -36,7 +38,9 @@ class ProjectORM(BaseORM): keywords: Mapped[Optional[list[str]]] = mapped_column("keywords", ARRAY(String(99)), nullable=True) # NOTE: The project slugs table has a foreign key from the projects table, but there is a stored procedure # triggered by the deletion of slugs to remove the project used by the slug. See migration 89aa4573cfa9. 
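The slug relationship below now resolves to the shared EntitySlugORM. A hedged sketch of the factory methods introduced in namespace/orm.py above (the ULIDs are placeholders, no database involved) shows that each slug row sets exactly one of the two foreign keys, as the `either_project_id_or_data_connector_id_is_set` check requires:

```python
# Illustrative only; placeholder IDs, objects are never persisted here.
from ulid import ULID

from renku_data_services.namespace.orm import EntitySlugORM

project_slug = EntitySlugORM.create_project_slug(
    slug="my-project", project_id=ULID(), namespace_id=ULID()
)
dc_slug = EntitySlugORM.create_data_connector_slug(
    slug="my-data", data_connector_id=ULID(), namespace_id=ULID()
)
assert project_slug.data_connector_id is None  # project slugs leave the DC side NULL
assert dc_slug.project_id is None  # and vice versa for data connector slugs
```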
- slug: Mapped["ProjectSlug"] = relationship(lazy="joined", init=False, repr=False, viewonly=True) + slug: Mapped["EntitySlugORM"] = relationship( + lazy="joined", init=False, repr=False, viewonly=True, back_populates="project" + ) repositories: Mapped[list["ProjectRepositoryORM"]] = relationship( back_populates="project", default_factory=list, @@ -81,39 +85,3 @@ class ProjectRepositoryORM(BaseORM): ForeignKey("projects.id", ondelete="CASCADE"), default=None, index=True ) project: Mapped[Optional[ProjectORM]] = relationship(back_populates="repositories", default=None, repr=False) - - -class ProjectSlug(BaseORM): - """Project and namespace slugs.""" - - __tablename__ = "project_slugs" - __table_args__ = (Index("project_slugs_unique_slugs", "namespace_id", "slug", unique=True),) - - id: Mapped[int] = mapped_column(primary_key=True, init=False) - slug: Mapped[str] = mapped_column(String(99), index=True, nullable=False) - project_id: Mapped[ULID] = mapped_column( - ForeignKey(ProjectORM.id, ondelete="CASCADE", name="project_slugs_project_id_fk"), index=True - ) - namespace_id: Mapped[ULID] = mapped_column( - ForeignKey(NamespaceORM.id, ondelete="CASCADE", name="project_slugs_namespace_id_fk"), index=True - ) - namespace: Mapped[NamespaceORM] = relationship(lazy="joined", init=False, repr=False, viewonly=True) - - -class ProjectSlugOld(BaseORM): - """Project slugs history.""" - - __tablename__ = "project_slugs_old" - - id: Mapped[int] = mapped_column(primary_key=True, init=False) - slug: Mapped[str] = mapped_column(String(99), index=True, nullable=False) - created_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), nullable=False, index=True, init=False, server_default=func.now() - ) - latest_slug_id: Mapped[int] = mapped_column( - ForeignKey(ProjectSlug.id, ondelete="CASCADE"), - nullable=False, - init=False, - index=True, - ) - latest_slug: Mapped[ProjectSlug] = relationship(lazy="joined", repr=False, viewonly=True) diff --git a/components/renku_data_services/storage/api.spec.yaml b/components/renku_data_services/storage/api.spec.yaml index 9848cdf24..c8485daf7 100644 --- a/components/renku_data_services/storage/api.spec.yaml +++ b/components/renku_data_services/storage/api.spec.yaml @@ -1,4 +1,3 @@ ---- openapi: 3.0.2 info: title: Renku Data Services API @@ -10,167 +9,6 @@ servers: - url: /api/data - url: /ui-server/api/data paths: - /storages_v2/{storage_id}: - parameters: - - in: path - name: storage_id - required: true - schema: - $ref: "#/components/schemas/UlidId" - description: the id of the storage - get: - summary: get cloud storage details - responses: - "200": - description: Found the cloud storage - content: - "application/json": - schema: - $ref: "#/components/schemas/CloudStorageGetV2" - default: - $ref: '#/components/responses/Error' - tags: - - storages_v2 - patch: - summary: partially update a cloud storage entry - requestBody: - required: true - content: - application/json: - schema: - $ref: "#/components/schemas/CloudStoragePatch" - responses: - "201": - description: The cloud storage entry was updated - content: - "application/json": - schema: - $ref: "#/components/schemas/CloudStorageGet" - default: - $ref: '#/components/responses/Error' - tags: - - storages_v2 - delete: - summary: remove a cloud storage definition - responses: - "204": - description: The rcloud storage was removed or did not exist in the first place - default: - $ref: '#/components/responses/Error' - tags: - - storages_v2 - /storages_v2: - get: - summary: get cloud storage for a project by 
id - parameters: - - in: query - description: query parameters - name: storage_v2_params - style: form - explode: true - schema: - type: object - additionalProperties: false - properties: - project_id: - $ref: "#/components/schemas/UlidId" - required: - - project_id - responses: - "200": - description: the storage configurations for the project - content: - "application/json": - schema: - type: array - items: - $ref: "#/components/schemas/CloudStorageGetV2" - "404": - description: Storage was not found - content: - "application/json": - schema: - $ref: "#/components/schemas/ErrorResponse" - default: - $ref: '#/components/responses/Error' - tags: - - storages_v2 - post: - summary: create a new cloud storage for a project - requestBody: - required: true - content: - application/json: - schema: - oneOf: - - $ref: "#/components/schemas/CloudStorage" - - $ref: "#/components/schemas/CloudStorageUrl" - responses: - "201": - description: The cloud storage entry was created - content: - "application/json": - schema: - $ref: "#/components/schemas/CloudStorageGet" - default: - $ref: '#/components/responses/Error' - tags: - - storages_v2 - /storages_v2/{storage_id}/secrets: - parameters: - - in: path - name: storage_id - required: true - schema: - $ref: "#/components/schemas/UlidId" - description: The id of the storage - get: - summary: Get all saved secrets for a cloud storage - responses: - "200": - description: The saved storage secrets - content: - "application/json": - schema: - $ref: "#/components/schemas/CloudStorageSecretGetList" - "404": - description: Storage was not found - content: - "application/json": - schema: - $ref: "#/components/schemas/ErrorResponse" - default: - $ref: "#/components/responses/Error" - tags: - - storages_v2 - post: - summary: Save secrets for a cloud storage - requestBody: - required: true - content: - application/json: - schema: - $ref: "#/components/schemas/CloudStorageSecretPostList" - responses: - "201": - description: The secrets for cloud storage were saved - content: - "application/json": - schema: - $ref: "#/components/schemas/CloudStorageSecretGetList" - default: - $ref: "#/components/responses/Error" - tags: - - storages_v2 - delete: - summary: Remove all saved secrets for a storage - responses: - "204": - description: The secrets were removed or did not exist in the first place or the storage doesn't exist - default: - $ref: "#/components/responses/Error" - tags: - - storages_v2 /storage/{storage_id}: parameters: - in: path @@ -495,60 +333,6 @@ components: $ref: "#/components/schemas/RCloneOption" required: - storage - CloudStorageGetV2: - description: Get response for a V2 cloud storage. Contains field name and secret ID for saved secrets. 
- allOf: - - $ref: "#/components/schemas/CloudStorageGet" - - type: object - properties: - secrets: - type: array - items: - $ref: "#/components/schemas/CloudStorageSecretGet" - CloudStorageSecretPost: - type: object - description: Data for storing secret for a storage field - properties: - name: - type: string - description: Name of the field to store credential for - minLength: 1 - maxLength: 99 - value: - $ref: "#/components/schemas/SecretValueNullable" - required: - - name - - value - CloudStorageSecretPostList: - description: List of storage secrets that are saved - type: array - items: - $ref: "#/components/schemas/CloudStorageSecretPost" - CloudStorageSecretGetList: - description: List of storage secrets that are saved - type: array - items: - $ref: "#/components/schemas/CloudStorageSecretGet" - CloudStorageSecretGet: - type: object - description: Data for saved storage secrets - properties: - name: - type: string - description: Name of the field to store credential for - minLength: 1 - maxLength: 99 - secret_id: - $ref: "#/components/schemas/UlidId" - required: - - name - - secret_id - SecretValueNullable: - description: Secret value that can be any text - type: string - minLength: 1 - maxLength: 5000 - nullable: true RCloneSchema: description: List of RClone schemas for different storage types type: array diff --git a/components/renku_data_services/storage/apispec.py b/components/renku_data_services/storage/apispec.py index e746800d0..329a1e91b 100644 --- a/components/renku_data_services/storage/apispec.py +++ b/components/renku_data_services/storage/apispec.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: api.spec.yaml -# timestamp: 2024-10-07T08:21:24+00:00 +# timestamp: 2024-10-07T13:19:02+00:00 from __future__ import annotations @@ -101,23 +101,6 @@ class ErrorResponse(BaseAPISpec): error: Error -class StorageV2Params(BaseAPISpec): - model_config = ConfigDict( - extra="forbid", - ) - project_id: str = Field( - ..., - description="ULID identifier of an object", - max_length=26, - min_length=26, - pattern="^[A-Z0-9]+$", - ) - - -class StoragesV2GetParametersQuery(BaseAPISpec): - storage_v2_params: Optional[StorageV2Params] = None - - class StorageParams(BaseAPISpec): model_config = ConfigDict( extra="forbid", @@ -235,43 +218,6 @@ class CloudStorageGet(BaseAPISpec): sensitive_fields: Optional[List[RCloneOption]] = None -class CloudStorageSecretPost(BaseAPISpec): - name: str = Field( - ..., - description="Name of the field to store credential for", - max_length=99, - min_length=1, - ) - value: Optional[str] = Field( - ..., - description="Secret value that can be any text", - max_length=5000, - min_length=1, - ) - - -class CloudStorageSecretPostList(RootModel[List[CloudStorageSecretPost]]): - root: List[CloudStorageSecretPost] = Field( - ..., description="List of storage secrets that are saved" - ) - - -class CloudStorageSecretGet(BaseAPISpec): - name: str = Field( - ..., - description="Name of the field to store credential for", - max_length=99, - min_length=1, - ) - secret_id: str = Field( - ..., - description="ULID identifier of an object", - max_length=26, - min_length=26, - pattern="^[A-Z0-9]+$", - ) - - class RCloneEntry(BaseAPISpec): name: Optional[str] = Field(None, description="Human readable name of the provider") description: Optional[str] = Field(None, description="description of the provider") @@ -283,10 +229,6 @@ class RCloneEntry(BaseAPISpec): ) -class StoragesV2PostRequest(RootModel[Union[CloudStorage, CloudStorageUrl]]): - root: Union[CloudStorage, 
CloudStorageUrl] - - class StorageStorageIdPutRequest(RootModel[Union[CloudStorage, CloudStorageUrl]]): root: Union[CloudStorage, CloudStorageUrl] @@ -303,21 +245,7 @@ class StoragePostRequest(RootModel[Union[CloudStorage, CloudStorageUrl]]): root: Union[CloudStorage, CloudStorageUrl] -class CloudStorageGetV2(CloudStorageGet): - secrets: Optional[List[CloudStorageSecretGet]] = None - - -class CloudStorageSecretGetList(RootModel[List[CloudStorageSecretGet]]): - root: List[CloudStorageSecretGet] = Field( - ..., description="List of storage secrets that are saved" - ) - - class RCloneSchema(RootModel[List[RCloneEntry]]): root: List[RCloneEntry] = Field( ..., description="List of RClone schemas for different storage types" ) - - -class StoragesV2GetResponse(RootModel[List[CloudStorageGetV2]]): - root: List[CloudStorageGetV2] diff --git a/components/renku_data_services/storage/blueprints.py b/components/renku_data_services/storage/blueprints.py index 6c7c327df..ce72389ff 100644 --- a/components/renku_data_services/storage/blueprints.py +++ b/components/renku_data_services/storage/blueprints.py @@ -12,9 +12,9 @@ from renku_data_services import errors from renku_data_services.base_api.auth import authenticate from renku_data_services.base_api.blueprint import BlueprintFactoryResponse, CustomBlueprint -from renku_data_services.base_api.misc import validate_body_root_model, validate_query +from renku_data_services.base_api.misc import validate_query from renku_data_services.storage import apispec, models -from renku_data_services.storage.db import StorageRepository, StorageV2Repository +from renku_data_services.storage.db import StorageRepository from renku_data_services.storage.rclone import RCloneValidator @@ -28,15 +28,6 @@ def dump_storage_with_sensitive_fields(storage: models.CloudStorage, validator: ).model_dump(exclude_none=True) -def dump_storage_with_sensitive_fields_and_secrets( - storage: models.CloudStorage, validator: RCloneValidator -) -> dict[str, Any]: - """Dump a CloudStorage model alongside sensitive fields and its saved secrets.""" - dumped_storage = dump_storage_with_sensitive_fields(storage, validator) - dumped_storage["secrets"] = [apispec.CloudStorageSecretGet.model_validate(s).model_dump() for s in storage.secrets] - return dumped_storage - - @dataclass(kw_only=True) class StorageBP(CustomBlueprint): """Handlers for manipulating storage definitions.""" @@ -184,161 +175,6 @@ async def _delete(request: Request, user: base_models.APIUser, storage_id: ULID) return "/storage/", ["DELETE"], _delete -@dataclass(kw_only=True) -class StoragesV2BP(CustomBlueprint): - """Handlers for manipulating storage definitions.""" - - storage_v2_repo: StorageV2Repository - authenticator: base_models.Authenticator - - def get(self) -> BlueprintFactoryResponse: - """Get cloud storage for a repository.""" - - @authenticate(self.authenticator) - @validate_query(query=apispec.StorageV2Params) - async def _get( - request: Request, - user: base_models.APIUser, - validator: RCloneValidator, - query: apispec.StorageV2Params, - ) -> JSONResponse: - storage: list[models.CloudStorage] - storage = await self.storage_v2_repo.get_storage( - user=user, include_secrets=True, project_id=query.project_id - ) - - return json([dump_storage_with_sensitive_fields_and_secrets(s, validator) for s in storage]) - - return "/storages_v2", ["GET"], _get - - def get_one(self) -> BlueprintFactoryResponse: - """Get a single storage by id.""" - - @authenticate(self.authenticator) - async def _get_one( - request: Request, - user: 
base_models.APIUser, - storage_id: ULID, - validator: RCloneValidator, - ) -> JSONResponse: - storage = await self.storage_v2_repo.get_storage_by_id(storage_id, user=user) - - return json(dump_storage_with_sensitive_fields_and_secrets(storage, validator)) - - return "/storages_v2/", ["GET"], _get_one - - def post(self) -> BlueprintFactoryResponse: - """Create a new cloud storage entry.""" - - @authenticate(self.authenticator) - async def _post(request: Request, user: base_models.APIUser, validator: RCloneValidator) -> JSONResponse: - storage: models.UnsavedCloudStorage - if not isinstance(request.json, dict): - body_type = type(request.json) - raise errors.ValidationError( - message=f"The payload is supposed to be a dictionary, got {body_type.__name__}" - ) - if "storage_url" in request.json: - url_body = apispec.CloudStorageUrl(**request.json) - storage = models.UnsavedCloudStorage.from_url( - storage_url=url_body.storage_url, - project_id=url_body.project_id.root, - name=url_body.name, - target_path=url_body.target_path, - readonly=url_body.readonly, - ) - else: - body = apispec.CloudStorage(**request.json) - storage = models.UnsavedCloudStorage.from_dict(body.model_dump()) - - validator.validate(storage.configuration.model_dump()) - - res = await self.storage_v2_repo.insert_storage(storage=storage, user=user) - return json(dump_storage_with_sensitive_fields(res, validator), 201) - - return "/storages_v2", ["POST"], _post - - def patch(self) -> BlueprintFactoryResponse: - """Update parts of a storage entry.""" - - @authenticate(self.authenticator) - @validate(json=apispec.CloudStoragePatch) - async def _patch( - _: Request, - user: base_models.APIUser, - storage_id: ULID, - body: apispec.CloudStoragePatch, - validator: RCloneValidator, - ) -> JSONResponse: - existing_storage = await self.storage_v2_repo.get_storage_by_id(storage_id, user=user) - if body.configuration is not None: - # we need to apply the patch to the existing storage to properly validate it - body.configuration = {**existing_storage.configuration, **body.configuration} - - for k, v in list(body.configuration.items()): - if v is None: - # delete fields that were unset - del body.configuration[k] - validator.validate(body.configuration) - - body_dict = body.model_dump(exclude_none=True) - - res = await self.storage_v2_repo.update_storage(storage_id=storage_id, user=user, **body_dict) - return json(dump_storage_with_sensitive_fields(res, validator)) - - return "/storages_v2/", ["PATCH"], _patch - - def delete(self) -> BlueprintFactoryResponse: - """Delete a storage entry.""" - - @authenticate(self.authenticator) - async def _delete(request: Request, user: base_models.APIUser, storage_id: ULID) -> HTTPResponse: - await self.storage_v2_repo.delete_storage(storage_id=storage_id, user=user) - return empty(204) - - return "/storages_v2/", ["DELETE"], _delete - - def upsert_secrets(self) -> BlueprintFactoryResponse: - """Create/update secrets for a cloud storage.""" - - @authenticate(self.authenticator) - @validate_body_root_model(json=apispec.CloudStorageSecretPostList) - async def _upsert_secrets( - _: Request, user: base_models.APIUser, storage_id: ULID, body: apispec.CloudStorageSecretPostList - ) -> JSONResponse: - secrets = [models.CloudStorageSecretUpsert.model_validate(s.model_dump()) for s in body.root] - result = await self.storage_v2_repo.upsert_storage_secrets( - storage_id=storage_id, user=user, secrets=secrets - ) - return json( - apispec.CloudStorageSecretGetList.model_validate(result).model_dump(exclude_none=True, 
mode="json"), 201 - ) - - return "/storages_v2//secrets", ["POST"], _upsert_secrets - - def get_secrets(self) -> BlueprintFactoryResponse: - """Return all secrets for a cloud storage.""" - - @authenticate(self.authenticator) - async def _get_secrets(request: Request, user: base_models.APIUser, storage_id: ULID) -> JSONResponse: - result = await self.storage_v2_repo.get_storage_secrets(storage_id=storage_id, user=user) - return json( - apispec.CloudStorageSecretGetList.model_validate(result).model_dump(exclude_none=True, mode="json"), 200 - ) - - return "/storages_v2//secrets", ["GET"], _get_secrets - - def delete_secrets(self) -> BlueprintFactoryResponse: - """Delete all secrets for a cloud storage.""" - - @authenticate(self.authenticator) - async def _delete_secrets(request: Request, user: base_models.APIUser, storage_id: ULID) -> HTTPResponse: - await self.storage_v2_repo.delete_storage_secrets(storage_id=storage_id, user=user) - return HTTPResponse(status=204) - - return "/storages_v2//secrets", ["DELETE"], _delete_secrets - - @dataclass(kw_only=True) class StorageSchemaBP(CustomBlueprint): """Handler for getting RClone storage schema.""" diff --git a/components/renku_data_services/storage/db.py b/components/renku_data_services/storage/db.py index 2b381e1c9..7b4e3cda6 100644 --- a/components/renku_data_services/storage/db.py +++ b/components/renku_data_services/storage/db.py @@ -4,19 +4,13 @@ from typing import cast from cryptography.hazmat.primitives.asymmetric import rsa -from sqlalchemy import delete, select +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload from ulid import ULID import renku_data_services.base_models as base_models from renku_data_services import errors from renku_data_services.authz import models as authz_models -from renku_data_services.authz.authz import Authz, ResourceType -from renku_data_services.secrets import orm as secrets_schemas -from renku_data_services.secrets.core import encrypt_user_secret -from renku_data_services.secrets.models import SecretKind -from renku_data_services.secrets.orm import SecretORM from renku_data_services.storage import models from renku_data_services.storage import orm as schemas from renku_data_services.users.db import UserRepo @@ -54,7 +48,6 @@ async def get_storage( id: str | None = None, project_id: str | ULID | None = None, name: str | None = None, - include_secrets: bool = False, filter_by_access_level: bool = True, ) -> list[models.CloudStorage]: """Get a storage from the database.""" @@ -72,12 +65,6 @@ async def get_storage( stmt = stmt.where(schemas.CloudStorageORM.storage_id == id) if name is not None: stmt = stmt.where(schemas.CloudStorageORM.name == name) - if include_secrets: - stmt = stmt.options( - selectinload( - schemas.CloudStorageORM.secrets.and_(schemas.CloudStorageSecretsORM.user_id == user.id) - ) - ) res = await session.execute(stmt) storage_orms = res.scalars().all() @@ -93,7 +80,7 @@ async def get_storage( async def get_storage_by_id(self, storage_id: ULID, user: base_models.APIUser) -> models.CloudStorage: """Get a single storage by id.""" - storages = await self.get_storage(user, id=str(storage_id), include_secrets=True, filter_by_access_level=False) + storages = await self.get_storage(user, id=str(storage_id), filter_by_access_level=False) if not storages: raise errors.MissingResourceError(message=f"The storage with id '{storage_id}' cannot be found") @@ -166,99 +153,6 @@ async def delete_storage(self, storage_id: ULID, user: 
base_models.APIUser) -> N await session.delete(storage[0]) - async def upsert_storage_secrets( - self, storage_id: ULID, user: base_models.APIUser, secrets: list[models.CloudStorageSecretUpsert] - ) -> list[models.CloudStorageSecret]: - """Create/update cloud storage secrets.""" - # NOTE: Check that user has proper access to the storage - storage = await self.get_storage_by_id(storage_id=storage_id, user=user) - - secret_names_values = {s.name: s.value for s in secrets} - async with self.session_maker() as session, session.begin(): - stmt = ( - select(schemas.CloudStorageSecretsORM) - .where(schemas.CloudStorageSecretsORM.user_id == user.id) - .where(schemas.CloudStorageSecretsORM.storage_id == storage.storage_id) - .options(selectinload(schemas.CloudStorageSecretsORM.secret)) - ) - result = await session.execute(stmt) - existing_storage_secrets_orm = result.scalars().all() - - existing_secrets = {s.name: s for s in existing_storage_secrets_orm} - stored_secrets = [] - - for name, value in secret_names_values.items(): - if value is None: - # delete the secret - storage_secret_orm = existing_secrets.get(name) - if storage_secret_orm is None: - continue - await session.delete(storage_secret_orm) - await session.delete(storage_secret_orm.secret) - del existing_secrets[name] - continue - - encrypted_value, encrypted_key = await encrypt_user_secret( - user_repo=self.user_repo, - requested_by=user, - secret_service_public_key=self.secret_service_public_key, - secret_value=value, - ) - - if storage_secret_orm := existing_secrets.get(name): - storage_secret_orm.secret.update(encrypted_value=encrypted_value, encrypted_key=encrypted_key) - else: - secret_orm = secrets_schemas.SecretORM( - name=f"{storage_id}-{name}", - user_id=cast(str, user.id), - encrypted_value=encrypted_value, - encrypted_key=encrypted_key, - kind=SecretKind.storage, - ) - session.add(secret_orm) - - storage_secret_orm = schemas.CloudStorageSecretsORM( - user_id=cast(str, user.id), - storage_id=storage_id, - name=name, - secret_id=secret_orm.id, - ) - session.add(storage_secret_orm) - - stored_secrets.append(storage_secret_orm.dump()) - - return stored_secrets - - async def get_storage_secrets(self, storage_id: ULID, user: base_models.APIUser) -> list[models.CloudStorageSecret]: - """Get cloud storage secrets.""" - async with self.session_maker() as session, session.begin(): - stmt = ( - select(schemas.CloudStorageSecretsORM) - .where(schemas.CloudStorageSecretsORM.user_id == user.id) - .where(schemas.CloudStorageSecretsORM.storage_id == storage_id) - ) - result = await session.execute(stmt) - storage_secrets_orm = result.scalars().all() - - return [s.dump() for s in storage_secrets_orm] - - async def delete_storage_secrets(self, storage_id: ULID, user: base_models.APIUser) -> None: - """Delete cloud storage secrets.""" - async with self.session_maker() as session, session.begin(): - stmt = ( - delete(SecretORM) - .where(schemas.CloudStorageSecretsORM.secret_id == SecretORM.id) - .where(schemas.CloudStorageSecretsORM.user_id == user.id) - .where(schemas.CloudStorageSecretsORM.storage_id == storage_id) - ) - await session.execute(stmt) - stmt = ( - delete(schemas.CloudStorageSecretsORM) - .where(schemas.CloudStorageSecretsORM.user_id == user.id) - .where(schemas.CloudStorageSecretsORM.storage_id == storage_id) - ) - await session.execute(stmt) - class StorageRepository(BaseStorageRepository): """Repository for V1 cloud storage.""" @@ -284,31 +178,3 @@ async def filter_projects_by_access_level( ) return await 
self.gitlab_client.filter_projects_by_access_level(user, project_ids, gitlab_access_level) - - -class StorageV2Repository(BaseStorageRepository): - """Repository for V2 cloud storage.""" - - def __init__( - self, - project_authz: Authz, - session_maker: Callable[..., AsyncSession], - user_repo: UserRepo, - secret_service_public_key: rsa.RSAPublicKey, - ) -> None: - super().__init__(session_maker, user_repo, secret_service_public_key) - self.project_authz: Authz = project_authz - - async def filter_projects_by_access_level( - self, user: base_models.APIUser, project_ids: list[str], minimum_access_level: authz_models.Role - ) -> list[str]: - """Get a list of project IDs of which the user is a member with a specific access level.""" - if not user.is_authenticated or not project_ids: - return [] - - scope = authz_models.Scope.WRITE if minimum_access_level == authz_models.Role.OWNER else authz_models.Scope.READ - output = [] - for id in project_ids: - if await self.project_authz.has_permission(user, ResourceType.project, ULID.from_str(id), scope): - output.append(id) - return output diff --git a/components/renku_data_services/storage/models.py b/components/renku_data_services/storage/models.py index 7db75cce0..4817e9931 100644 --- a/components/renku_data_services/storage/models.py +++ b/components/renku_data_services/storage/models.py @@ -70,8 +70,6 @@ class UnsavedCloudStorage(BaseModel): target_path: str = Field(min_length=1) """Path inside the target repository to mount/clone data to.""" - secrets: list["CloudStorageSecret"] = Field(default_factory=list) - @classmethod def from_dict(cls, data: dict) -> "UnsavedCloudStorage": """Create the model from a plain dictionary.""" @@ -230,26 +228,3 @@ class CloudStorage(UnsavedCloudStorage): """Cloudstorage saved in the database.""" storage_id: ULID = Field(default=None) - - -class CloudStorageSecret(BaseModel): - """Cloud storage secret model.""" - - user_id: str = Field() - storage_id: ULID = Field() - name: str = Field(min_length=1, max_length=99) - secret_id: ULID = Field() - - @classmethod - def from_dict(cls, data: dict) -> "CloudStorageSecret": - """Create the model from a plain dictionary.""" - return cls( - user_id=data["user_id"], storage_id=data["storage_id"], name=data["name"], secret_id=data["secret_id"] - ) - - -class CloudStorageSecretUpsert(BaseModel): - """Insert/update storage secret data.""" - - name: str = Field() - value: str | None = Field() diff --git a/components/renku_data_services/storage/orm.py b/components/renku_data_services/storage/orm.py index e127cc439..cf5fe9106 100644 --- a/components/renku_data_services/storage/orm.py +++ b/components/renku_data_services/storage/orm.py @@ -2,15 +2,13 @@ from typing import Any -from sqlalchemy import JSON, Boolean, ForeignKey, MetaData, String +from sqlalchemy import JSON, Boolean, MetaData, String from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship -from sqlalchemy.schema import Index, UniqueConstraint +from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column +from sqlalchemy.schema import UniqueConstraint from ulid import ULID -from renku_data_services.secrets.orm import SecretORM from renku_data_services.storage import models -from renku_data_services.users.orm import UserORM from renku_data_services.utils.sqlalchemy import ULIDType JSONVariant = JSON().with_variant(JSONB(), "postgresql") @@ -55,11 +53,6 @@ class CloudStorageORM(BaseORM): ) """Id of this 
storage.""" - secrets: Mapped[list["CloudStorageSecretsORM"]] = relationship( - lazy="noload", init=False, viewonly=True, default_factory=list - ) - """Saved secrets for the storage.""" - __table_args__ = ( UniqueConstraint( "project_id", @@ -92,41 +85,4 @@ def dump(self) -> models.CloudStorage: target_path=self.target_path, storage_id=self.storage_id, readonly=self.readonly, - secrets=[s.dump() for s in self.secrets], - ) - - -class CloudStorageSecretsORM(BaseORM): - """Secrets for cloud storages.""" - - __tablename__ = "cloud_storage_secrets" - __table_args__ = (Index("ix_storage_cloud_storage_secrets_user_id_storage_id", "user_id", "storage_id"),) - - user_id: Mapped[str] = mapped_column( - "user_id", ForeignKey(UserORM.keycloak_id, ondelete="CASCADE"), primary_key=True - ) - - storage_id: Mapped[ULID] = mapped_column( - "storage_id", ForeignKey(CloudStorageORM.storage_id, ondelete="CASCADE"), primary_key=True - ) - - name: Mapped[str] = mapped_column("name", String(), primary_key=True) - - secret_id: Mapped[ULID] = mapped_column("secret_id", ForeignKey(SecretORM.id, ondelete="CASCADE")) - secret: Mapped[SecretORM] = relationship(init=False, repr=False, lazy="selectin") - - @classmethod - def load(cls, storage_secret: models.CloudStorageSecret) -> "CloudStorageSecretsORM": - """Create an instance from the cloud storage secret model.""" - return cls( - user_id=storage_secret.user_id, - storage_id=storage_secret.storage_id, - name=storage_secret.name, - secret_id=storage_secret.secret_id, - ) - - def dump(self) -> models.CloudStorageSecret: - """Create a cloud storage secret model from the ORM object.""" - return models.CloudStorageSecret( - user_id=self.user_id, storage_id=self.storage_id, name=self.name, secret_id=self.secret_id ) diff --git a/projects/background_jobs/pyproject.toml b/projects/background_jobs/pyproject.toml index cb13e197f..582755859 100644 --- a/projects/background_jobs/pyproject.toml +++ b/projects/background_jobs/pyproject.toml @@ -32,6 +32,7 @@ packages = [ { include = "renku_data_services/repositories", from = "../../components" }, { include = "renku_data_services/session", from = "../../components" }, { include = "renku_data_services/platform", from = "../../components" }, + { include = "renku_data_services/data_connectors", from = "../../components" }, { include = "renku_data_services/migrations", from = "../../components" }, ] diff --git a/projects/renku_data_service/pyproject.toml b/projects/renku_data_service/pyproject.toml index 4b21c1001..a1a67e662 100644 --- a/projects/renku_data_service/pyproject.toml +++ b/projects/renku_data_service/pyproject.toml @@ -29,6 +29,7 @@ packages = [ { include = "renku_data_services/storage", from = "../../components" }, { include = "renku_data_services/users", from = "../../components" }, { include = "renku_data_services/utils", from = "../../components" }, + { include = "renku_data_services/data_connectors", from = "../../components" }, # Note: poetry poly does not detect the migrations as dependencies, but they are. Don't remove these! 
{ include = "renku_data_services/migrations", from = "../../components" }, ] diff --git a/projects/secrets_storage/pyproject.toml b/projects/secrets_storage/pyproject.toml index 3f442de7c..3647a0c85 100644 --- a/projects/secrets_storage/pyproject.toml +++ b/projects/secrets_storage/pyproject.toml @@ -31,6 +31,7 @@ packages = [ { include = "renku_data_services/storage", from = "../../components" }, { include = "renku_data_services/users", from = "../../components" }, { include = "renku_data_services/utils", from = "../../components" }, + { include = "renku_data_services/data_connectors", from = "../../components" }, ] [tool.poetry.dependencies] diff --git a/pyproject.toml b/pyproject.toml index 47d9e65a9..bed423950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ packages = [ { include = "renku_data_services/repositories", from = "components" }, { include = "renku_data_services/notebooks", from = "components" }, { include = "renku_data_services/platform", from = "components" }, + { include = "renku_data_services/data_connectors", from = "components" }, ] [tool.poetry.dependencies] @@ -191,6 +192,7 @@ disallow_untyped_defs = true module = [ "renku_data_services.crc.apispec", "renku_data_services.connected_services.apispec", + "renku_data_services.data_connectors.apispec", "renku_data_services.storage.apispec", "renku_data_services.project.apispec", "renku_data_services.repositories.apispec", diff --git a/test/bases/renku_data_services/background_jobs/test_sync.py b/test/bases/renku_data_services/background_jobs/test_sync.py index 8b70478a5..bd1365b59 100644 --- a/test/bases/renku_data_services/background_jobs/test_sync.py +++ b/test/bases/renku_data_services/background_jobs/test_sync.py @@ -26,10 +26,13 @@ bootstrap_user_namespaces, fix_mismatched_project_namespace_ids, migrate_groups_make_all_public, + migrate_storages_v2_to_data_connectors, migrate_user_namespaces_make_all_public, ) from renku_data_services.base_api.pagination import PaginationRequest from renku_data_services.base_models import APIUser +from renku_data_services.data_connectors.db import DataConnectorProjectLinkRepository, DataConnectorRepository +from renku_data_services.data_connectors.migration_utils import DataConnectorMigrationTool from renku_data_services.db_config import DBConfig from renku_data_services.errors import errors from renku_data_services.message_queue.config import RedisConfig @@ -44,6 +47,8 @@ from renku_data_services.namespace.orm import NamespaceORM from renku_data_services.project.db import ProjectRepository from renku_data_services.project.models import UnsavedProject +from renku_data_services.storage.models import UnsavedCloudStorage +from renku_data_services.storage.orm import CloudStorageORM from renku_data_services.users.db import UserRepo, UsersSync from renku_data_services.users.dummy_kc_api import DummyKeycloakAPI from renku_data_services.users.models import KeycloakAdminEvent, UnsavedUserInfo, UserInfo, UserInfoFieldUpdate @@ -74,6 +79,21 @@ def _get_app_configs( group_repo=group_repo, authz=Authz(authz_config), ) + data_connector_repo = DataConnectorRepository( + session_maker=db_config.async_session_maker, + authz=Authz(authz_config), + ) + data_connector_project_link_repo = DataConnectorProjectLinkRepository( + session_maker=db_config.async_session_maker, + authz=Authz(authz_config), + ) + data_connector_migration_tool = DataConnectorMigrationTool( + session_maker=db_config.async_session_maker, + data_connector_repo=data_connector_repo, + 
data_connector_project_link_repo=data_connector_project_link_repo,
+        project_repo=project_repo,
+        authz=Authz(authz_config),
+    )
     user_repo = UserRepo(
         db_config.async_session_maker,
         message_queue=message_queue,
@@ -96,8 +116,9 @@ def _get_app_configs(
         authz_config=authz_config,
         group_repo=group_repo,
         event_repo=event_repo,
-        session_maker=db_config.async_session_maker,
         project_repo=project_repo,
+        data_connector_migration_tool=data_connector_migration_tool,
+        session_maker=db_config.async_session_maker,
     )
     run_migrations_for_app("common")
     return config, user_repo
@@ -978,3 +999,83 @@ async def test_migrate_user_namespaces_make_all_public(
     assert ns.slug == "john.doe"
     assert ns.kind.value == "user"
     assert ns.created_by == user.id
+
+
+@pytest.mark.asyncio
+async def test_migrate_storages_v2(get_app_configs: Callable[..., tuple[SyncConfig, UserRepo]], admin_user: APIUser):
+    admin_user_info = UserInfo(
+        id=admin_user.id,
+        first_name=admin_user.first_name,
+        last_name=admin_user.last_name,
+        email=admin_user.email,
+        namespace=Namespace(
+            id=ULID(),
+            slug="admin-user",
+            created_by=admin_user.id,
+            kind=NamespaceKind.user,
+            underlying_resource_id=admin_user.id,
+        ),
+    )
+    user = UserInfo(
+        id="user-1-id",
+        first_name="Jane",
+        last_name="Doe",
+        email="jane.doe@gmail.com",
+        namespace=Namespace(
+            id=ULID(),
+            slug="jane.doe",
+            created_by="user-1-id",
+            kind=NamespaceKind.user,
+            underlying_resource_id="user-1-id",
+        ),
+    )
+    user_api = APIUser(is_admin=False, id=user.id, access_token="access_token")
+    user_roles = {admin_user.id: get_kc_roles(["renku-admin"])}
+    kc_api = DummyKeycloakAPI(users=get_kc_users([admin_user_info, user]), user_roles=user_roles)
+    sync_config, _ = get_app_configs(kc_api)
+    # Sync users
+    await sync_config.syncer.users_sync(kc_api)
+
+    # Create a project and a storage_v2 attached to it
+    project_payload = UnsavedProject(
+        name="project-1", slug="project-1", namespace=user.namespace.slug, created_by=user.id, visibility="private"
+    )
+    project = await sync_config.project_repo.insert_project(user_api, project_payload)
+    unsaved_storage = UnsavedCloudStorage.from_url(
+        storage_url="s3://my-bucket",
+        name="storage-1",
+        readonly=True,
+        project_id=str(project.id),
+        target_path="my_data",
+    )
+    # NOTE: the storages_v2 HTTP endpoints are removed in this change, so the
+    # storage row is seeded directly at the ORM level instead of via the API.
+    storage_orm = CloudStorageORM.load(unsaved_storage)
+    async with sync_config.session_maker() as session, session.begin():
+        session.add(storage_orm)
+    storage_v2 = storage_orm.dump()
+
+    await migrate_storages_v2_to_data_connectors(sync_config)
+
+    # After the migration, there is a new data connector
+    data_connector_repo = sync_config.data_connector_migration_tool.data_connector_repo
+    data_connectors, data_connectors_count = await data_connector_repo.get_data_connectors(
+        user=user_api,
+        pagination=PaginationRequest(1, 100),
+    )
+    assert data_connectors is not None
+    assert data_connectors_count == 1
+    data_connector = data_connectors[0]
+    assert data_connector.name == storage_v2.name
+    assert data_connector.storage.storage_type == storage_v2.storage_type
+    assert data_connector.storage.readonly == storage_v2.readonly
+    assert data_connector.storage.source_path == storage_v2.source_path
+    assert data_connector.storage.target_path == storage_v2.target_path
+    assert data_connector.created_by == user.id
+
+    data_connector_project_link_repo = sync_config.data_connector_migration_tool.data_connector_project_link_repo
+    links = await data_connector_project_link_repo.get_links_to(user=user_api, project_id=project.id)
+    assert links is not None
+    assert len(links) == 1
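+    # The migration is also expected to have linked the new data connector to
+    # the project that owned the original storage.
+    link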
= links[0] + assert link.project_id == project.id + assert link.data_connector_id == data_connector.id + assert link.created_by == user.id diff --git a/test/bases/renku_data_services/data_api/test_data_connectors.py b/test/bases/renku_data_services/data_api/test_data_connectors.py new file mode 100644 index 000000000..cd38306ec --- /dev/null +++ b/test/bases/renku_data_services/data_api/test_data_connectors.py @@ -0,0 +1,1185 @@ +from typing import Any + +import pytest +from sanic_testing.testing import SanicASGITestClient + +from renku_data_services.users.models import UserInfo +from test.bases.renku_data_services.data_api.utils import merge_headers + + +@pytest.fixture +def create_data_connector(sanic_client: SanicASGITestClient, regular_user, user_headers): + async def create_data_connector_helper( + name: str, user: UserInfo | None = None, headers: dict[str, str] | None = None, **payload + ) -> dict[str, Any]: + user = user or regular_user + headers = headers or user_headers + dc_payload = { + "name": name, + "description": "A data connector", + "visibility": "private", + "namespace": f"{user.first_name}.{user.last_name}", + "storage": { + "configuration": { + "type": "s3", + "provider": "AWS", + "region": "us-east-1", + }, + "source_path": "bucket/my-folder", + "target_path": "my/target", + }, + "keywords": ["keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"], + } + dc_payload.update(payload) + + _, response = await sanic_client.post("/api/data/data_connectors", headers=headers, json=dc_payload) + + assert response.status_code == 201, response.text + return response.json + + return create_data_connector_helper + + +@pytest.mark.asyncio +async def test_post_data_connector(sanic_client: SanicASGITestClient, regular_user, user_headers) -> None: + payload = { + "name": "My data connector", + "slug": "my-data-connector", + "description": "A data connector", + "visibility": "public", + "namespace": f"{regular_user.first_name}.{regular_user.last_name}", + "storage": { + "configuration": { + "type": "s3", + "provider": "AWS", + "region": "us-east-1", + }, + "source_path": "bucket/my-folder", + "target_path": "my/target", + }, + "keywords": ["keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"], + } + + _, response = await sanic_client.post("/api/data/data_connectors", headers=user_headers, json=payload) + + assert response.status_code == 201, response.text + assert response.json is not None + data_connector = response.json + assert data_connector.get("name") == "My data connector" + assert data_connector.get("namespace") == "user.doe" + assert data_connector.get("slug") == "my-data-connector" + assert data_connector.get("storage") is not None + storage = data_connector["storage"] + assert storage.get("storage_type") == "s3" + assert storage.get("source_path") == "bucket/my-folder" + assert storage.get("target_path") == "my/target" + assert storage.get("readonly") is True + assert data_connector.get("created_by") == "user" + assert data_connector.get("visibility") == "public" + assert data_connector.get("description") == "A data connector" + assert set(data_connector.get("keywords")) == {"keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"} + + # Check that we can retrieve the data connector + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector["id"]}", headers=user_headers) + assert response.status_code == 200, response.text + assert response.json is not None + assert response.json.get("id") == data_connector["id"] + + # Check that we can retrieve the data connector 
by slug + _, response = await sanic_client.get( + f"/api/data/namespaces/{data_connector["namespace"]}/data_connectors/{data_connector["slug"]}", + headers=user_headers, + ) + assert response.status_code == 200, response.text + assert response.json is not None + assert response.json.get("id") == data_connector["id"] + + +@pytest.mark.asyncio +async def test_post_data_connector_with_s3_url(sanic_client: SanicASGITestClient, regular_user, user_headers) -> None: + payload = { + "name": "My data connector", + "slug": "my-data-connector", + "description": "A data connector", + "visibility": "public", + "namespace": f"{regular_user.first_name}.{regular_user.last_name}", + "storage": { + "storage_url": "s3://my-bucket", + "target_path": "my/target", + }, + "keywords": ["keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"], + } + + _, response = await sanic_client.post("/api/data/data_connectors", headers=user_headers, json=payload) + + assert response.status_code == 201, response.text + assert response.json is not None + data_connector = response.json + assert data_connector.get("name") == "My data connector" + assert data_connector.get("namespace") == "user.doe" + assert data_connector.get("slug") == "my-data-connector" + assert data_connector.get("storage") is not None + storage = data_connector["storage"] + assert storage.get("storage_type") == "s3" + assert storage.get("source_path") == "my-bucket" + assert storage.get("target_path") == "my/target" + assert storage.get("readonly") is True + assert data_connector.get("created_by") == "user" + assert data_connector.get("visibility") == "public" + assert data_connector.get("description") == "A data connector" + assert set(data_connector.get("keywords")) == {"keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"} + + +@pytest.mark.asyncio +async def test_post_data_connector_with_azure_url( + sanic_client: SanicASGITestClient, regular_user, user_headers +) -> None: + payload = { + "name": "My data connector", + "slug": "my-data-connector", + "description": "A data connector", + "visibility": "public", + "namespace": f"{regular_user.first_name}.{regular_user.last_name}", + "storage": { + "storage_url": "azure://mycontainer/myfolder", + "target_path": "my/target", + }, + "keywords": ["keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"], + } + + _, response = await sanic_client.post("/api/data/data_connectors", headers=user_headers, json=payload) + + assert response.status_code == 201, response.text + assert response.json is not None + data_connector = response.json + assert data_connector.get("name") == "My data connector" + assert data_connector.get("namespace") == "user.doe" + assert data_connector.get("slug") == "my-data-connector" + assert data_connector.get("storage") is not None + storage = data_connector["storage"] + assert storage.get("storage_type") == "azureblob" + assert storage.get("source_path") == "mycontainer/myfolder" + assert storage.get("target_path") == "my/target" + assert storage.get("readonly") is True + assert data_connector.get("created_by") == "user" + assert data_connector.get("visibility") == "public" + assert data_connector.get("description") == "A data connector" + assert set(data_connector.get("keywords")) == {"keyword 1", "keyword.2", "keyword-3", "KEYWORD_4"} + + +@pytest.mark.asyncio +async def test_post_data_connector_with_invalid_visibility(sanic_client: SanicASGITestClient, user_headers) -> None: + payload = {"visibility": "random"} + + _, response = await sanic_client.post("/api/data/data_connectors", 
headers=user_headers, json=payload) + + assert response.status_code == 422, response.text + assert "visibility: Input should be 'private' or 'public'" in response.json["error"]["message"] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("keyword", ["invalid chars '", "Nön English"]) +async def test_post_data_connector_with_invalid_keywords( + sanic_client: SanicASGITestClient, user_headers, keyword +) -> None: + payload = {"keywords": [keyword]} + + _, response = await sanic_client.post("/api/data/data_connectors", headers=user_headers, json=payload) + + assert response.status_code == 422, response.text + assert "String should match pattern '^[A-Za-z0-9\\s\\-_.]*$'" in response.json["error"]["message"] + + +@pytest.mark.asyncio +async def test_post_data_connector_with_invalid_namespace( + sanic_client: SanicASGITestClient, user_headers, member_1_user +) -> None: + namespace = f"{member_1_user.first_name}.{member_1_user.last_name}" + _, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers) + assert response.status_code == 200, response.text + + payload = { + "name": "My data connector", + "namespace": namespace, + "storage": { + "configuration": { + "type": "s3", + "provider": "AWS", + "region": "us-east-1", + }, + "source_path": "bucket/my-folder", + "target_path": "my/target", + }, + } + _, response = await sanic_client.post("/api/data/data_connectors", headers=user_headers, json=payload) + + assert response.status_code == 403, response.text + assert "you do not have sufficient permissions" in response.json["error"]["message"] + + +@pytest.mark.asyncio +async def test_post_data_connector_with_conflicting_slug( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector_1 = await create_data_connector("Data connector 1") + + payload = { + "name": "My data connector", + "namespace": data_connector_1["namespace"], + "slug": data_connector_1["slug"], + "storage": { + "configuration": { + "type": "s3", + "provider": "AWS", + }, + "source_path": "bucket/my-folder", + "target_path": "my/target", + }, + } + _, response = await sanic_client.post("/api/data/data_connectors", headers=user_headers, json=payload) + + assert response.status_code == 409, response.text + + +@pytest.mark.asyncio +@pytest.mark.parametrize("headers_and_error", [("unauthorized_headers", 401), ("member_1_headers", 403)]) +async def test_post_data_connector_without_namespace_permission( + sanic_client: SanicASGITestClient, user_headers, headers_and_error, request +) -> None: + headers_name, status_code = headers_and_error + + _, response = await sanic_client.post( + "/api/data/groups", headers=user_headers, json={"name": "My Group", "slug": "my-group"} + ) + assert response.status_code == 201, response.text + + headers = request.getfixturevalue(headers_name) + payload = { + "name": "My data connector", + "namespace": "my-group", + "storage": { + "configuration": { + "type": "s3", + "provider": "AWS", + }, + "source_path": "bucket/my-folder", + "target_path": "my/target", + }, + } + _, response = await sanic_client.post("/api/data/data_connectors", headers=headers, json=payload) + + assert response.status_code == status_code, response.text + + +@pytest.mark.asyncio +async def test_post_data_connector_with_namespace_permission( + sanic_client: SanicASGITestClient, user_headers, member_1_headers, member_1_user +) -> None: + _, response = await sanic_client.post( + "/api/data/groups", headers=user_headers, json={"name": "My Group", "slug": 
"my-group"} + ) + assert response.status_code == 201, response.text + patch = [{"id": member_1_user.id, "role": "editor"}] + _, response = await sanic_client.patch("/api/data/groups/my-group/members", headers=user_headers, json=patch) + assert response.status_code == 200 + + payload = { + "name": "My data connector", + "namespace": "my-group", + "storage": { + "configuration": { + "type": "s3", + "provider": "AWS", + }, + "source_path": "bucket/my-folder", + "target_path": "my/target", + }, + } + _, response = await sanic_client.post("/api/data/data_connectors", headers=member_1_headers, json=payload) + + assert response.status_code == 201, response.text + + +@pytest.mark.asyncio +async def test_get_all_data_connectors_pagination( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + for i in range(1, 10): + await create_data_connector(f"Data connector {i}") + + parameters = {"page": 2, "per_page": 3} + _, response = await sanic_client.get("/api/data/data_connectors", headers=user_headers, params=parameters) + + assert response.status_code == 200, response.text + assert response.json is not None + data_connectors = response.json + assert {dc["name"] for dc in data_connectors} == { + "Data connector 4", + "Data connector 5", + "Data connector 6", + } + assert response.headers["page"] == "2" + assert response.headers["per-page"] == "3" + assert response.headers["total"] == "9" + assert response.headers["total-pages"] == "3" + + +@pytest.mark.asyncio +async def test_get_one_data_connector(sanic_client: SanicASGITestClient, create_data_connector, user_headers) -> None: + data_connector = await create_data_connector("A new data connector") + data_connector_id = data_connector["id"] + + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers) + + assert response.status_code == 200, response.text + assert response.json is not None + data_connector = response.json + assert data_connector.get("id") == data_connector_id + assert data_connector.get("name") == "A new data connector" + assert data_connector.get("namespace") == "user.doe" + assert data_connector.get("slug") == "a-new-data-connector" + + +@pytest.mark.asyncio +async def test_get_one_by_slug_data_connector( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector = await create_data_connector("A new data connector") + namespace = data_connector["namespace"] + slug = data_connector["slug"] + + _, response = await sanic_client.get( + f"/api/data/namespaces/{namespace}/data_connectors/{slug}", headers=user_headers + ) + + assert response.status_code == 200, response.text + assert response.json is not None + data_connector = response.json + assert data_connector.get("id") == data_connector["id"] + assert data_connector.get("name") == "A new data connector" + assert data_connector.get("namespace") == "user.doe" + assert data_connector.get("slug") == "a-new-data-connector" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("headers_name", ["unauthorized_headers", "member_1_headers"]) +async def test_get_one_data_connector_unauthorized( + sanic_client: SanicASGITestClient, create_data_connector, headers_name, request +) -> None: + data_connector = await create_data_connector("A new data connector") + data_connector_id = data_connector["id"] + + headers = request.getfixturevalue(headers_name) + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=headers) + + assert 
response.status_code == 404, response.text
+
+
+@pytest.mark.asyncio
+async def test_patch_data_connector(sanic_client: SanicASGITestClient, create_data_connector, user_headers) -> None:
+    data_connector = await create_data_connector("My data connector")
+
+    headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]})
+    patch = {
+        "name": "New Name",
+        "description": "Updated data connector",
+        "keywords": ["keyword 1", "keyword 2"],
+        "visibility": "public",
+        "storage": {
+            "configuration": {"type": "azureblob"},
+            "source_path": "new/src",
+            "target_path": "new/target",
+            "readonly": False,
+        },
+    }
+    data_connector_id = data_connector["id"]
+    _, response = await sanic_client.patch(
+        f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch
+    )
+
+    assert response.status_code == 200, response.text
+    assert response.json is not None
+    data_connector = response.json
+    assert data_connector.get("name") == "New Name"
+    assert data_connector.get("namespace") == "user.doe"
+    assert data_connector.get("slug") == "my-data-connector"
+    assert data_connector.get("storage") is not None
+    storage = data_connector["storage"]
+    assert storage.get("storage_type") == "azureblob"
+    assert storage.get("source_path") == "new/src"
+    assert storage.get("target_path") == "new/target"
+    assert storage.get("readonly") is False
+    assert data_connector.get("created_by") == "user"
+    assert data_connector.get("visibility") == "public"
+    assert data_connector.get("description") == "Updated data connector"
+    assert set(data_connector.get("keywords")) == {"keyword 1", "keyword 2"}
+
+
+@pytest.mark.asyncio
+async def test_patch_data_connector_can_unset_storage_field(
+    sanic_client: SanicASGITestClient, create_data_connector, user_headers
+) -> None:
+    initial_storage = {
+        "configuration": {
+            "provider": "AWS",
+            "type": "s3",
+            "region": "us-east-1",
+            "access_key_id": "ACCESS KEY",
+            "secret_access_key": "SECRET",
+        },
+        "source_path": "my-bucket",
+        "target_path": "my_data",
+    }
+    data_connector = await create_data_connector("My data connector", storage=initial_storage)
+
+    headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]})
+    data_connector_id = data_connector["id"]
+    patch = {"storage": {"configuration": {"region": None, "access_key_id": None, "secret_access_key": None}}}
+    _, response = await sanic_client.patch(
+        f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch
+    )
+
+    assert response.status_code == 200, response.text
+    assert response.json is not None
+    new_configuration = response.json["storage"]["configuration"]
+    assert new_configuration is not None
+    assert new_configuration["provider"] == "AWS"
+    assert new_configuration["type"] == "s3"
+    assert "region" not in new_configuration
+    assert "access_key_id" not in new_configuration
+    assert "secret_access_key" not in new_configuration
+    assert len(response.json["storage"]["sensitive_fields"]) == 0
+
+
+@pytest.mark.asyncio
+async def test_patch_data_connector_visibility_to_private_hides_data_connector(
+    sanic_client: SanicASGITestClient, create_data_connector, user_headers
+) -> None:
+    data_connector = await create_data_connector("My data connector", visibility="public")
+
+    _, response = await sanic_client.get("/api/data/data_connectors")
+    assert response.status_code == 200, response.text
+    assert response.json is not None
+    assert response.json[0]["name"] == "My data connector"
+
+    headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]})
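+    # NOTE: PATCH requests must send the connector's current "etag" in an
+    # If-Match header; without it the API responds with 428 Precondition
+    # Required (see test_patch_data_connector_without_if_match_header below).
+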
patch = { + "visibility": "private", + } + data_connector_id = data_connector["id"] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch + ) + assert response.status_code == 200, response.text + + _, response = await sanic_client.get("/api/data/data_connectors") + + assert len(response.json) == 0 + + +@pytest.mark.asyncio +async def test_patch_data_connector_visibility_to_public_shows_data_connector( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector = await create_data_connector("My data connector", visibility="private") + + _, response = await sanic_client.get("/api/data/data_connectors") + assert response.status_code == 200, response.text + assert response.json is not None + assert len(response.json) == 0 + + headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]}) + patch = { + "visibility": "public", + } + data_connector_id = data_connector["id"] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch + ) + assert response.status_code == 200, response.text + + _, response = await sanic_client.get("/api/data/data_connectors") + + assert response.status_code == 200, response.text + assert response.json is not None + assert response.json[0]["name"] == "My data connector" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("field", ["id", "created_by", "creation_date"]) +async def test_patch_data_connector_reserved_fields_are_forbidden( + sanic_client: SanicASGITestClient, create_data_connector, user_headers, field +) -> None: + data_connector = await create_data_connector("My data connector") + original_value = data_connector[field] + + headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]}) + patch = { + field: "new-value", + } + data_connector_id = data_connector["id"] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch + ) + + assert response.status_code == 422, response.text + assert f"{field}: Extra inputs are not permitted" in response.text + + # Check that the field's value didn't change + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers) + assert response.status_code == 200, response.text + data_connector = response.json + assert data_connector[field] == original_value + + +@pytest.mark.asyncio +async def test_patch_data_connector_without_if_match_header( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector = await create_data_connector("My data connector") + original_value = data_connector["name"] + + patch = { + "name": "New Name", + } + data_connector_id = data_connector["id"] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}", headers=user_headers, json=patch + ) + + assert response.status_code == 428, response.text + assert "If-Match header not provided" in response.text + + # Check that the field's value didn't change + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers) + assert response.status_code == 200, response.text + data_connector = response.json + assert data_connector["name"] == original_value + + +@pytest.mark.asyncio +async def test_patch_data_connector_namespace( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + _, response = await 
sanic_client.post( + "/api/data/groups", headers=user_headers, json={"name": "My Group", "slug": "my-group"} + ) + assert response.status_code == 201, response.text + data_connector = await create_data_connector("My data connector") + + headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]}) + patch = {"namespace": "my-group"} + data_connector_id = data_connector["id"] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch + ) + + assert response.status_code == 200, response.text + assert response.json is not None + data_connector = response.json + assert data_connector.get("id") == data_connector_id + assert data_connector.get("name") == "My data connector" + assert data_connector.get("namespace") == "my-group" + assert data_connector.get("slug") == "my-data-connector" + + # Check that we can retrieve the data connector by slug + _, response = await sanic_client.get( + f"/api/data/namespaces/{data_connector["namespace"]}/data_connectors/{data_connector["slug"]}", + headers=user_headers, + ) + assert response.status_code == 200, response.text + assert response.json is not None + assert response.json.get("id") == data_connector["id"] + + +@pytest.mark.asyncio +async def test_patch_data_connector_with_invalid_namespace( + sanic_client: SanicASGITestClient, create_data_connector, user_headers, member_1_user +) -> None: + namespace = f"{member_1_user.first_name}.{member_1_user.last_name}" + _, response = await sanic_client.get(f"/api/data/namespaces/{namespace}", headers=user_headers) + assert response.status_code == 200, response.text + data_connector = await create_data_connector("My data connector") + + headers = merge_headers(user_headers, {"If-Match": data_connector["etag"]}) + patch = { + "namespace": namespace, + } + data_connector_id = data_connector["id"] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}", headers=headers, json=patch + ) + + assert response.status_code == 403, response.text + assert "you do not have sufficient permissions" in response.json["error"]["message"] + + +@pytest.mark.asyncio +async def test_delete_data_connector(sanic_client: SanicASGITestClient, create_data_connector, user_headers) -> None: + await create_data_connector("Data connector 1") + data_connector = await create_data_connector("Data connector 2") + await create_data_connector("Data connector 3") + + data_connector_id = data_connector["id"] + _, response = await sanic_client.delete(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers) + + assert response.status_code == 204, response.text + + _, response = await sanic_client.get("/api/data/data_connectors", headers=user_headers) + + assert response.status_code == 200, response.text + assert {dc["name"] for dc in response.json} == {"Data connector 1", "Data connector 3"} + + +@pytest.mark.asyncio +async def test_get_data_connector_project_links_empty( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector = await create_data_connector("Data connector 1") + + data_connector_id = data_connector["id"] + _, response = await sanic_client.get( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers + ) + + assert response.status_code == 200, response.text + assert response.json is not None + assert len(response.json) == 0 + + +@pytest.mark.asyncio +async def test_post_data_connector_project_link( + sanic_client: SanicASGITestClient, 
create_data_connector, create_project, user_headers +) -> None: + data_connector = await create_data_connector("Data connector 1") + project = await create_project("Project A") + + data_connector_id = data_connector["id"] + project_id = project["id"] + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + + assert response.status_code == 201, response.text + assert response.json is not None + link = response.json + assert link.get("data_connector_id") == data_connector_id + assert link.get("project_id") == project_id + assert link.get("created_by") == "user" + + # Check that the links list from the data connector is not empty now + _, response = await sanic_client.get( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers + ) + + assert response.status_code == 200, response.text + assert response.json is not None + assert len(response.json) == 1 + assert response.json[0].get("id") == link["id"] + assert response.json[0].get("data_connector_id") == data_connector_id + assert response.json[0].get("project_id") == project_id + + # Check that the links list to the project is not empty now + _, response = await sanic_client.get(f"/api/data/projects/{project_id}/data_connector_links", headers=user_headers) + + assert response.status_code == 200, response.text + assert response.json is not None + assert len(response.json) == 1 + assert response.json[0].get("id") == link["id"] + assert response.json[0].get("data_connector_id") == data_connector_id + assert response.json[0].get("project_id") == project_id + + +@pytest.mark.asyncio +async def test_post_data_connector_project_link_already_exists( + sanic_client: SanicASGITestClient, create_data_connector, create_project, user_headers +) -> None: + data_connector = await create_data_connector("Data connector 1") + project = await create_project("Project A") + data_connector_id = data_connector["id"] + project_id = project["id"] + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + assert response.status_code == 201, response.text + + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + assert response.status_code == 409, response.text + + +@pytest.mark.asyncio +async def test_post_data_connector_project_link_unauthorized_if_not_project_editor( + sanic_client: SanicASGITestClient, + create_data_connector, + create_project, + user_headers, + member_1_headers, + member_1_user, +) -> None: + _, response = await sanic_client.post( + "/api/data/groups", headers=user_headers, json={"name": "My Group", "slug": "my-group"} + ) + assert response.status_code == 201, response.text + patch = [{"id": member_1_user.id, "role": "owner"}] + _, response = await sanic_client.patch("/api/data/groups/my-group/members", headers=user_headers, json=patch) + assert response.status_code == 200 + data_connector = await create_data_connector("Data connector 1", namespace="my-group") + data_connector_id = data_connector["id"] + project = await create_project("Project A") + project_id = project["id"] + patch = [{"id": member_1_user.id, "role": "viewer"}] + _, response = await sanic_client.patch(f"/api/data/projects/{project_id}/members", headers=user_headers, json=patch) + assert 
response.status_code == 200, response.text + + # Check that "member_1" can view the project and data connector + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=member_1_headers) + assert response.status_code == 200, response.text + _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=member_1_headers) + assert response.status_code == 200, response.text + + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=member_1_headers, json=payload + ) + + assert response.status_code == 404, response.text + + +@pytest.mark.asyncio +async def test_post_data_connector_project_link_unauthorized_if_not_data_connector_editor( + sanic_client: SanicASGITestClient, + create_data_connector, + create_project, + user_headers, + member_1_headers, + member_1_user, +) -> None: + _, response = await sanic_client.post( + "/api/data/groups", headers=user_headers, json={"name": "My Group", "slug": "my-group"} + ) + assert response.status_code == 201, response.text + patch = [{"id": member_1_user.id, "role": "viewer"}] + _, response = await sanic_client.patch("/api/data/groups/my-group/members", headers=user_headers, json=patch) + assert response.status_code == 200 + data_connector = await create_data_connector("Data connector 1", namespace="my-group") + data_connector_id = data_connector["id"] + project = await create_project("Project A") + project_id = project["id"] + patch = [{"id": member_1_user.id, "role": "owner"}] + _, response = await sanic_client.patch(f"/api/data/projects/{project_id}/members", headers=user_headers, json=patch) + assert response.status_code == 200, response.text + + # Check that "member_1" can view the project and data connector + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=member_1_headers) + assert response.status_code == 200, response.text + _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=member_1_headers) + assert response.status_code == 200, response.text + + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=member_1_headers, json=payload + ) + + assert response.status_code == 404, response.text + + +@pytest.mark.asyncio +async def test_post_data_connector_project_link_public_data_connector( + sanic_client: SanicASGITestClient, + create_data_connector, + create_project, + user_headers, + member_1_headers, + member_1_user, +) -> None: + data_connector = await create_data_connector( + "Data connector 1", user=member_1_user, headers=member_1_headers, visibility="public" + ) + data_connector_id = data_connector["id"] + project = await create_project("Project A") + project_id = project["id"] + + # Check that "regular_user" can view the project and data connector + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers) + assert response.status_code == 200, response.text + _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=user_headers) + assert response.status_code == 200, response.text + + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + + assert response.status_code == 201, response.text + assert response.json is not 
+
+    payload = {"project_id": project_id}
+    _, response = await sanic_client.post(
+        f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload
+    )
+
+    assert response.status_code == 201, response.text
+    assert response.json is not None
+    link = response.json
+    assert link.get("data_connector_id") == data_connector_id
+    assert link.get("project_id") == project_id
+    assert link.get("created_by") == "user"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("project_role", ["viewer", "editor", "owner"])
+async def test_post_data_connector_project_link_extends_read_access(
+    sanic_client: SanicASGITestClient,
+    create_data_connector,
+    create_project,
+    user_headers,
+    member_1_headers,
+    member_1_user,
+    project_role,
+) -> None:
+    data_connector = await create_data_connector("Data connector 1")
+    data_connector_id = data_connector["id"]
+    project = await create_project("Project A")
+    project_id = project["id"]
+    patch = [{"id": member_1_user.id, "role": project_role}]
+    _, response = await sanic_client.patch(f"/api/data/projects/{project_id}/members", headers=user_headers, json=patch)
+    assert response.status_code == 200, response.text
+
+    # Check that "member_1" can view the project
+    _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=member_1_headers)
+    assert response.status_code == 200, response.text
+    # Check that "member_1" cannot view the data connector
+    _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=member_1_headers)
+    assert response.status_code == 404, response.text
+
+    payload = {"project_id": project_id}
+    _, response = await sanic_client.post(
+        f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload
+    )
+    assert response.status_code == 201, response.text
+
+    # Check that "member_1" can now view the data connector
+    _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=member_1_headers)
+    assert response.status_code == 200, response.text
+    assert response.json is not None
+    assert response.json.get("id") == data_connector_id
+    assert response.json.get("name") == "Data connector 1"
+    assert response.json.get("namespace") == "user.doe"
+    assert response.json.get("slug") == "data-connector-1"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("group_role", ["viewer", "editor", "owner"])
+async def test_post_data_connector_project_link_does_not_extend_access_to_parent_group_members(
+    sanic_client: SanicASGITestClient,
+    create_data_connector,
+    user_headers,
+    member_1_headers,
+    member_1_user,
+    group_role,
+) -> None:
+    data_connector = await create_data_connector("Data connector 1")
+    data_connector_id = data_connector["id"]
+    _, response = await sanic_client.post(
+        "/api/data/groups", headers=user_headers, json={"name": "My Group", "slug": "my-group"}
+    )
+    assert response.status_code == 201, response.text
+    patch = [{"id": member_1_user.id, "role": group_role}]
+    _, response = await sanic_client.patch("/api/data/groups/my-group/members", headers=user_headers, json=patch)
+    assert response.status_code == 200
+    payload = {"name": "Project A", "namespace": "my-group"}
+    _, response = await sanic_client.post("/api/data/projects", headers=user_headers, json=payload)
+    assert response.status_code == 201
+    project = response.json
+    project_id = project["id"]
+
+    # Check that "member_1" can view the project
+    _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=member_1_headers)
+    assert response.status_code == 200, response.text
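+    # NOTE: the data connector lives in the creating user's personal namespace, so membership in
+    # the project's parent group should not, by itself, grant access to it.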
sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=member_1_headers) + assert response.status_code == 404, response.text + + data_connector_id = data_connector["id"] + project_id = project["id"] + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + assert response.status_code == 201, response.text + + # Check that "member_1" can still not view the data connector + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=member_1_headers) + assert response.status_code == 404, response.text + + +@pytest.mark.asyncio +async def test_delete_data_connector_project_link( + sanic_client: SanicASGITestClient, create_data_connector, create_project, user_headers +) -> None: + data_connector = await create_data_connector("Data connector 1") + project = await create_project("Project A") + data_connector_id = data_connector["id"] + project_id = project["id"] + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + assert response.status_code == 201, response.text + link = response.json + + _, response = await sanic_client.delete( + f"/api/data/data_connectors/{data_connector_id}/project_links/{link["id"]}", headers=user_headers + ) + + assert response.status_code == 204, response.text + + # Check that the links list from the data connector is empty now + _, response = await sanic_client.get( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers + ) + + assert response.status_code == 200, response.text + assert response.json is not None + assert len(response.json) == 0 + + # Check that the links list to the project is empty now + _, response = await sanic_client.get(f"/api/data/projects/{project_id}/data_connector_links", headers=user_headers) + + assert response.status_code == 200, response.text + assert response.json is not None + assert len(response.json) == 0 + + # Check that calling delete again returns a 204 + _, response = await sanic_client.delete( + f"/api/data/data_connectors/{data_connector_id}/project_links/{link["id"]}", headers=user_headers + ) + + assert response.status_code == 204, response.text + + +@pytest.mark.asyncio +async def test_delete_data_connector_after_linking( + sanic_client: SanicASGITestClient, create_data_connector, create_project, user_headers +) -> None: + data_connector = await create_data_connector("Data connector 1") + project = await create_project("Project A") + data_connector_id = data_connector["id"] + project_id = project["id"] + payload = {"project_id": project_id} + _, response = await sanic_client.post( + f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload + ) + assert response.status_code == 201, response.text + + _, response = await sanic_client.delete(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers) + + assert response.status_code == 204, response.text + + # Check that the project still exists + _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=user_headers) + assert response.status_code == 200, response.text + + # Check that the links list to the project is empty now + _, response = await sanic_client.get(f"/api/data/projects/{project_id}/data_connector_links", headers=user_headers) + + assert 
+
+    # Check that the project still exists
+    _, response = await sanic_client.get(f"/api/data/projects/{project_id}", headers=user_headers)
+    assert response.status_code == 200, response.text
+
+    # Check that the links list to the project is empty now
+    _, response = await sanic_client.get(f"/api/data/projects/{project_id}/data_connector_links", headers=user_headers)
+
+    assert response.status_code == 200, response.text
+    assert response.json is not None
+    assert len(response.json) == 0
+
+
+@pytest.mark.asyncio
+async def test_delete_project_after_linking(
+    sanic_client: SanicASGITestClient, create_data_connector, create_project, user_headers
+) -> None:
+    data_connector = await create_data_connector("Data connector 1")
+    project = await create_project("Project A")
+    data_connector_id = data_connector["id"]
+    project_id = project["id"]
+    payload = {"project_id": project_id}
+    _, response = await sanic_client.post(
+        f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers, json=payload
+    )
+    assert response.status_code == 201, response.text
+
+    _, response = await sanic_client.delete(f"/api/data/projects/{project_id}", headers=user_headers)
+
+    assert response.status_code == 204, response.text
+
+    # Check that the data connector still exists
+    _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}", headers=user_headers)
+
+    assert response.status_code == 200, response.text
+
+    # Check that the links list from the data connector is empty now
+    _, response = await sanic_client.get(
+        f"/api/data/data_connectors/{data_connector_id}/project_links", headers=user_headers
+    )
+
+    assert response.status_code == 200, response.text
+    assert response.json is not None
+    assert len(response.json) == 0
+
+
+@pytest.mark.asyncio
+async def test_patch_data_connector_secrets(
+    sanic_client: SanicASGITestClient, create_data_connector, user_headers
+) -> None:
+    data_connector = await create_data_connector("My data connector")
+    data_connector_id = data_connector["id"]
+
+    payload = [
+        {"name": "access_key_id", "value": "access key id value"},
+        {"name": "secret_access_key", "value": "secret access key value"},
+    ]
+    _, response = await sanic_client.patch(
+        f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers, json=payload
+    )
+
+    assert response.status_code == 200, response.json
+    assert response.json is not None
+    secrets = response.json
+    assert len(secrets) == 2
+    assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"}
+
+    # Check that the secrets are returned from a GET request
+    _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers)
+    assert response.status_code == 200, response.json
+    assert response.json is not None
+    secrets = response.json
+    assert len(secrets) == 2
+    assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"}
+
+
+@pytest.mark.asyncio
+async def test_patch_data_connector_secrets_update_secrets(
+    sanic_client: SanicASGITestClient, create_data_connector, user_headers
+) -> None:
+    data_connector = await create_data_connector("My data connector")
+    data_connector_id = data_connector["id"]
+    payload = [
+        {"name": "access_key_id", "value": "access key id value"},
+        {"name": "secret_access_key", "value": "secret access key value"},
+    ]
+    _, response = await sanic_client.patch(
+        f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers, json=payload
+    )
+    assert response.status_code == 200, response.json
+    assert response.json is not None
+    secrets = response.json
+    assert len(secrets) == 2
+    assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"}
+    secret_ids = {s["secret_id"] for s in secrets}
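+
+    # NOTE: patching with the same secret names should overwrite the values while keeping the
+    # secret ids stable.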
access key value"}, + ] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers, json=payload + ) + + assert response.status_code == 200, response.json + assert response.json is not None + secrets = response.json + assert len(secrets) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"} + assert {s["secret_id"] for s in secrets} == secret_ids + + # Check that the secrets are returned from a GET request + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers) + assert response.status_code == 200, response.json + assert response.json is not None + secrets = response.json + assert len(secrets) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"} + assert {s["secret_id"] for s in secrets} == secret_ids + + +@pytest.mark.asyncio +async def test_patch_data_connector_secrets_add_and_remove_secrets( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector = await create_data_connector("My data connector") + data_connector_id = data_connector["id"] + payload = [ + {"name": "access_key_id", "value": "access key id value"}, + {"name": "secret_access_key", "value": "secret access key value"}, + ] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers, json=payload + ) + assert response.status_code == 200, response.json + assert response.json is not None + secrets = response.json + assert len(secrets) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"} + access_key_id_secret_id = next(filter(lambda s: s["name"] == "access_key_id", secrets), None) + + payload = [ + {"name": "access_key_id", "value": "new access key id value"}, + {"name": "secret_access_key", "value": None}, + {"name": "password", "value": "password"}, + ] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers, json=payload + ) + + assert response.status_code == 200, response.json + assert response.json is not None + secrets = response.json + assert len(secrets) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "password"} + new_access_key_id_secret_id = next(filter(lambda s: s["name"] == "access_key_id", secrets), None) + assert new_access_key_id_secret_id == access_key_id_secret_id + + # Check that the secrets are returned from a GET request + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers) + assert response.status_code == 200, response.json + assert response.json is not None + secrets = response.json + assert len(secrets) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "password"} + + # Check the associated secrets + _, response = await sanic_client.get("/api/data/user/secrets", params={"kind": "storage"}, headers=user_headers) + + assert response.status_code == 200 + assert response.json is not None + assert len(response.json) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "password"} + + +@pytest.mark.asyncio +async def test_delete_data_connector_secrets( + sanic_client: SanicASGITestClient, create_data_connector, user_headers +) -> None: + data_connector = await create_data_connector("My data connector") + data_connector_id = data_connector["id"] + payload = [ + {"name": "access_key_id", "value": "access key id 
value"}, + {"name": "secret_access_key", "value": "secret access key value"}, + ] + _, response = await sanic_client.patch( + f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers, json=payload + ) + assert response.status_code == 200, response.json + assert response.json is not None + secrets = response.json + assert len(secrets) == 2 + assert {s["name"] for s in secrets} == {"access_key_id", "secret_access_key"} + + _, response = await sanic_client.delete( + f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers + ) + + assert response.status_code == 204, response.json + + # Check that the secrets list is empty from the GET request + _, response = await sanic_client.get(f"/api/data/data_connectors/{data_connector_id}/secrets", headers=user_headers) + assert response.status_code == 200, response.json + assert response.json == [], response.json + + # Check that the associated secrets are deleted + _, response = await sanic_client.get("/api/data/user/secrets", params={"kind": "storage"}, headers=user_headers) + + assert response.status_code == 200 + assert response.json == [], response.json diff --git a/test/bases/renku_data_services/data_api/test_schemathesis.py b/test/bases/renku_data_services/data_api/test_schemathesis.py index b5fa72c6a..728bbba82 100644 --- a/test/bases/renku_data_services/data_api/test_schemathesis.py +++ b/test/bases/renku_data_services/data_api/test_schemathesis.py @@ -39,9 +39,9 @@ async def apispec(sanic_client: SanicASGITestClient) -> BaseOpenAPISchema: # Same issue as for "security" for the "If-Match" header. # We skip header values which cannot be encoded as ascii. @schemathesis.hook -def filter_headers(context: HookContext, headers: dict[str, str]) -> bool: +def filter_headers(context: HookContext, headers: dict[str, str] | None) -> bool: op = context.operation - if op.method.upper() == "PATCH" and (op.path == "/projects/{project_id}" or op.path == "/platform/config"): + if headers is not None and op.method.upper() == "PATCH": if_match = headers.get("If-Match") if if_match and isinstance(if_match, str): try: diff --git a/test/bases/renku_data_services/data_api/test_storage_v2.py b/test/bases/renku_data_services/data_api/test_storage_v2.py deleted file mode 100644 index 2eb9fbfa7..000000000 --- a/test/bases/renku_data_services/data_api/test_storage_v2.py +++ /dev/null @@ -1,437 +0,0 @@ -from typing import Any, Optional - -import pytest - - -@pytest.fixture -def project_owner_member_headers(member_2_headers: dict[str, str]) -> dict[str, str]: - """Authentication headers for a normal project owner user.""" - return member_2_headers - - -@pytest.fixture -def project_non_member_headers(unauthorized_headers: dict[str, str]) -> dict[str, str]: - """Authentication headers for a user that isn't a member of a project.""" - return unauthorized_headers - - -@pytest.fixture -def project_normal_member_headers(member_1_headers: dict[str, str]) -> dict[str, str]: - """Authentication headers for a user that isn't a member of a project.""" - return member_1_headers - - -@pytest.fixture -def create_storage(sanic_client, user_headers, admin_headers, create_project, project_members): - async def create_storage_helper(project_id: Optional[str] = None, admin: bool = False, **payload) -> dict[str, Any]: - if not project_id: - project = await create_project("Project", members=project_members) - project_id = project["id"] - - headers = admin_headers if admin else user_headers - storage_payload = { - "project_id": project_id, - "name": 
"my-storage", - "configuration": { - "type": "s3", - "provider": "AWS", - "region": "us-east-1", - }, - "source_path": "bucket/my-folder", - "target_path": "my/target", - } - storage_payload.update(payload) - - _, response = await sanic_client.post("/api/data/storages_v2", headers=headers, json=storage_payload) - - assert response.status_code == 201, response.text - return response.json - - return create_storage_helper - - -@pytest.mark.asyncio -@pytest.mark.parametrize("headers_name", ["admin_headers", "user_headers", "project_owner_member_headers"]) -async def test_storage_v2_can_create_as_admin_or_owner( - sanic_client, create_project, project_members, headers_name, request -) -> None: - headers = request.getfixturevalue(headers_name) - # Create some projects - await create_project("Project 1") - project = await create_project("Project 2", members=project_members) - await create_project("Project 3") - - payload = { - "project_id": project["id"], - "name": "my-storage", - "configuration": { - "type": "s3", - "provider": "AWS", - "region": "us-east-1", - }, - "source_path": "bucket/my-folder", - "target_path": "my/target", - } - - _, response = await sanic_client.post("/api/data/storages_v2", headers=headers, json=payload) - - assert response - assert response.status_code == 201 - assert response.json - assert response.json["storage"]["project_id"] == project["id"] - assert response.json["storage"]["storage_type"] == "s3" - assert response.json["storage"]["name"] == payload["name"] - assert response.json["storage"]["target_path"] == payload["target_path"] - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "headers_name", ["unauthorized_headers", "project_normal_member_headers", "project_non_member_headers"] -) -async def test_storage_v2_create_cannot_as_unauthorized_or_non_owner_or_non_member( - sanic_client, create_project, project_members, headers_name, request -) -> None: - headers = request.getfixturevalue(headers_name) - # Create some projects - await create_project("Project 1") - project = await create_project("Project 2", members=project_members) - await create_project("Project 3") - - payload = { - "project_id": project["id"], - "name": "my-storage", - "configuration": { - "type": "s3", - "provider": "AWS", - "region": "us-east-1", - }, - "source_path": "bucket/my-folder", - "target_path": "my/target", - } - - _, response = await sanic_client.post("/api/data/storages_v2", headers=headers, json=payload) - - assert response - assert response.status_code == 403 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "headers_name", ["admin_headers", "user_headers", "project_normal_member_headers", "project_owner_member_headers"] -) -async def test_storage_v2_can_get_as_admin_or_project_members( - sanic_client, create_storage, create_project, project_members, headers_name, request -) -> None: - headers = request.getfixturevalue(headers_name) - await create_project("Project 1") - project_2 = await create_project("Project 2", members=project_members) - project_3 = await create_project("Project 3", members=project_members) - - project_2_id = project_2["id"] - - await create_storage(project_id=project_2_id) - - _, response = await sanic_client.get(f"/api/data/storages_v2?project_id={project_2_id}", headers=headers) - - assert response.status_code == 200 - assert len(response.json) == 1 - storage = response.json[0]["storage"] - assert storage["project_id"] == project_2_id - assert storage["storage_type"] == "s3" - assert storage["configuration"]["provider"] == "AWS" - - _, response = 
await sanic_client.get(f"/api/data/storages_v2?project_id={project_3['id']}", headers=headers) - - assert response.status_code == 200 - assert len(response.json) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize("headers_name", ["unauthorized_headers", "project_non_member_headers"]) -async def test_storage_v2_cannot_get_as_unauthorized_or_non_member( - sanic_client, create_storage, create_project, project_members, headers_name, request -) -> None: - headers = request.getfixturevalue(headers_name) - project = await create_project("Project", members=project_members) - project_id = project["id"] - - await create_storage(project_id=project_id) - - _, response = await sanic_client.get(f"/api/data/storages_v2?project_id={project_id}", headers=headers) - - assert response.status_code == 200 - assert len(response.json) == 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize("headers_name", ["user_headers", "project_owner_member_headers"]) -async def test_storage_v2_can_delete_as_owner(sanic_client, create_storage, headers_name, request) -> None: - headers = request.getfixturevalue(headers_name) - storage = await create_storage() - storage_id = storage["storage"]["storage_id"] - - _, response = await sanic_client.delete(f"/api/data/storages_v2/{storage_id}", headers=headers) - - assert response.status_code == 204 - - _, response = await sanic_client.get(f"/api/data/storages_v2/{storage_id}", headers=headers) - - assert response.status_code == 404 - - -@pytest.mark.asyncio -async def test_storage_v2_cannot_delete_as_normal_member( - sanic_client, create_storage, project_normal_member_headers -) -> None: - storage = await create_storage() - storage_id = storage["storage"]["storage_id"] - - _, response = await sanic_client.delete( - f"/api/data/storages_v2/{storage_id}", headers=project_normal_member_headers - ) - - assert response.status_code == 403 - - _, response = await sanic_client.get(f"/api/data/storages_v2/{storage_id}", headers=project_normal_member_headers) - - assert response.status_code == 200 - - -@pytest.mark.asyncio -@pytest.mark.parametrize("headers_name", ["unauthorized_headers", "project_non_member_headers"]) -async def test_storage_v2_cannot_delete_as_unauthorized_or_non_member( - sanic_client, create_storage, headers_name, request -) -> None: - headers = request.getfixturevalue(headers_name) - storage = await create_storage() - storage_id = storage["storage"]["storage_id"] - - _, response = await sanic_client.delete(f"/api/data/storages_v2/{storage_id}", headers=headers) - - assert response.status_code == 403, response.text - - -@pytest.mark.asyncio -@pytest.mark.parametrize("headers_name", ["user_headers", "project_owner_member_headers"]) -async def test_storage_v2_can_patch_as_owner(sanic_client, create_storage, headers_name, request) -> None: - headers = request.getfixturevalue(headers_name) - storage = await create_storage() - storage_id = storage["storage"]["storage_id"] - - payload = { - "configuration": {"provider": "Other", "region": None, "endpoint": "https://test.com"}, - "source_path": "bucket/my-other-folder", - } - - _, response = await sanic_client.patch(f"/api/data/storages_v2/{storage_id}", headers=headers, json=payload) - - assert response.status_code == 200 - assert response.json["storage"]["configuration"]["provider"] == "Other" - assert response.json["storage"]["source_path"] == "bucket/my-other-folder" - assert "region" not in response.json["storage"]["configuration"] - - -@pytest.mark.asyncio -async def test_storage_v2_cannot_patch_as_normal_member( - 
-    sanic_client, create_storage, project_normal_member_headers
-) -> None:
-    storage = await create_storage()
-    storage_id = storage["storage"]["storage_id"]
-
-    payload = {
-        "configuration": {"provider": "Other", "region": None, "endpoint": "https://test.com"},
-        "source_path": "bucket/my-other-folder",
-    }
-
-    _, response = await sanic_client.patch(
-        f"/api/data/storages_v2/{storage_id}", headers=project_normal_member_headers, json=payload
-    )
-
-    assert response.status_code == 403
-
-    _, response = await sanic_client.get(f"/api/data/storages_v2/{storage_id}", headers=project_normal_member_headers)
-
-    assert response.status_code == 200
-    storage = response.json["storage"]
-    assert storage["configuration"]["provider"] == "AWS"
-    assert response.json["storage"]["source_path"] == "bucket/my-folder"
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("headers_name", ["unauthorized_headers", "project_non_member_headers"])
-async def test_storage_v2_cannot_patch_as_unauthorized_or_non_member(
-    sanic_client, create_storage, headers_name, request
-) -> None:
-    headers = request.getfixturevalue(headers_name)
-    storage = await create_storage()
-    storage_id = storage["storage"]["storage_id"]
-
-    payload = {
-        "configuration": {"provider": "Other", "region": None, "endpoint": "https://test.com"},
-        "source_path": "bucket/my-other-folder",
-    }
-
-    _, response = await sanic_client.patch(f"/api/data/storages_v2/{storage_id}", headers=headers, json=payload)
-
-    assert response.status_code == 403, response.text
-
-
-@pytest.mark.asyncio
-async def test_storage_v2_is_deleted_if_project_is_deleted(
-    sanic_client, create_storage, create_project, user_headers
-) -> None:
-    project = await create_project("Project")
-    project_id = project["id"]
-    storage = await create_storage(project_id=project_id)
-    storage_id = storage["storage"]["storage_id"]
-
-    _, response = await sanic_client.delete(f"/api/data/projects/{project_id}", headers=user_headers)
-
-    assert response.status_code == 204, response.text
-
-    _, response = await sanic_client.get(f"/api/data/storages_v2/{storage_id}", headers=user_headers)
-
-    # NOTE: If storage isn't deleted, the status code will be 401
-    assert response.status_code == 404
-
-
-@pytest.mark.asyncio
-async def test_storage_v2_create_secret(
-    sanic_client, create_storage, project_normal_member_headers, project_owner_member_headers
-) -> None:
-    storage = await create_storage()
-    storage_id = storage["storage"]["storage_id"]
-
-    payload = [
-        {"name": "access_key_id", "value": "access key id value"},
-        {"name": "secret_access_key", "value": "secret access key value"},
-    ]
-
-    _, response = await sanic_client.post(
-        f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers, json=payload
-    )
-
-    assert response.status_code == 201, response.json
-    assert {s["name"] for s in response.json} == {"access_key_id", "secret_access_key"}, response.json
-    created_secret_ids = {s["secret_id"] for s in response.json}
-    assert len(created_secret_ids) == 2, response.json
-
-    # NOTE: Save secrets for the same storage for another user
-    payload = [
-        {"name": "another_user_secret", "value": "another value"},
-    ]
-
-    _, response = await sanic_client.post(
-        f"/api/data/storages_v2/{storage_id}/secrets", headers=project_owner_member_headers, json=payload
-    )
-
-    assert response.status_code == 201, response.json
-    assert {s["name"] for s in response.json} == {"another_user_secret"}, response.json
-
-    # NOTE: Get secrets for a storage
-    _, response = await sanic_client.get(
f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers - ) - - assert response.status_code == 200 - assert {s["name"] for s in response.json} == {"access_key_id", "secret_access_key"}, response.json - - # NOTE: Test that saved secrets are returned when getting a specific storage - _, response = await sanic_client.get(f"/api/data/storages_v2/{storage_id}", headers=project_normal_member_headers) - - assert response.status_code == 200 - assert "secrets" in response.json, response.json - assert {s["name"] for s in response.json["secrets"]} == {"access_key_id", "secret_access_key"}, response.json - assert {s["secret_id"] for s in response.json["secrets"]} == created_secret_ids, response.json - - # NOTE: Test that saved secrets are returned when getting all storages in a project - assert "project_id" in storage["storage"], storage - project_id = storage["storage"]["project_id"] - _, response = await sanic_client.get( - f"/api/data/storages_v2?project_id={project_id}", headers=project_normal_member_headers - ) - - assert response.status_code == 200 - assert len(response.json) == 1 - assert "secrets" in response.json[0], response.json - assert {s["name"] for s in response.json[0]["secrets"]} == {"access_key_id", "secret_access_key"}, response.json - assert {s["secret_id"] for s in response.json[0]["secrets"]} == created_secret_ids, response.json - - -@pytest.mark.asyncio -async def test_storage_v2_update_secret(sanic_client, create_storage, project_normal_member_headers) -> None: - storage = await create_storage() - storage_id = storage["storage"]["storage_id"] - - payload = [ - {"name": "access_key_id", "value": "access key id value"}, - {"name": "secret_access_key", "value": "secret access key value"}, - ] - - _, response = await sanic_client.post( - f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers, json=payload - ) - - assert response.status_code == 201, response.json - created_secret_ids = {s["secret_id"] for s in response.json} - - payload = [ - {"name": "access_key_id", "value": "new access key id value"}, - {"name": "secret_access_key", "value": "new secret access key value"}, - ] - - _, response = await sanic_client.post( - f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers, json=payload - ) - - assert response.status_code == 201, response.json - assert {s["name"] for s in response.json} == {"access_key_id", "secret_access_key"}, response.json - assert {s["secret_id"] for s in response.json} == created_secret_ids - - _, response = await sanic_client.get( - f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers - ) - - assert response.status_code == 200 - assert {s["name"] for s in response.json} == {"access_key_id", "secret_access_key"}, response.json - - -@pytest.mark.asyncio -async def test_storage_v2_delete_secret(sanic_client, create_storage, project_normal_member_headers) -> None: - storage = await create_storage() - storage_id = storage["storage"]["storage_id"] - - payload = [ - {"name": "access_key_id", "value": "access key id value"}, - {"name": "secret_access_key", "value": "secret access key value"}, - ] - - _, response = await sanic_client.post( - f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers, json=payload - ) - - assert response.status_code == 201, response.json - - _, response = await sanic_client.delete( - f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers - ) - - assert 
-
-    _, response = await sanic_client.get(
-        f"/api/data/storages_v2/{storage_id}/secrets", headers=project_normal_member_headers
-    )
-
-    assert response.status_code == 200
-    assert {s["name"] for s in response.json} == set(), response.json
-
-    # NOTE: Test that associated secrets are deleted
-    _, response = await sanic_client.get(
-        "/api/data/user/secrets", params={"kind": "storage"}, headers=project_normal_member_headers
-    )
-
-    assert response.status_code == 200
-    assert response.json == [], response.json
-
-    # TODO: Once saved secret sharing is implemented, add a test that makes sure shared secrets aren't deleted unless
-    # no other storage is using them