Skip to content

Commit ad4abd0

Browse files
authored
feat!: add data connectors (#407)
Feature branch to add support for data connectors. See: SwissDataScienceCenter/renku#3751. Changes: 1. #384 2. #386 3. #406 4. #410 5. #413 6. #443 7. #422
1 parent 5b095d7 commit ad4abd0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+5399
-1188
lines changed

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,12 @@ components/renku_data_services/platform/apispec.py: components/renku_data_servic
4040
poetry run datamodel-codegen --input components/renku_data_services/platform/api.spec.yaml --output components/renku_data_services/platform/apispec.py --base-class renku_data_services.platform.apispec_base.BaseAPISpec $(codegen_params)
4141
components/renku_data_services/message_queue/apispec.py: components/renku_data_services/message_queue/api.spec.yaml
4242
poetry run datamodel-codegen --input components/renku_data_services/message_queue/api.spec.yaml --output components/renku_data_services/message_queue/apispec.py --base-class renku_data_services.message_queue.apispec_base.BaseAPISpec $(codegen_params)
43+
components/renku_data_services/data_connectors/apispec.py: components/renku_data_services/data_connectors/api.spec.yaml
44+
poetry run datamodel-codegen --input components/renku_data_services/data_connectors/api.spec.yaml --output components/renku_data_services/data_connectors/apispec.py --base-class renku_data_services.data_connectors.apispec_base.BaseAPISpec $(codegen_params)
4345

4446
##@ Apispec
4547

46-
schemas: components/renku_data_services/crc/apispec.py components/renku_data_services/storage/apispec.py components/renku_data_services/users/apispec.py components/renku_data_services/project/apispec.py components/renku_data_services/namespace/apispec.py components/renku_data_services/secrets/apispec.py components/renku_data_services/connected_services/apispec.py components/renku_data_services/repositories/apispec.py components/renku_data_services/notebooks/apispec.py components/renku_data_services/platform/apispec.py components/renku_data_services/message_queue/apispec.py ## Generate pydantic classes from apispec yaml files
48+
schemas: components/renku_data_services/crc/apispec.py components/renku_data_services/storage/apispec.py components/renku_data_services/users/apispec.py components/renku_data_services/project/apispec.py components/renku_data_services/namespace/apispec.py components/renku_data_services/secrets/apispec.py components/renku_data_services/connected_services/apispec.py components/renku_data_services/repositories/apispec.py components/renku_data_services/notebooks/apispec.py components/renku_data_services/platform/apispec.py components/renku_data_services/message_queue/apispec.py components/renku_data_services/data_connectors/apispec.py ## Generate pydantic classes from apispec yaml files
4749
@echo "generated classes based on ApiSpec"
4850

4951
##@ Avro schemas

bases/renku_data_services/background_jobs/config.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
from renku_data_services.authz.authz import Authz
1111
from renku_data_services.authz.config import AuthzConfig
12+
from renku_data_services.data_connectors.db import DataConnectorProjectLinkRepository, DataConnectorRepository
13+
from renku_data_services.data_connectors.migration_utils import DataConnectorMigrationTool
1214
from renku_data_services.errors import errors
1315
from renku_data_services.message_queue.config import RedisConfig
1416
from renku_data_services.message_queue.db import EventRepository
@@ -29,6 +31,7 @@ class SyncConfig:
2931
group_repo: GroupRepository
3032
event_repo: EventRepository
3133
project_repo: ProjectRepository
34+
data_connector_migration_tool: DataConnectorMigrationTool
3235
session_maker: Callable[..., AsyncSession]
3336

3437
@classmethod
@@ -67,7 +70,21 @@ def from_env(cls, prefix: str = "") -> "SyncConfig":
6770
group_repo=group_repo,
6871
authz=Authz(authz_config),
6972
)
70-
73+
data_connector_repo = DataConnectorRepository(
74+
session_maker=session_maker,
75+
authz=Authz(authz_config),
76+
)
77+
data_connector_project_link_repo = DataConnectorProjectLinkRepository(
78+
session_maker=session_maker,
79+
authz=Authz(authz_config),
80+
)
81+
data_connector_migration_tool = DataConnectorMigrationTool(
82+
session_maker=session_maker,
83+
data_connector_repo=data_connector_repo,
84+
data_connector_project_link_repo=data_connector_project_link_repo,
85+
project_repo=project_repo,
86+
authz=Authz(authz_config),
87+
)
7188
user_repo = UserRepo(
7289
session_maker=session_maker,
7390
message_queue=message_queue,
@@ -89,4 +106,13 @@ def from_env(cls, prefix: str = "") -> "SyncConfig":
89106
client_secret = os.environ[f"{prefix}KEYCLOAK_CLIENT_SECRET"]
90107
realm = os.environ.get(f"{prefix}KEYCLOAK_REALM", "Renku")
91108
kc_api = KeycloakAPI(keycloak_url=keycloak_url, client_id=client_id, client_secret=client_secret, realm=realm)
92-
return cls(syncer, kc_api, authz_config, group_repo, event_repo, project_repo, session_maker)
109+
return cls(
110+
syncer,
111+
kc_api,
112+
authz_config,
113+
group_repo,
114+
event_repo,
115+
project_repo,
116+
data_connector_migration_tool,
117+
session_maker,
118+
)

bases/renku_data_services/background_jobs/core.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,33 @@ async def migrate_user_namespaces_make_all_public(config: SyncConfig) -> None:
264264
)
265265
await authz.client.WriteRelationships(authz_change)
266266
logger.info(f"Made user namespace {ns_id} public")
267+
268+
269+
async def migrate_storages_v2_to_data_connectors(config: SyncConfig) -> None:
270+
"""Move storages_v2 to data_connectors."""
271+
logger = logging.getLogger("background_jobs").getChild(migrate_storages_v2_to_data_connectors.__name__)
272+
273+
api_user = InternalServiceAdmin(id=ServiceAdminId.migrations)
274+
storages_v2 = await config.data_connector_migration_tool.get_storages_v2(requested_by=api_user)
275+
276+
if not storages_v2:
277+
logger.info("Nothing to do.")
278+
return
279+
280+
logger.info(f"Migrating {len(storages_v2)} cloud storage v2 items to data connectors.")
281+
failed_storages: list[str] = []
282+
for storage in storages_v2:
283+
try:
284+
data_connector = await config.data_connector_migration_tool.migrate_storage_v2(
285+
requested_by=api_user, storage=storage
286+
)
287+
logger.info(f"Migrated {storage.name} to {data_connector.namespace.slug}/{data_connector.slug}.")
288+
logger.info(f"Deleted storage_v2: {storage.storage_id}")
289+
except Exception as err:
290+
logger.error(f"Failed to migrate {storage.name}.")
291+
logger.error(err)
292+
failed_storages.append(str(storage.storage_id))
293+
294+
logger.info(f"Migrated {len(storages_v2)-len(failed_storages)}/{len(storages_v2)} data connectors.")
295+
if failed_storages:
296+
logger.error(f"Migration failed for storages: {failed_storages}.")

bases/renku_data_services/background_jobs/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
bootstrap_user_namespaces,
1212
fix_mismatched_project_namespace_ids,
1313
migrate_groups_make_all_public,
14+
migrate_storages_v2_to_data_connectors,
1415
migrate_user_namespaces_make_all_public,
1516
)
1617
from renku_data_services.migrations.core import run_migrations_for_app
@@ -28,6 +29,7 @@ async def short_period_sync() -> None:
2829
await fix_mismatched_project_namespace_ids(config)
2930
await migrate_groups_make_all_public(config)
3031
await migrate_user_namespaces_make_all_public(config)
32+
await migrate_storages_v2_to_data_connectors(config)
3133

3234

3335
async def long_period_sync() -> None:

bases/renku_data_services/data_api/app.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
ResourcePoolUsersBP,
1616
UserResourcePoolsBP,
1717
)
18+
from renku_data_services.data_connectors.blueprints import DataConnectorsBP
1819
from renku_data_services.message_queue.blueprints import SearchBP
1920
from renku_data_services.namespace.blueprints import GroupsBP
2021
from renku_data_services.platform.blueprints import PlatformConfigBP
2122
from renku_data_services.project.blueprints import ProjectsBP
2223
from renku_data_services.repositories.blueprints import RepositoriesBP
2324
from renku_data_services.session.blueprints import EnvironmentsBP, SessionLaunchersBP
24-
from renku_data_services.storage.blueprints import StorageBP, StorageSchemaBP, StoragesV2BP
25+
from renku_data_services.storage.blueprints import StorageBP, StorageSchemaBP
2526
from renku_data_services.users.blueprints import KCUsersBP, UserPreferencesBP, UserSecretsBP
2627

2728

@@ -75,12 +76,6 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
7576
storage_repo=config.storage_repo,
7677
authenticator=config.gitlab_authenticator,
7778
)
78-
storages_v2 = StoragesV2BP(
79-
name="storages_v2",
80-
url_prefix=url_prefix,
81-
storage_v2_repo=config.storage_v2_repo,
82-
authenticator=config.authenticator,
83-
)
8479
storage_schema = StorageSchemaBP(name="storage_schema", url_prefix=url_prefix)
8580
user_preferences = UserPreferencesBP(
8681
name="user_preferences",
@@ -153,6 +148,14 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
153148
project_repo=config.project_repo,
154149
authz=config.authz,
155150
)
151+
data_connectors = DataConnectorsBP(
152+
name="data_connectors",
153+
url_prefix=url_prefix,
154+
data_connector_repo=config.data_connector_repo,
155+
data_connector_to_project_link_repo=config.data_connector_to_project_link_repo,
156+
data_connector_secret_repo=config.data_connector_secret_repo,
157+
authenticator=config.authenticator,
158+
)
156159
app.blueprint(
157160
[
158161
resource_pools.blueprint(),
@@ -163,7 +166,6 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
163166
user_secrets.blueprint(),
164167
user_resource_pools.blueprint(),
165168
storage.blueprint(),
166-
storages_v2.blueprint(),
167169
storage_schema.blueprint(),
168170
user_preferences.blueprint(),
169171
misc.blueprint(),
@@ -176,6 +178,7 @@ def register_all_handlers(app: Sanic, config: Config) -> Sanic:
176178
repositories.blueprint(),
177179
platform_config.blueprint(),
178180
search.blueprint(),
181+
data_connectors.blueprint(),
179182
]
180183
)
181184

components/renku_data_services/app_config/config.py

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import renku_data_services.base_models as base_models
2626
import renku_data_services.connected_services
2727
import renku_data_services.crc
28+
import renku_data_services.data_connectors
2829
import renku_data_services.platform
2930
import renku_data_services.repositories
3031
import renku_data_services.storage
@@ -43,6 +44,11 @@
4344
ServerOptionsDefaults,
4445
generate_default_resource_pool,
4546
)
47+
from renku_data_services.data_connectors.db import (
48+
DataConnectorProjectLinkRepository,
49+
DataConnectorRepository,
50+
DataConnectorSecretRepository,
51+
)
4652
from renku_data_services.db_config import DBConfig
4753
from renku_data_services.git.gitlab import DummyGitlabAPI, GitlabAPI
4854
from renku_data_services.k8s.clients import DummyCoreClient, DummySchedulingClient, K8sCoreClient, K8sSchedulingClient
@@ -57,7 +63,7 @@
5763
from renku_data_services.repositories.db import GitRepositoriesRepository
5864
from renku_data_services.secrets.db import UserSecretsRepo
5965
from renku_data_services.session.db import SessionRepository
60-
from renku_data_services.storage.db import StorageRepository, StorageV2Repository
66+
from renku_data_services.storage.db import StorageRepository
6167
from renku_data_services.users.config import UserPreferencesConfig
6268
from renku_data_services.users.db import UserPreferencesRepository
6369
from renku_data_services.users.db import UserRepo as KcUserRepo
@@ -162,7 +168,6 @@ class Config:
162168
_user_repo: UserRepository | None = field(default=None, repr=False, init=False)
163169
_rp_repo: ResourcePoolRepository | None = field(default=None, repr=False, init=False)
164170
_storage_repo: StorageRepository | None = field(default=None, repr=False, init=False)
165-
_storage_v2_repo: StorageV2Repository | None = field(default=None, repr=False, init=False)
166171
_project_repo: ProjectRepository | None = field(default=None, repr=False, init=False)
167172
_group_repo: GroupRepository | None = field(default=None, repr=False, init=False)
168173
_event_repo: EventRepository | None = field(default=None, repr=False, init=False)
@@ -175,6 +180,11 @@ class Config:
175180
_connected_services_repo: ConnectedServicesRepository | None = field(default=None, repr=False, init=False)
176181
_git_repositories_repo: GitRepositoriesRepository | None = field(default=None, repr=False, init=False)
177182
_platform_repo: PlatformRepository | None = field(default=None, repr=False, init=False)
183+
_data_connector_repo: DataConnectorRepository | None = field(default=None, repr=False, init=False)
184+
_data_connector_to_project_link_repo: DataConnectorProjectLinkRepository | None = field(
185+
default=None, repr=False, init=False
186+
)
187+
_data_connector_secret_repo: DataConnectorSecretRepository | None = field(default=None, repr=False, init=False)
178188

179189
def __post_init__(self) -> None:
180190
# NOTE: Read spec files required for Swagger
@@ -218,6 +228,10 @@ def __post_init__(self) -> None:
218228
with open(spec_file) as f:
219229
search = safe_load(f)
220230

231+
spec_file = Path(renku_data_services.data_connectors.__file__).resolve().parent / "api.spec.yaml"
232+
with open(spec_file) as f:
233+
data_connectors = safe_load(f)
234+
221235
self.spec = merge_api_specs(
222236
crc_spec,
223237
storage_spec,
@@ -229,6 +243,7 @@ def __post_init__(self) -> None:
229243
repositories,
230244
platform,
231245
search,
246+
data_connectors,
232247
)
233248

234249
if self.default_resource_pool_file is not None:
@@ -273,18 +288,6 @@ def storage_repo(self) -> StorageRepository:
273288
)
274289
return self._storage_repo
275290

276-
@property
277-
def storage_v2_repo(self) -> StorageV2Repository:
278-
"""The DB adapter for V2 cloud storage configs."""
279-
if not self._storage_v2_repo:
280-
self._storage_v2_repo = StorageV2Repository(
281-
session_maker=self.db.async_session_maker,
282-
project_authz=self.authz,
283-
user_repo=self.kc_user_repo,
284-
secret_service_public_key=self.secrets_service_public_key,
285-
)
286-
return self._storage_v2_repo
287-
288291
@property
289292
def event_repo(self) -> EventRepository:
290293
"""The DB adapter for cloud event configs."""
@@ -412,6 +415,36 @@ def platform_repo(self) -> PlatformRepository:
412415
)
413416
return self._platform_repo
414417

418+
@property
419+
def data_connector_repo(self) -> DataConnectorRepository:
420+
"""The DB adapter for data connectors."""
421+
if not self._data_connector_repo:
422+
self._data_connector_repo = DataConnectorRepository(
423+
session_maker=self.db.async_session_maker, authz=self.authz
424+
)
425+
return self._data_connector_repo
426+
427+
@property
428+
def data_connector_to_project_link_repo(self) -> DataConnectorProjectLinkRepository:
429+
"""The DB adapter for data connector to project links."""
430+
if not self._data_connector_to_project_link_repo:
431+
self._data_connector_to_project_link_repo = DataConnectorProjectLinkRepository(
432+
session_maker=self.db.async_session_maker, authz=self.authz
433+
)
434+
return self._data_connector_to_project_link_repo
435+
436+
@property
437+
def data_connector_secret_repo(self) -> DataConnectorSecretRepository:
438+
"""The DB adapter for data connector secrets."""
439+
if not self._data_connector_secret_repo:
440+
self._data_connector_secret_repo = DataConnectorSecretRepository(
441+
session_maker=self.db.async_session_maker,
442+
data_connector_repo=self.data_connector_repo,
443+
user_repo=self.kc_user_repo,
444+
secret_service_public_key=self.secrets_service_public_key,
445+
)
446+
return self._data_connector_secret_repo
447+
415448
@classmethod
416449
def from_env(cls, prefix: str = "") -> "Config":
417450
"""Create a config from environment variables."""

0 commit comments

Comments
 (0)