Skip to content

Commit d7426e2

Browse files
GitHKAndrei Neagumrnicegyu11
authored
Adds project migration script between deployments (ITISFoundation#2868)
* added project migration script * codestyle and remove dunsued functions * cdeoclimate * improve interface * replaced with subprocess.run * fixed formatting and added colors * fixed path * making more readable * removed ignore * fix after sync * revert change from file * semplified build * fail if exsits * more rafactor Co-authored-by: Andrei Neagu <[email protected]> Co-authored-by: Dustin Kaiser <[email protected]>
1 parent 6d4d293 commit d7426e2

File tree

8 files changed

+507
-0
lines changed

8 files changed

+507
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Utility image for migrating a project (DB rows + S3 data) between deployments.
# NOTE: the build context is the repository root (see Makefile `build` target).
FROM python:3.8.12-buster

# rclone performs the S3 -> S3 data sync
RUN curl https://rclone.org/install.sh | bash && \
    rclone --version

WORKDIR /scripts

# install the repo's postgres-database package (provides the table definitions)
# --no-cache-dir keeps the pip download cache out of the image layers
COPY packages/postgres-database postgres-database
RUN cd postgres-database && pip install --no-cache-dir .

COPY scripts/maintenance/migrate_project/requirements.txt /scripts/requirements.txt
RUN pip install --no-cache-dir -r /scripts/requirements.txt

COPY scripts/maintenance/migrate_project/src/*.py /scripts/
15+
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
.DEFAULT_GOAL := build

SHELL := /bin/bash

# name of the configuration file generated by `empty-config-file`
# (`.ignore` keeps it out of version control — it will hold credentials)
CFG_NAME := cfg.ignore.json
REPO_DIR := $(CURDIR)/../../..
IMAGE_NAME := migrate-utils
TAG := latest

# NOTE: use $(CURDIR) consistently; $(PWD)/$${PWD} is an environment
# variable that make does not guarantee to be set or correct.
build: ## builds the migration utility image (build context = repo root)
	docker build -t $(IMAGE_NAME) --file $(CURDIR)/Dockerfile $(REPO_DIR)

shell: ## opens a shell inside the image, for debugging
	docker run -it --rm $(IMAGE_NAME):$(TAG) bash

empty-config-file: # empty configuration file to fill up
	docker run -it --rm \
		$(IMAGE_NAME):$(TAG) python models.py > $(CFG_NAME)

migrate: ## runs the migration with the filled-in configuration file
	docker run -it --rm \
		-v $(CURDIR)/$(CFG_NAME):/tmp/cfg.json \
		$(IMAGE_NAME):$(TAG) python cli.py --config /tmp/cfg.json
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# project migration
2+
3+
Built on top of the existing `postgres-database` package.
4+
It is used to migrate a user's project and, optionally, the user's hidden projects (generated via the API).
5+
6+
If a file's or project's unique identifier already exists in the destination database, the process will not continue.
7+
8+
**NOTE:** data sync is done using `rclone`, currently it is assumed that the data source is a `MINIO S3 backend` and the destination is an `AWS S3 backend`.
9+
10+
11+
Any doubts? Ask **ANE**.
12+
# How to use
13+
14+
Build the image locally
15+
16+
```
17+
make build
18+
```
19+
20+
Create a configuration file
21+
22+
```
23+
make empty-config-file
24+
```
25+
26+
Fill in the generated `cfg.ignore.json` with data. Refer to `src/models.py` for the meaning of each field.
27+
28+
Finally start the process
29+
30+
```
31+
make migrate
32+
```
33+
34+
It will copy 1 file at a time, so this operation might take a bit.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pydantic
2+
typer
3+
sqlalchemy
4+
psycopg2
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from pathlib import Path
2+
3+
import typer
4+
from db import (
5+
db_connection,
6+
get_project_and_files_to_migrate,
7+
insert_file_meta_data,
8+
insert_projects,
9+
)
10+
from models import Settings
11+
from r_clone import assemble_config_file, sync_file
12+
13+
14+
def main(config: Path = typer.Option(..., exists=True)):
    """Migrate one project (and optionally hidden ones) between deployments.

    Steps: load settings, assemble an rclone config file, determine which
    projects/files still need to move, copy the S3 data one file at a time,
    then insert the corresponding rows in the destination DB.
    """
    assert config.exists()  # nosec
    settings = Settings.load_from_file(config)
    typer.echo(f"Detected settings:\n{settings.json(indent=2)}\n")

    # destination is AWS S3, source is MINIO
    rclone_cfg = assemble_config_file(
        aws_access_key=settings.destination.s3.access_key,
        aws_secret_key=settings.destination.s3.secret_key,
        minio_access_key=settings.source.s3.access_key,
        minio_secret_key=settings.source.s3.secret_key,
        minio_endpoint=settings.source.s3.endpoint,
    )
    typer.echo(f"Rclone config:\n{rclone_cfg.read_text()}\n")

    with db_connection(settings.source.db) as src_db_conn:
        with db_connection(settings.destination.db) as dst_db_conn:
            (
                projects_to_migrate,
                files_meta_data_to_migrate,
            ) = get_project_and_files_to_migrate(
                project_uuid=settings.source.project_uuid,
                hidden_projects_for_user=settings.source.hidden_projects_for_user,
                src_conn=src_db_conn,
                dst_conn=dst_db_conn,
            )

            # Move data: copy each file's S3 object, then register its row
            # (re-owned by the destination user)
            for fmd in files_meta_data_to_migrate:
                assert "user_id" in fmd  # nosec
                fmd["user_id"] = settings.destination.user_id

                sync_file(
                    config_path=rclone_cfg,
                    s3_object=fmd["object_name"],
                    source_bucket=settings.source.s3.bucket,
                    destination_bucket=settings.destination.s3.bucket,
                )
                insert_file_meta_data(connection=dst_db_conn, data=fmd)

            # insert projects, re-owned by the destination user/group
            for prj in projects_to_migrate:
                assert "prj_owner" in prj  # nosec
                prj["prj_owner"] = settings.destination.user_id

                # strip this field as it is not required:
                # the destination DB assigns its own primary key
                assert "id" in prj  # nosec
                del prj["id"]

                assert "access_rights" in prj  # nosec
                prj["access_rights"] = {
                    f"{settings.destination.user_gid}": {
                        "read": True,
                        "write": True,
                        "delete": True,
                    }
                }

                insert_projects(connection=dst_db_conn, data=prj)


if __name__ == "__main__":
    typer.run(main)
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
from collections import deque
2+
from contextlib import contextmanager
3+
from typing import Any, Deque, Dict, Iterator, Optional, Tuple
4+
from uuid import UUID
5+
6+
import typer
7+
from models import DBConfig
8+
from simcore_postgres_database.models.file_meta_data import file_meta_data
9+
from simcore_postgres_database.models.projects import projects
10+
from sqlalchemy import and_, create_engine, select
11+
from sqlalchemy.dialects.postgresql import insert
12+
from sqlalchemy.engine.base import Connection
13+
from sqlalchemy.engine.cursor import ResultProxy
14+
15+
16+
@contextmanager
def db_connection(db_config: DBConfig) -> Iterator[Connection]:
    """Yield a live connection to the postgres DB described by `db_config`.

    The connection is closed automatically when the `with` block exits.
    """
    dsn = (
        f"postgresql://{db_config.user}:{db_config.password}"
        f"@{db_config.address}/{db_config.database}"
    )
    # echo=True: log every emitted SQL statement (useful for auditing a migration)
    engine = create_engine(dsn, echo=True)
    with engine.connect() as connection:
        yield connection
24+
25+
26+
def _project_uuid_exists_in_destination(
    connection: Connection, project_id: str
) -> bool:
    """Return True if a project row with this uuid already exists."""
    query = select([projects.c.id]).where(projects.c.uuid == f"{project_id}")
    # fetch at most one row instead of materializing the whole result set
    return connection.execute(query).first() is not None
32+
33+
34+
def _meta_data_exists_in_destination(connection: Connection, file_uuid: str) -> bool:
    """Return True if a file_meta_data row with this file_uuid already exists."""
    query = select([file_meta_data.c.file_uuid]).where(
        file_meta_data.c.file_uuid == f"{file_uuid}"
    )
    # fetch at most one row instead of materializing the whole result set
    return connection.execute(query).first() is not None
40+
41+
42+
def _get_project(connection: Connection, project_uuid: UUID) -> ResultProxy:
    """Fetch the project row(s) with the given uuid from the source DB."""
    query = select([projects]).where(projects.c.uuid == f"{project_uuid}")
    return connection.execute(query)
46+
47+
48+
def _get_hidden_project(connection: Connection, prj_owner: int) -> ResultProxy:
    """Fetch all hidden projects owned by `prj_owner`."""
    owned = projects.c.prj_owner == prj_owner
    hidden = projects.c.hidden == True  # noqa: E712  (SQLAlchemy expression)
    return connection.execute(select([projects]).where(and_(owned, hidden)))
54+
55+
56+
def _get_file_meta_data_without_soft_links(
    connection: Connection, user_id: int, project_id: UUID
) -> ResultProxy:
    """Fetch file_meta_data rows for this user/project, excluding soft links."""
    criteria = and_(
        file_meta_data.c.user_id == f"{user_id}",
        file_meta_data.c.project_id == f"{project_id}",
        file_meta_data.c.is_soft_link != True,  # noqa: E712  (SQLAlchemy expression)
    )
    return connection.execute(select([file_meta_data]).where(criteria))
68+
69+
70+
def _format_message(message: str, color: str, bold: bool = False) -> None:
    """Echo `message` styled with the given foreground color, optionally bold."""
    typer.echo(typer.style(message, fg=color, bold=bold))
73+
74+
75+
def _red_message(message: str) -> None:
    """Echo `message` in bold red (warnings / errors)."""
    _format_message(message=message, color=typer.colors.RED, bold=True)
77+
78+
79+
def _green_message(message: str) -> None:
    """Echo `message` in green (progress information)."""
    _format_message(message=message, color=typer.colors.GREEN)
81+
82+
83+
def _project_summary(project: Dict) -> str:
84+
return f"PROJECT: {project['uuid']} {project['name']}"
85+
86+
87+
def _file_summary(file_meta_data: Dict) -> str:
88+
return f"FILE: {file_meta_data['file_uuid']}"
89+
90+
91+
def get_project_and_files_to_migrate(
    project_uuid: UUID,
    hidden_projects_for_user: Optional[int],
    src_conn: Connection,
    dst_conn: Connection,
) -> Tuple[Deque, Deque]:
    """Collect the project rows and file_meta_data rows that must be migrated.

    Looks up the main project (and, if `hidden_projects_for_user` is given,
    all hidden projects owned by that user) in the source DB, and verifies
    none of them — nor any of their files — already exists at the destination.

    Returns:
        (projects_to_migrate, files_meta_data_to_migrate) as deques of dicts.

    Raises:
        Exception: if the main project is missing/duplicated in the source,
            if it already exists at the destination, or if any selected
            project/file already exists at the destination.
    """
    skipped_projects: Deque = deque()
    skipped_files_meta_data: Deque = deque()

    projects_to_migrate: Deque = deque()
    files_meta_data_to_migrate: Deque = deque()

    user_project_selection = list(_get_project(src_conn, project_uuid))
    # explicit check instead of `assert` (asserts are stripped under -O)
    if len(user_project_selection) != 1:
        raise Exception(
            f"expected exactly 1 project with uuid {project_uuid}, "
            f"found {len(user_project_selection)}"
        )
    project = dict(user_project_selection[0].items())
    project_id = project["uuid"]

    if _project_uuid_exists_in_destination(dst_conn, project_id):
        error_message = f"main project {project['uuid']} already exists at destination!"
        _red_message(error_message)
        raise Exception(error_message)

    projects_to_migrate.append(project)

    if hidden_projects_for_user:
        # extract all hidden projects and check if they require syncing
        hidden_projects_cursor = _get_hidden_project(src_conn, hidden_projects_for_user)
        for hidden_result in hidden_projects_cursor:
            hidden_project = dict(hidden_result.items())
            if _project_uuid_exists_in_destination(dst_conn, hidden_project["uuid"]):
                # BUGFIX: previously reported and skipped the *main* `project`
                # here instead of the hidden project being inspected
                _red_message(f"SKIPPING, sync for {_project_summary(hidden_project)}")
                skipped_projects.append(hidden_project)
                continue

            projects_to_migrate.append(hidden_project)

    # check file_meta_data in the projects to migrate
    for project in projects_to_migrate:
        files_metadata_cursor = _get_file_meta_data_without_soft_links(
            connection=src_conn,
            user_id=project["prj_owner"],
            project_id=project["uuid"],
        )
        for result in files_metadata_cursor:
            # renamed local: avoid shadowing the imported `file_meta_data` table
            fmd_row = dict(result.items())
            if _meta_data_exists_in_destination(dst_conn, fmd_row["file_uuid"]):
                _red_message(f"SKIPPING, sync for {_file_summary(fmd_row)}")
                skipped_files_meta_data.append(fmd_row)
                continue

            files_meta_data_to_migrate.append(fmd_row)

    if len(skipped_projects) > 0:
        _red_message("SKIPPED projects count %s" % len(skipped_projects))
    if len(skipped_files_meta_data) > 0:
        _red_message("SKIPPED files count %s" % len(skipped_files_meta_data))

    _green_message("Projects to move %s" % len(projects_to_migrate))
    _green_message("Files to move %s" % len(files_meta_data_to_migrate))

    # abort if anything would collide at the destination
    if len(skipped_files_meta_data) > 0 or len(skipped_projects) > 0:
        _red_message(
            "Projects skipped uuid(primary keys) listing: %s"
            % [x["uuid"] for x in skipped_projects],
        )
        _red_message(
            "File meta data skipped file_uuid(primary keys) listing: %s"
            % [x["file_uuid"] for x in skipped_files_meta_data],
        )
        raise Exception(
            "Could not continue migration, some projects or files already exist."
        )

    return projects_to_migrate, files_meta_data_to_migrate
171+
172+
173+
def insert_file_meta_data(connection: Connection, data: Dict[str, Any]) -> None:
    """Insert one file_meta_data row (given as a column->value dict)."""
    statement = insert(file_meta_data).values(**data)
    connection.execute(statement)
175+
176+
177+
def insert_projects(connection: Connection, data: Dict[str, Any]) -> None:
    """Insert one projects row (given as a column->value dict)."""
    statement = insert(projects).values(**data)
    connection.execute(statement)

0 commit comments

Comments
 (0)