Skip to content

Commit 215ca26

Browse files
authored
🩹 Adds query option to avoid timeout for storage metadata sync (ITISFoundation#2428)
🩹 fix sync between storage's metadata table and s3 objects: - PROBLEM: current sync makes webserver handler time out when s3 holds a lot of data. - SOLUTION: improves check speed in S3 and adds option to fire&forget in query ✨ adds new user role ADMIN. Resync operation can only be done by ADMIN users
1 parent 58ee4cd commit 215ca26

File tree

15 files changed

+185
-54
lines changed

15 files changed

+185
-54
lines changed

api/specs/storage/openapi.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,12 @@ paths:
113113
schema:
114114
type: boolean
115115
default: true
116+
- name: fire_and_forget
117+
in: query
118+
required: false
119+
schema:
120+
type: boolean
121+
default: false
116122
responses:
117123
"200":
118124
description: An object containing added, changed and removed paths
@@ -671,6 +677,10 @@ components:
671677
required:
672678
- removed
673679
properties:
680+
dry_run:
681+
type: boolean
682+
fire_and_forget:
683+
type: boolean
674684
removed:
675685
type: array
676686
items:

api/specs/webserver/components/schemas/locations.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ TableSynchronisation:
4242
required:
4343
- removed
4444
properties:
45+
dry_run:
46+
type: boolean
47+
fire_and_forget:
48+
type: boolean
4549
removed:
4650
type: array
4751
items:

api/specs/webserver/openapi-storage.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ paths:
3333
schema:
3434
type: boolean
3535
default: true
36+
- name: fire_and_forget
37+
in: query
38+
required: false
39+
schema:
40+
type: boolean
41+
default: false
3642
responses:
3743
"200":
3844
description: An object containing added, changed and removed paths
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""Adds ADMIN to userrole enum
2+
3+
Revision ID: c2d3acc313e1
4+
Revises: 87a7b69f6723
5+
Create Date: 2021-07-12 13:05:05.782876+00:00
6+
7+
"""
8+
from alembic import op
9+
10+
# revision identifiers, used by Alembic.
11+
revision = "c2d3acc313e1"
12+
down_revision = "87a7b69f6723"
13+
branch_labels = None
14+
depends_on = None
15+
16+
17+
# SEE https://medium.com/makimo-tech-blog/upgrading-postgresqls-enum-type-with-sqlalchemy-using-alembic-migration-881af1e30abe
18+
19+
20+
def upgrade():
21+
with op.get_context().autocommit_block():
22+
op.execute("ALTER TYPE userrole ADD VALUE 'ADMIN'")
23+
24+
25+
def downgrade():
26+
op.execute("ALTER TYPE userrole RENAME TO userrole_old")
27+
op.execute("CREATE TYPE userrole AS ENUM('ANONYMOUS', 'GUEST', 'USER', 'TESTER')")
28+
op.execute(
29+
(
30+
"ALTER TABLE users ALTER COLUMN role TYPE userrole USING "
31+
"role::text::userrole"
32+
)
33+
)
34+
op.execute("DROP TYPE userrole_old")

packages/postgres-database/src/simcore_postgres_database/models/users.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515

1616
class UserRole(Enum):
17-
""" SORTED enumeration of user roles
17+
"""SORTED enumeration of user roles
1818
1919
A role defines a set of privileges the user can perform
2020
Roles are sorted from lower to highest privileges
@@ -25,6 +25,7 @@ class UserRole(Enum):
2525
USER : Registered user. Basic permissions to use the platform [default]
2626
TESTER : Upgraded user. First level of super-user with privileges to test the framework.
2727
Can use everything but does not have an effect in other users or actual data
28+
ADMIN : Framework admin.
2829
2930
See security_access.py
3031
"""
@@ -33,6 +34,7 @@ class UserRole(Enum):
3334
GUEST = "GUEST"
3435
USER = "USER"
3536
TESTER = "TESTER"
37+
ADMIN = "ADMIN"
3638

3739
@classmethod
3840
def super_users(cls):
@@ -43,9 +45,9 @@ def super_users(cls):
4345

4446
class UserStatus(Enum):
4547
"""
46-
pending: user registered but not confirmed
47-
active: user is confirmed and can use the platform
48-
banned: user is not authorized
48+
pending: user registered but not confirmed
49+
active: user is confirmed and can use the platform
50+
banned: user is not authorized
4951
"""
5052

5153
CONFIRMATION_PENDING = "PENDING"
@@ -99,7 +101,7 @@ class UserStatus(Enum):
99101
CREATE TRIGGER user_modification
100102
AFTER INSERT OR UPDATE OR DELETE ON users
101103
FOR EACH ROW
102-
EXECUTE PROCEDURE set_user_groups();
104+
EXECUTE PROCEDURE set_user_groups();
103105
"""
104106
)
105107

@@ -130,5 +132,7 @@ class UserStatus(Enum):
130132

131133
sa.event.listen(users, "after_create", set_user_groups_procedure)
132134
sa.event.listen(
133-
users, "after_create", new_user_trigger,
135+
users,
136+
"after_create",
137+
new_user_trigger,
134138
)

services/storage/requirements/_base.in

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,27 @@
1-
# WARNING: manually disabled urllib3 contraint to upgrade minio
1+
#
2+
# Specifies dependencies required by 'storage' service
3+
#
4+
25
--constraint ../../../requirements/constraints.txt
36

47
--requirement ../../../packages/models-library/requirements/_base.in
58
--requirement ../../../packages/postgres-database/requirements/_base.in
69
--requirement ../../../packages/service-library/requirements/_base.in
710
--requirement ../../../packages/settings-library/requirements/_base.in
811

9-
10-
aiobotocore
11-
12+
# server
1213
aiohttp
1314
aiohttp-swagger[performance]
1415

16+
# s3 storage
17+
aiobotocore
18+
minio
19+
20+
# i/o + db
1521
aiofiles
1622
aiopg[sa]
1723

18-
tenacity
24+
# misc
1925
semantic_version
26+
tenacity
2027
typer
21-
minio
22-
urllib3
23-
24-
#
25-
# FIXME: pip copiling these requirements wil fail due to incompatible contraints on urllib3 (ongoing in weekely mainteinance)
26-
#
Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
#
2-
# Specifies dependencies required to run 'storage'
2+
# Specifies dependencies required to TEST 'storage'
33
#
4+
45
--constraint ../../../requirements/constraints.txt
5-
# Adds base AS CONSTRAINT specs, not requirement.
6-
# - Resulting _text.txt is a frozen list of EXTRA packages for testing, besides _base.txt
7-
#
86
--constraint _base.txt
97

108
# testing
@@ -16,17 +14,17 @@ pytest-instafail
1614
pytest-mock
1715
pytest-runner
1816
pytest-sugar
19-
pylint
20-
python-dotenv
2117

2218
# test coverage
2319
coverage
2420
coveralls
2521
codecov
22+
23+
# fixtures
2624
faker
25+
pandas
26+
pylint
27+
python-dotenv
2728

28-
# remote debugging
29+
# remote debugger
2930
ptvsd
30-
31-
# utils
32-
pandas

services/storage/src/simcore_service_storage/api/v0/openapi.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ paths:
111111
schema:
112112
type: boolean
113113
default: true
114+
- name: fire_and_forget
115+
in: query
116+
required: false
117+
schema:
118+
type: boolean
119+
default: false
114120
responses:
115121
'200':
116122
description: 'An object containing added, changed and removed paths'
@@ -638,6 +644,10 @@ components:
638644
required:
639645
- removed
640646
properties:
647+
dry_run:
648+
type: boolean
649+
fire_and_forget:
650+
type: boolean
641651
removed:
642652
type: array
643653
items:

services/storage/src/simcore_service_storage/dsm.py

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import aiobotocore
1717
import attr
1818
import sqlalchemy as sa
19+
from aiobotocore.client import AioBaseClient
1920
from aiobotocore.session import AioSession, ClientCreatorContext
2021
from aiohttp import web
2122
from aiopg.sa import Engine
@@ -154,6 +155,8 @@ class DataStorageManager:
154155
def _create_client_context(self) -> ClientCreatorContext:
155156
assert hasattr(self.session, "create_client")
156157
# pylint: disable=no-member
158+
159+
# SEE API in https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html
157160
return self.session.create_client(
158161
"s3",
159162
endpoint_url=self.s3_client.endpoint_url,
@@ -898,7 +901,7 @@ async def delete_file(self, user_id: str, location: str, file_uuid: str):
898901

899902
async def delete_project_simcore_s3(
900903
self, user_id: str, project_id: str, node_id: Optional[str] = None
901-
) -> web.Response:
904+
) -> Optional[web.Response]:
902905

903906
"""Deletes all files from a given node in a project in simcore.s3 and updated db accordingly.
904907
If node_id is not given, then all the project files db entries are deleted.
@@ -1013,38 +1016,55 @@ async def create_soft_link(
10131016
async def synchronise_meta_data_table(
10141017
self, location: str, dry_run: bool
10151018
) -> Dict[str, Any]:
1016-
sync_results = {"removed": []}
1019+
1020+
to_remove = []
1021+
1022+
assert ( # nosec
1023+
location == SIMCORE_S3_STR
1024+
), "Only with s3, no other sync implemented" # nosec
1025+
10171026
if location == SIMCORE_S3_STR:
1018-
# NOTE: only valid for Simcore, since datcore data is not in the database table
1027+
1028+
# NOTE: only valid for simcore, since datcore data is not in the database table
10191029
# let's get all the files in the table
10201030
logger.warning(
10211031
"synchronisation of database/s3 storage started, this will take some time..."
10221032
)
1033+
10231034
async with self.engine.acquire() as conn, self._create_client_context() as s3_client:
1024-
number_of_rows_in_db = await conn.scalar(file_meta_data.count())
1035+
1036+
number_of_rows_in_db = await conn.scalar(file_meta_data.count()) or 0
10251037
logger.warning(
1026-
"total number of entries to check %d",
1038+
"Total number of entries to check %d",
10271039
number_of_rows_in_db,
10281040
)
10291041

1030-
async for row in conn.execute(file_meta_data.select()):
1042+
assert isinstance(s3_client, AioBaseClient) # nosec
1043+
1044+
async for row in conn.execute(
1045+
sa.select([file_meta_data.c.object_name])
1046+
):
10311047
s3_key = row.object_name # type: ignore
10321048

10331049
# now check if the file exists in S3
1034-
try:
1035-
await s3_client.get_object(
1036-
Bucket=self.simcore_bucket_name,
1037-
Key=s3_key,
1038-
)
1039-
except s3_client.exceptions.NoSuchKey:
1040-
# this file does not exist
1041-
sync_results["removed"].append(s3_key)
1050+
# SEE https://www.peterbe.com/plog/fastest-way-to-find-out-if-a-file-exists-in-s3
1051+
response = await s3_client.list_objects_v2(
1052+
Bucket=self.simcore_bucket_name, Prefix=s3_key
1053+
)
1054+
if response.get("KeyCount", 0) == 0: # this file does not exist
1055+
to_remove.append(s3_key)
10421056

1057+
logger.info(
1058+
"%s %d entries ",
1059+
"Would delete" if dry_run else "Deleting",
1060+
len(to_remove),
1061+
)
10431062
if not dry_run:
10441063
await conn.execute(
10451064
file_meta_data.delete().where(
1046-
file_meta_data.c.object_name.in_(sync_results["removed"])
1065+
file_meta_data.c.object_name.in_(to_remove)
10471066
)
10481067
)
1068+
logger.info("Deleted %s orphan items", len(to_remove))
10491069

1050-
return sync_results
1070+
return {"removed": to_remove}

0 commit comments

Comments
 (0)