Skip to content

Commit 68a9508

Browse files
authored
✨ Add ENV to prevent copying if project too large (⚠️ devops) (ITISFoundation#3286)
1 parent d357e9b commit 68a9508

File tree

17 files changed

+194
-108
lines changed

17 files changed

+194
-108
lines changed

.env-devel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ TRACING_THRIFT_COMPACT_ENDPOINT=http://jaeger:5775
8787
TRAEFIK_SIMCORE_ZONE=internal_simcore_stack
8888

8989
# NOTE: WEBSERVER_SESSION_SECRET_KEY = $(python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key())")
90+
PROJECTS_MAX_COPY_SIZE_BYTES=30Gib
9091
WEBSERVER_DEV_FEATURES_ENABLED=0
9192
WEBSERVER_HOST=webserver
9293
WEBSERVER_LOGIN_REGISTRATION_CONFIRMATION_REQUIRED=0

.env-wb-garbage-collector

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ WEBSERVER_EXPORTER=null
1515
WEBSERVER_FRONTEND=null
1616
#WEBSERVER_GARBAGE_COLLECTOR explicit in
1717
WEBSERVER_LOGIN=null
18+
WEBSERVER_PROJECTS=null
1819
#WEBSERVER_REDIS from .env
1920
#WEBSERVER_REST needed for the healthcheck
2021
#WEBSERVER_RESOURCE_MANAGER from .env
@@ -28,7 +29,6 @@ WEBSERVER_CLUSTERS=0
2829
WEBSERVER_GROUPS=0
2930
WEBSERVER_META_MODELING=0
3031
WEBSERVER_PRODUCTS=0
31-
WEBSERVER_PROJECTS=0
3232
WEBSERVER_PUBLICATIONS=0
3333
WEBSERVER_SOCKETIO=0
3434
WEBSERVER_STUDIES_DISPATCHER=null

services/web/client/source/class/osparc/data/PollTask.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ qx.Class.define("osparc.data.PollTask", {
108108
.then(res => res.json())
109109
.then(result => {
110110
if ("error" in result && result["error"]) {
111-
throw new Error(result["error"]);
111+
throw new Error(result["error"]["message"]);
112112
}
113113
if ("data" in result && result["data"]) {
114114
const resultData = result["data"];

services/web/server/src/simcore_service_webserver/application_settings.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22
from functools import cached_property
3-
from typing import Any, Dict, List, Optional
3+
from typing import Any, Optional
44

55
from aiohttp import web
66
from models_library.basic_types import (
@@ -33,6 +33,7 @@
3333
from .exporter.settings import ExporterSettings
3434
from .garbage_collector_settings import GarbageCollectorSettings
3535
from .login.settings import LoginSettings
36+
from .projects.projects_settings import ProjectsSettings
3637
from .resource_manager.settings import ResourceManagerSettings
3738
from .rest_settings import RestSettings
3839
from .scicrunch.settings import SciCrunchSettings
@@ -169,12 +170,15 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
169170
auto_default_from_env=True, description="tracing plugin"
170171
)
171172

173+
WEBSERVER_PROJECTS: Optional[ProjectsSettings] = Field(
174+
auto_default_from_env=True, description="projects plugin"
175+
)
176+
172177
# These plugins only require (for the moment) an entry to toggle between enabled/disabled
173178
WEBSERVER_CLUSTERS: bool = True
174179
WEBSERVER_GROUPS: bool = True
175180
WEBSERVER_META_MODELING: bool = True
176181
WEBSERVER_PRODUCTS: bool = True
177-
WEBSERVER_PROJECTS: bool = True
178182
WEBSERVER_PUBLICATIONS: bool = True
179183
WEBSERVER_REMOTE_DEBUG: bool = True
180184
WEBSERVER_SOCKETIO: bool = True
@@ -249,7 +253,7 @@ def get_healthcheck_timeout_in_seconds(cls, v):
249253
def is_enabled(self, field_name: str) -> bool:
250254
return bool(getattr(self, field_name, None))
251255

252-
def _get_disabled_public_plugins(self) -> List[str]:
256+
def _get_disabled_public_plugins(self) -> list[str]:
253257
plugins_disabled = []
254258
# NOTE: this list is limited for security reasons. An unbounded list
255259
# might reveal critical info on the settings of a deploy to the client.
@@ -267,17 +271,15 @@ def _get_disabled_public_plugins(self) -> List[str]:
267271
plugins_disabled.append(field_name)
268272
return plugins_disabled
269273

270-
def public_dict(self) -> Dict[str, Any]:
274+
def public_dict(self) -> dict[str, Any]:
271275
"""Data publicaly available"""
272276

273277
data = {"invitation_required": False}
274278
if self.WEBSERVER_LOGIN:
275279
data[
276280
"invitation_required"
277281
] = self.WEBSERVER_LOGIN.LOGIN_REGISTRATION_INVITATION_REQUIRED
278-
data[
279-
"login_2fa_required"
280-
] = self.WEBSERVER_LOGIN.LOGIN_2FA_REQUIRED
282+
data["login_2fa_required"] = self.WEBSERVER_LOGIN.LOGIN_2FA_REQUIRED
281283

282284
data.update(
283285
self.dict(
@@ -294,7 +296,7 @@ def public_dict(self) -> Dict[str, Any]:
294296
)
295297
return data
296298

297-
def to_client_statics(self) -> Dict[str, Any]:
299+
def to_client_statics(self) -> dict[str, Any]:
298300
data = self.dict(
299301
include={
300302
"APP_NAME",

services/web/server/src/simcore_service_webserver/projects/plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def _create_routes(tag, specs, *handlers_module):
5353
logger=logger,
5454
)
5555
def setup_projects(app: web.Application) -> bool:
56-
assert app[APP_SETTINGS_KEY].WEBSERVER_PROJECTS is True # nosec
56+
assert app[APP_SETTINGS_KEY].WEBSERVER_PROJECTS # nosec
5757

5858
# API routes
5959
specs = app[APP_OPENAPI_SPECS_KEY]

services/web/server/src/simcore_service_webserver/projects/projects_handlers_crud.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,14 @@
3737
from .. import catalog, director_v2_api
3838
from .._constants import RQ_PRODUCT_KEY
3939
from .._meta import api_version_prefix as VTAG
40+
from ..application_settings import get_settings
4041
from ..login.decorators import RQT_USERID_KEY, login_required
4142
from ..long_running_tasks import start_task_with_context
4243
from ..resource_manager.websocket_manager import PROJECT_ID_KEY, managed_resource
4344
from ..rest_constants import RESPONSE_MODEL_POLICY
4445
from ..security_api import check_permission
4546
from ..security_decorators import permission_required
46-
from ..storage_api import copy_data_folders_from_project
47+
from ..storage_api import copy_data_folders_from_project, get_project_total_size
4748
from ..users_api import get_user_name
4849
from . import projects_api
4950
from .project_models import ProjectDict, ProjectTypeAPI
@@ -169,6 +170,18 @@ async def _init_project_from_request(
169170
include_templates=True,
170171
)
171172

173+
if max_bytes := get_settings(app).WEBSERVER_PROJECTS.PROJECTS_MAX_COPY_SIZE_BYTES:
174+
# get project total data size
175+
project_data_size = await get_project_total_size(
176+
app, user_id, ProjectID(query_params.from_study)
177+
)
178+
if project_data_size >= max_bytes:
179+
raise web.HTTPUnprocessableEntity(
180+
reason=f"Source project data size is {project_data_size.human_readable()}."
181+
f"This is larger than the maximum {max_bytes.human_readable()} allowed for copying."
182+
"TIP: Please reduce the study size or contact application support."
183+
)
184+
172185
# clone template as user project
173186
new_project, nodes_map = clone_project_document(
174187
source_project,
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from pydantic import ByteSize, Field, parse_obj_as
2+
from settings_library.base import BaseCustomSettings
3+
4+
5+
# Settings of the projects plugin.
# NOTE: documented with comments on purpose, a class docstring on a pydantic
# model would be emitted as the "description" of its schema.
class ProjectsSettings(BaseCustomSettings):
    # Size limit applied when copying a project; defaults to 30 GiB and,
    # as stated in the field description, 0 disables the limit.
    PROJECTS_MAX_COPY_SIZE_BYTES: ByteSize = Field(
        default=parse_obj_as(ByteSize, "30Gib"),
        description=(
            "defines the maximum authorized project data size"
            " when copying a project (disable with 0)"
        ),
    )

services/web/server/src/simcore_service_webserver/storage_api.py

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,14 @@
44
import asyncio
55
import logging
66
from pprint import pformat
7-
from typing import Any, Dict, Tuple
7+
from typing import Any
88

99
from aiohttp import ClientError, ClientSession, ClientTimeout, web
10+
from models_library.api_schemas_storage import FileLocationArray, FileMetaDataGet
11+
from models_library.generics import Envelope
12+
from models_library.projects import ProjectID
13+
from models_library.users import UserID
14+
from pydantic import ByteSize, parse_obj_as
1015
from pydantic.types import PositiveInt
1116
from servicelib.aiohttp.client_session import get_client_session
1217
from servicelib.aiohttp.rest_responses import unwrap_envelope
@@ -19,7 +24,7 @@
1924
TOTAL_TIMEOUT_TO_COPY_DATA_SECS = 60 * 60
2025

2126

22-
def _get_storage_client(app: web.Application) -> Tuple[ClientSession, URL]:
27+
def _get_storage_client(app: web.Application) -> tuple[ClientSession, URL]:
2328
settings: StorageSettings = get_plugin_settings(app)
2429
# storage service API endpoint
2530
endpoint = URL(settings.base_url)
@@ -28,16 +33,59 @@ def _get_storage_client(app: web.Application) -> Tuple[ClientSession, URL]:
2833
return session, endpoint
2934

3035

36+
async def get_storage_locations(
    app: web.Application, user_id: UserID
) -> FileLocationArray:
    """Fetch the storage locations that ``user_id`` can access.

    Issues ``GET <storage>/locations?user_id=...`` and returns the ``data``
    part of the enveloped response.

    Raises whatever ``response.raise_for_status()`` raises when the storage
    service answers with an error status.
    """
    log.debug("getting %s accessible locations...", f"{user_id=}")
    client, endpoint = _get_storage_client(app)
    url = (endpoint / "locations").with_query(user_id=user_id)

    async with client.get(f"{url}") as resp:
        resp.raise_for_status()
        payload = await resp.json()
        # NOTE: this local keeps its name because the f-string below uses the
        # `=` specifier, which prints the expression text into the log record
        locations_enveloped = Envelope[FileLocationArray].parse_obj(payload)
        assert locations_enveloped.data  # nosec
        log.info("%s can access %s", f"{user_id=}", f"{locations_enveloped.data=}")
        return locations_enveloped.data
50+
51+
52+
async def get_project_total_size(
    app: web.Application, user_id: UserID, project_uuid: ProjectID
) -> ByteSize:
    """Return the total size of the files stored for ``project_uuid``.

    For every storage location accessible to ``user_id``, the files metadata
    is requested with ``uuid_filter=<project_uuid>`` and the reported
    ``file_size`` values are added up.

    Raises whatever ``response.raise_for_status()`` raises when the storage
    service answers with an error status.
    """
    log.debug("getting %s total size for %s", f"{project_uuid=}", f"{user_id=}")
    user_accessible_locations = await get_storage_locations(app, user_id)
    session, api_endpoint = _get_storage_client(app)

    project_size_bytes = 0
    for location in user_accessible_locations:
        files_metadata_url = (
            api_endpoint / "locations" / f"{location.id}" / "files" / "metadata"
        ).with_query(user_id=user_id, uuid_filter=f"{project_uuid}")
        async with session.get(f"{files_metadata_url}") as response:
            response.raise_for_status()
            list_of_files_enveloped = Envelope[list[FileMetaDataGet]].parse_obj(
                await response.json()
            )
        # An empty list is a valid answer (the project has no files in this
        # location) and must count as 0 bytes; only a missing payload is
        # unexpected. A plain truthiness check would reject the empty list.
        assert list_of_files_enveloped.data is not None  # nosec
        project_size_bytes += sum(
            file_metadata.file_size for file_metadata in list_of_files_enveloped.data
        )

    project_size = parse_obj_as(ByteSize, project_size_bytes)
    log.info(
        "%s total size is %s", f"{project_uuid}", f"{project_size.human_readable()}"
    )
    return project_size
77+
78+
3179
async def copy_data_folders_from_project(
3280
app: web.Application,
33-
source_project: Dict,
34-
destination_project: Dict,
35-
nodes_map: Dict,
81+
source_project: dict,
82+
destination_project: dict,
83+
nodes_map: dict,
3684
user_id: int,
3785
):
3886
# TODO: optimize by checking whether the project actually has data before doing the call
3987
client, api_endpoint = _get_storage_client(app)
40-
log.debug("Coying %d nodes", len(nodes_map))
88+
log.debug("Copying %d nodes", len(nodes_map))
4189

4290
# /simcore-s3/folders:
4391
url = (api_endpoint / "simcore-s3/folders").with_query(user_id=user_id)
@@ -113,7 +161,7 @@ async def is_healthy(app: web.Application) -> bool:
113161
return False
114162

115163

116-
async def get_app_status(app: web.Application) -> Dict[str, Any]:
164+
async def get_app_status(app: web.Application) -> dict[str, Any]:
117165
client, api_endpoint = _get_storage_client(app)
118166

119167
data = {}

services/web/server/tests/conftest.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from models_library.projects_networks import PROJECT_NETWORK_PREFIX
1919
from models_library.projects_state import ProjectState
2020
from pytest_simcore.helpers.utils_assert import assert_status
21+
from pytest_simcore.helpers.utils_dict import ConfigDict
2122
from pytest_simcore.helpers.utils_login import LoggedUser, UserInfoDict
2223
from servicelib.aiohttp.long_running_tasks.server import TaskStatus
2324
from servicelib.json_serialization import json_dumps
@@ -129,7 +130,9 @@ async def logged_user(client, user_role: UserRole) -> AsyncIterator[UserInfoDict
129130

130131

131132
@pytest.fixture
132-
def monkeypatch_setenv_from_app_config(monkeypatch: MonkeyPatch) -> Callable:
133+
def monkeypatch_setenv_from_app_config(
134+
monkeypatch: MonkeyPatch,
135+
) -> Callable[[ConfigDict], dict[str, str]]:
133136
# TODO: Change signature to be analogous to
134137
# packages/pytest-simcore/src/pytest_simcore/helpers/utils_envs.py
135138
# That solution is more flexible e.g. for context manager with monkeypatch
@@ -270,9 +273,12 @@ async def _creator(
270273
)
271274

272275
# get result GET /{task_id}/result
273-
print(f"--> getting project creation result...")
276+
print("--> getting project creation result...")
274277
result = await client.get(f"{result_url}")
275278
data, error = await assert_status(result, expected_creation_response)
279+
if error:
280+
assert not data
281+
return {}
276282
assert data
277283
assert not error
278284
print(f"<-- result: {data}")

services/web/server/tests/data/default_app_config-unit.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ meta_modeling:
5656
products:
5757
enabled: true
5858
projects:
59-
enabled: false
59+
enabled: true
60+
projects_max_copy_size_bytes: 5368709120
6061
publications:
6162
enabled: true
6263
remote_debug:

0 commit comments

Comments
 (0)