Skip to content

Commit 1777706

Browse files
authored
Storage extension for deep copy of project files (#952)
* storage api extension for deleting/copying folders * Implements deep copy * Extended definition of folders path added first handler and test * Added api for storage and tests * Added nodes map * Added project clone * Implements callbacks to rest api
1 parent 24ae516 commit 1777706

File tree

23 files changed

+691
-39
lines changed

23 files changed

+691
-39
lines changed

api/specs/storage/v0/openapi.yaml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,68 @@ paths:
273273
default:
274274
$ref: '#/components/responses/DefaultErrorResponse'
275275

276+
/simcore-s3/folders:
277+
post:
278+
tags:
279+
- users
280+
summary: Deep copies of all data from source to destination project in s3
281+
operationId: copy_folders_from_project
282+
parameters:
283+
- name: user_id
284+
in: query
285+
required: true
286+
schema:
287+
type: integer
288+
requestBody:
289+
content:
290+
application/json:
291+
schema:
292+
type: object
293+
properties:
294+
source:
295+
$ref: './openapi.yaml#/components/schemas/Project'
296+
destination:
297+
$ref: './openapi.yaml#/components/schemas/Project'
298+
nodes_map:
299+
type: object
300+
description: maps source and destination node uuids
301+
additionalProperties:
302+
type: string
303+
responses:
304+
'201':
305+
description: Data from destination project copied and returns project
306+
content:
307+
application/json:
308+
schema:
309+
$ref: './openapi.yaml#/components/schemas/Project'
310+
default:
311+
$ref: './openapi.yaml#/components/responses/DefaultErrorResponse'
312+
313+
/simcore-s3/folders/{folder_id}:
314+
delete:
315+
tags:
316+
- users
317+
summary: Deletes all objects with folder_id prefix
318+
operationId: delete_folders_of_project
319+
parameters:
320+
- name: folder_id
321+
in: path
322+
required: true
323+
schema:
324+
type: string
325+
- name: user_id
326+
in: query
327+
required: true
328+
schema:
329+
type: string
330+
responses:
331+
'204':
332+
description: folder has been successfully deleted
333+
276334
components:
335+
schemas:
336+
Project:
337+
$ref: '../../webserver/v0/openapi-projects.yaml#/components/schemas/Project'
277338
responses:
278339
OK_NoContent_204:
279340
description: everything is OK, but there is no content to return

api/tests/conftest.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818
OPENAPI_MAIN_FILENAME = 'openapi.yaml'
1919

2020

21-
def current_dir():
22-
return Path(sys.argv[0] if __name__ == "__main__" else __file__).resolve().parent
21+
current_dir = Path(sys.argv[0] if __name__ == "__main__" else __file__).resolve().parent
2322

2423

2524
@pytest.fixture(scope='session')
2625
def here():
27-
return current_dir()
26+
return current_dir
2827

2928

3029
@pytest.fixture(scope='session')
@@ -91,7 +90,7 @@ def list_openapi_tails():
9190
SEE api_specs_tail to get one at a time
9291
"""
9392
tails = []
94-
specs_dir = _api_specs_dir_impl(current_dir())
93+
specs_dir = _api_specs_dir_impl(current_dir)
9594
for tail in _all_api_specs_tails_impl(specs_dir):
9695
specs = load_specs( specs_dir / tail)
9796
if not is_json_schema(specs):

services/storage/src/simcore_service_storage/application.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66

77
from aiohttp import web
88

9-
from .s3 import setup_s3
109
from .db import setup_db
10+
from .dsm import setup_dsm
1111
from .rest import setup_rest
12+
from .s3 import setup_s3
1213
from .settings import APP_CONFIG_KEY
13-
from .dsm import setup_dsm
1414

1515
log = logging.getLogger(__name__)
1616

services/storage/src/simcore_service_storage/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
"""
1515
import argparse
1616
import logging
17-
import sys
1817
import os
18+
import sys
1919

20-
from . import application, cli_config
2120
from servicelib.utils import search_osparc_repo_dir
2221

22+
from . import application, cli_config
2323

2424
log = logging.getLogger(__name__)
2525

services/storage/src/simcore_service_storage/cli_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import trafaret_config
77
import trafaret_config.commandline as commandline
88

9-
from .resources import RSC_CONFIG_DIR_KEY, resources
109
from .config_schema import schema
10+
from .resources import RSC_CONFIG_DIR_KEY, resources
1111
from .settings import DEFAULT_CONFIG
1212

1313
log = logging.getLogger(__name__)

services/storage/src/simcore_service_storage/dsm.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,3 +426,127 @@ async def download_link(self, user_id: str, location: str, file_uuid: str)->str:
426426
destination, filename = _parse_datcore(file_uuid)
427427
link = await dcw.download_link(destination, filename)
428428
return link
429+
430+
async def deep_copy_project_simcore_s3(self, user_id: str, source_project, destination_project, node_mapping):
431+
""" Parses a given source project and copies all related files to the destination project
432+
433+
Since all files are organized as
434+
435+
project_id/node_id/filename or links to datcore
436+
437+
this function creates a new folder structure
438+
439+
project_id/node_id/filename
440+
441+
and copies all files to the corresponding places.
442+
443+
Additionally, all external files from datcore are being copied and the paths in the destination
444+
project are adapted accordingly
445+
446+
Lastly, the meta data db is kept in sync
447+
"""
448+
source_folder = source_project["uuid"]
449+
dest_folder = destination_project["uuid"]
450+
451+
# build up naming map based on labels
452+
uuid_name_dict = {}
453+
uuid_name_dict[dest_folder] = destination_project["name"]
454+
for src_node_id, src_node in source_project['workbench'].items():
455+
new_node_id = node_mapping.get(src_node_id)
456+
if new_node_id is not None:
457+
uuid_name_dict[new_node_id] = src_node['label']
458+
459+
# Step 1: List all objects for this project replace them with the destination object name and do a copy at the same time collect some names
460+
_loop = asyncio.get_event_loop()
461+
session = aiobotocore.get_session(loop=_loop)
462+
async with session.create_client('s3', endpoint_url=self.s3_client.endpoint_url, aws_access_key_id=self.s3_client.access_key,
463+
aws_secret_access_key=self.s3_client.secret_key) as client:
464+
response = await client.list_objects_v2(Bucket=self.simcore_bucket_name, Prefix=source_folder)
465+
466+
if "Contents" in response:
467+
for f in response['Contents']:
468+
source_object_name = f['Key']
469+
source_object_parts = Path(source_object_name).parts
470+
471+
if len(source_object_parts) == 3:
472+
old_node_id = source_object_parts[1]
473+
new_node_id = node_mapping.get(old_node_id)
474+
if new_node_id is not None:
475+
old_filename = source_object_parts[2]
476+
dest_object_name = str(Path(dest_folder) / new_node_id / old_filename)
477+
copy_source = {'Bucket' : self.simcore_bucket_name, 'Key': source_object_name}
478+
response = await client.copy_object(CopySource=copy_source, Bucket=self.simcore_bucket_name, Key=dest_object_name)
479+
else:
480+
# This may happen once we have shared/home folders
481+
logger.info("len(object.parts != 3")
482+
483+
484+
# Step 2: List all references in outputs that point to datcore and copy over
485+
for node_id, node in destination_project['workbench'].items():
486+
outputs = node.get("outputs")
487+
if outputs is not None:
488+
for _output_key, output in outputs.items():
489+
if "store" in output and output["store"]==DATCORE_ID:
490+
src = output["path"]
491+
dest = str(Path(dest_folder) / Path(node_id) / Path(src).name)
492+
logger.info("Need to copy %s to %s", src, dest)
493+
await self.copy_file(user_id, SIMCORE_S3_STR, dest, DATCORE_STR, src)
494+
# and change the dest project accordingly
495+
output["store"] = 0
496+
output['path'] = dest
497+
498+
# step 3: list files first to create fmds
499+
session = aiobotocore.get_session(loop=_loop)
500+
fmds = []
501+
async with session.create_client('s3', endpoint_url=self.s3_client.endpoint_url, aws_access_key_id=self.s3_client.access_key,
502+
aws_secret_access_key=self.s3_client.secret_key) as client:
503+
response = await client.list_objects_v2(Bucket=self.simcore_bucket_name, Prefix=dest_folder+"/")
504+
if 'Contents' in response:
505+
for f in response['Contents']:
506+
fmd = FileMetaData()
507+
fmd.simcore_from_uuid(f["Key"], self.simcore_bucket_name)
508+
fmd.project_name = uuid_name_dict.get(dest_folder, "Untitled")
509+
fmd.node_name = uuid_name_dict.get(fmd.node_id, "Untitled")
510+
fmd.raw_file_path = fmd.file_uuid
511+
fmd.display_file_path = str(Path(fmd.project_name) / fmd.node_name / fmd.file_name)
512+
fmd.user_id = user_id
513+
fmd.file_size = f['Size']
514+
fmd.last_modified = str(f['LastModified'])
515+
fmds.append(fmd)
516+
517+
518+
# step 4 sync db
519+
async with self.engine.acquire() as conn:
520+
for fmd in fmds:
521+
query = sa.select([file_meta_data]).where(file_meta_data.c.file_uuid == fmd.file_uuid)
522+
# if file already exists, we might w
523+
rows = await conn.execute(query)
524+
exists = await rows.scalar()
525+
if exists:
526+
delete_me = file_meta_data.delete().where(file_meta_data.c.file_uuid == fmd.file_uuid)
527+
await conn.execute(delete_me)
528+
ins = file_meta_data.insert().values(**vars(fmd))
529+
await conn.execute(ins)
530+
531+
async def delete_project_simcore_s3(self, user_id: str, project_id):
532+
""" Deletes all files from a given project in simcore.s3 and updated db accordingly
533+
"""
534+
535+
async with self.engine.acquire() as conn:
536+
delete_me = file_meta_data.delete().where(and_(file_meta_data.c.user_id == user_id,
537+
file_meta_data.c.project_id == project_id))
538+
await conn.execute(delete_me)
539+
540+
_loop = asyncio.get_event_loop()
541+
session = aiobotocore.get_session(loop=_loop)
542+
async with session.create_client('s3', endpoint_url=self.s3_client.endpoint_url, aws_access_key_id=self.s3_client.access_key,
543+
aws_secret_access_key=self.s3_client.secret_key) as client:
544+
response = await client.list_objects_v2(Bucket=self.simcore_bucket_name, Prefix=project_id+"/")
545+
if "Contents" in response:
546+
objects_to_delete = []
547+
for f in response['Contents']:
548+
objects_to_delete.append( { 'Key': f['Key'] })
549+
550+
if objects_to_delete:
551+
response = await client.delete_objects(Bucket=self.simcore_bucket_name, Delete={'Objects' : objects_to_delete})
552+
return response

services/storage/src/simcore_service_storage/handlers.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23

34
import attr
@@ -9,7 +10,7 @@
910
from .db_tokens import get_api_token_and_secret
1011
from .dsm import DataStorageManager, DatCoreApiToken
1112
from .rest_models import FileMetaDataSchema
12-
from .settings import APP_DSM_KEY, DATCORE_STR
13+
from .settings import APP_DSM_KEY, DATCORE_STR, SIMCORE_S3_ID
1314

1415
log = logging.getLogger(__name__)
1516

@@ -160,7 +161,6 @@ async def update_file_meta_data(request: web.Request):
160161
_location = dsm.location_from_id(location_id)
161162

162163

163-
164164
async def download_file(request: web.Request):
165165
params, query, body = await extract_and_validate(request)
166166

@@ -246,6 +246,46 @@ async def delete_file(request: web.Request):
246246
}
247247

248248

249+
async def create_folders_from_project(request: web.Request):
250+
#FIXME: Update openapi-core. Fails with additionalProperties https://github.com/p1c2u/openapi-core/issues/124. Fails with project
251+
# params, query, body = await extract_and_validate(request)
252+
253+
user_id = request.query.get("user_id")
254+
255+
body = await request.json()
256+
source_project = body.get('source', {})
257+
destination_project = body.get('destination', {})
258+
nodes_map = body.get('nodes_map', {})
259+
260+
# TODO: remove this for production
261+
assert set(nodes_map.keys()) == set(source_project['workbench'].keys())
262+
assert set(nodes_map.values()) == set(destination_project['workbench'].keys())
263+
264+
# TODO: validate project with jsonschema instead??
265+
266+
params = { "location_id" : SIMCORE_S3_ID }
267+
query = { "user_id": user_id}
268+
dsm = await _prepare_storage_manager(params, query, request)
269+
await dsm.deep_copy_project_simcore_s3(user_id, source_project, destination_project, nodes_map)
270+
271+
raise web.HTTPCreated(text=json.dumps(destination_project),
272+
content_type='application/json')
273+
274+
async def delete_folders_of_project(request: web.Request):
275+
folder_id = request.match_info['folder_id']
276+
user_id = request.query.get("user_id")
277+
278+
params = { "location_id" : SIMCORE_S3_ID }
279+
query = { "user_id": user_id}
280+
dsm = await _prepare_storage_manager(params, query, request)
281+
await dsm.delete_project_simcore_s3(user_id, folder_id)
282+
283+
raise web.HTTPNoContent(content_type='application/json')
284+
285+
286+
287+
288+
249289
# HELPERS -----------------------------------------------------
250290
INIT_STR = "init"
251291

services/storage/src/simcore_service_storage/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
import attr
1010

1111
from simcore_postgres_database.storage_models import (file_meta_data, metadata,
12-
projects, tokens, user_to_projects, users)
12+
projects, tokens,
13+
user_to_projects, users)
1314
from simcore_service_storage.settings import (DATCORE_STR, SIMCORE_S3_ID,
1415
SIMCORE_S3_STR)
1516

services/storage/src/simcore_service_storage/resources.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
33
"""
44
from servicelib.resources import ResourcesFacade
5-
from .settings import RSC_CONFIG_DIR_KEY # pylint: disable=unused-import
5+
6+
from .settings import RSC_CONFIG_DIR_KEY # pylint: disable=unused-import
67

78
resources = ResourcesFacade(
89
package_name=__name__,

services/storage/src/simcore_service_storage/rest_routes.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,12 @@ def create(specs: OpenApiSpec) -> List[web.RouteDef]:
6464
operation_id = specs.paths[path].operations['put'].operation_id
6565
routes.append( web.put(BASEPATH+path, handle, name=operation_id) )
6666

67+
path, handle = '/simcore-s3/folders', handlers.create_folders_from_project
68+
operation_id = specs.paths[path].operations['post'].operation_id
69+
routes.append( web.post(BASEPATH+path, handle, name=operation_id) )
70+
71+
path, handle = '/simcore-s3/folders/{folder_id}', handlers.delete_folders_of_project
72+
operation_id = specs.paths[path].operations['delete'].operation_id
73+
routes.append( web.delete(BASEPATH+path, handle, name=operation_id) )
6774

6875
return routes

0 commit comments

Comments
 (0)