import requests
from dotenv import dotenv_values

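+# Helpers reused from the metadata-definitions migration script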
+from scripts.migration.migrate_metadata_definitions import (
+    check_metadata_definition_exists,
+    get_clowder_v1_metadata_definitions,
+    post_metadata_definition,
+)
+
# Configuration and Constants
DEFAULT_PASSWORD = "Password123&"

@@ -63,33 +69,33 @@ def generate_user_api_key(user, password=DEFAULT_PASSWORD):

def get_clowder_v1_users():
    """Retrieve all users from Clowder v1."""
-    endpoint = f"{CLOWDER_V1}/api/users"
+    endpoint = f"{CLOWDER_V1}/api/users?superAdmin=true"
    response = requests.get(endpoint, headers=base_headers_v1, verify=False)
    return response.json()


def get_clowder_v1_user_datasets(user_id):
    """Retrieve datasets created by a specific user in Clowder v1."""
    # TODO what about pagination
-    endpoint = f"{CLOWDER_V1}/api/datasets?limit=0"
+    endpoint = f"{CLOWDER_V1}/api/datasets?limit=0&superAdmin=true"
    response = requests.get(endpoint, headers=clowder_headers_v1, verify=False)
    return [dataset for dataset in response.json() if dataset["authorId"] == user_id]


def get_clowder_v1_user_spaces(user_v1):
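+    """Retrieve spaces created by a specific user in Clowder v1."""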
-    endpoint = f"{CLOWDER_V1}/api/spaces"
+    endpoint = f"{CLOWDER_V1}/api/spaces?superAdmin=true"
    response = requests.get(endpoint, headers=clowder_headers_v1, verify=False)
    return [space for space in response.json() if space["creator"] == user_v1["id"]]


def get_clowder_v1_user_spaces_members(space_id):
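+    """Retrieve the members of a Clowder v1 space."""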
-    endpoint = f"{CLOWDER_V1}/api/spaces/{space_id}/users"
+    endpoint = f"{CLOWDER_V1}/api/spaces/{space_id}/users?superAdmin=true"
    response = requests.get(endpoint, headers=clowder_headers_v1, verify=False)
    return response.json()


def get_clowder_v2_space_datasets(space_id):
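+    """Retrieve the datasets that belong to a Clowder v1 space."""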
-    endpoint = f"{CLOWDER_V1}/api/spaces/{space_id}/datasets"
+    endpoint = f"{CLOWDER_V1}/api/spaces/{space_id}/datasets?superAdmin=true"
    response = requests.get(endpoint, headers=clowder_headers_v1, verify=False)
    return response.json()

@@ -265,32 +271,180 @@ def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v2):
    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
    if matching_folder:
        dataset_file_upload_endpoint += f"Multiple?folder_id={matching_folder['id']}"
-    file_exists = os.path.exists(filename)
-    # with open(filename, "rb") as file_data:
    response = requests.post(
-        dataset_file_upload_endpoint, headers=headers_v2, files={"file": open(filename, "rb")}
+        dataset_file_upload_endpoint,
+        headers=headers_v2,
+        files={"file": open(filename, "rb")},
    )

-    if response.status_code == 200:
-        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
-
    # Clean up the local file after upload
    try:
        os.remove(filename)
    except Exception as e:
        print(f"Could not delete locally downloaded file: {filename}")
        print(e)
-    print(f"Completed upload for file: {filename}")
+
+    if response.status_code == 200:
+        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
+        return response.json().get("id")
+    else:
+        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
+
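+    # Nothing was uploaded; returning None lets the caller skip metadata migration for this file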
+    return None
+
+
+def add_file_metadata(file_v1, file_v2_id, headers_v1, headers_v2):
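+    """Copy a file's metadata from Clowder v1 to the migrated file in Clowder v2."""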
+    # Get metadata from Clowder V1
+    endpoint = f"{CLOWDER_V1}/api/files/{file_v1['id']}/metadata.jsonld?superAdmin=true"
+    metadata_v1 = requests.get(endpoint, headers=headers_v1).json()
+
+    # Iterate through the metadata and post it to Clowder V2
+    for metadata in metadata_v1:
+        # Extract and map each key-value pair from the metadata's content
+        if "content" in metadata:
+            for key, value in metadata["content"].items():
+                # Define the payload to send to V2
+                metadata_payload_v2 = {
+                    "definition": key,
+                    "content": metadata["content"],
+                }
+
+                # Check if the metadata definition exists;
+                # if it does, post as user metadata; otherwise, post as machine metadata
+                v2_metadata_endpoint = (
+                    f"{CLOWDER_V2}/api/v2/files/{file_v2_id}/metadata"
+                )
+                if check_metadata_definition_exists(
+                    CLOWDER_V2, key, headers=headers_v2
+                ):
+                    response = requests.post(
+                        v2_metadata_endpoint,
+                        json=metadata_payload_v2,
+                        headers=headers_v2,
+                    )
+
+                    if response.status_code != 200:
+                        print(f"Failed to post file metadata to V2: {response.text}")
+                    else:
+                        print(f"Successfully posted file metadata to V2: {key}")
+                else:
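+                    # No matching v2 definition: post the whole block as machine
+                    # metadata, attributed to the synthetic "migration" listener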
331+ if "agent" in metadata and "listener" not in metadata :
332+ metadata ["listener" ] = {
333+ "name" : "migration" ,
334+ "version" : "1" ,
335+ "description" : "Migration of metadata from Clowder v1 to Clowder v2" ,
336+ }
337+ response = requests .post (
338+ v2_metadata_endpoint , json = metadata , headers = headers_v2
339+ )
340+
341+ if response .status_code != 200 :
342+ print (f"Failed to post file metadata to V2: { response .text } " )
343+ else :
344+ print ("Successfully posted file machine metadata to V2" )
345+ break # machine metadata no need to iterate through all the keys
346+
347+
+def add_dataset_metadata(dataset_v1, dataset_v2_id, headers_v1, headers_v2):
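+    """Copy a dataset's metadata from Clowder v1 to the migrated dataset in Clowder v2."""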
+    # Get metadata from Clowder V1
+    endpoint = (
+        f"{CLOWDER_V1}/api/datasets/{dataset_v1['id']}/metadata.jsonld?superAdmin=true"
+    )
+    metadata_v1 = requests.get(endpoint, headers=headers_v1).json()
+
+    # Iterate through the metadata and post it to Clowder V2
+    for metadata in metadata_v1:
+        # Extract and map each key-value pair from the metadata's content
+        if "content" in metadata:
+            for key, value in metadata["content"].items():
+                # Define the payload to send to V2
+                metadata_payload_v2 = {
+                    "definition": key,
+                    "content": metadata["content"],
+                }
+
+                # Check if the metadata definition exists;
+                # if it does, post as user metadata; otherwise, post as machine metadata
+                v2_metadata_endpoint = (
+                    f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
+                )
+                if check_metadata_definition_exists(
+                    CLOWDER_V2, key, headers=headers_v2
+                ):
+                    response = requests.post(
+                        v2_metadata_endpoint,
+                        json=metadata_payload_v2,
+                        headers=headers_v2,
+                    )
+
+                    if response.status_code != 200:
+                        print(f"Failed to post dataset metadata to V2: {response.text}")
+                    else:
+                        print(f"Successfully posted dataset metadata to V2: {key}")
+                else:
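+                    # Same machine-metadata handling as in add_file_metadata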
385+ if "agent" in metadata and "listener" not in metadata :
386+ metadata ["listener" ] = {
387+ "name" : "migration" ,
388+ "version" : "1" ,
389+ "description" : "Migration of metadata from Clowder v1 to Clowder v2" ,
390+ }
391+ response = requests .post (
392+ v2_metadata_endpoint , json = metadata , headers = headers_v2
393+ )
394+
395+ if response .status_code != 200 :
396+ print (f"Failed to post dataset metadata to V2: { response .text } " )
397+ else :
398+ print ("Successfully posted dataset machine metadata to V2" )
399+ break # machine metadata no need to iterate through all the keys
400+
401+
+def register_migration_extractor():
+    """Register the migration extractor in Clowder v2."""
+    migration_extractor = {
+        "name": "migration",
+        "description": "Migration of metadata from Clowder v1 to Clowder v2",
+        "version": "1",
+        "author": "Clowder Devs",
+    }
+
+    # Check whether the migration extractor is already registered
+    search_endpoint = f"{CLOWDER_V2}/api/v2/listeners/search"
+    search_params = {"text": migration_extractor["name"]}
+    search_response = requests.get(
+        search_endpoint, headers=clowder_headers_v2, params=search_params
+    )
+
+    if search_response.status_code == 200:
+        search_data = search_response.json()
+        if search_data.get("metadata", {}).get("total_count", 0) > 0:
+            for existing_extractor in search_data.get("data", []):
+                if existing_extractor.get("name") == migration_extractor["name"]:
+                    print(
+                        f"Extractor {migration_extractor['name']} already exists in Clowder v2."
+                    )
+                    return
+
+    endpoint = f"{CLOWDER_V2}/api/v2/extractors"
+    response = requests.post(
+        endpoint, json=migration_extractor, headers=clowder_headers_v2
+    )
+
+    if response.status_code == 200:
+        print("Successfully registered migration extractor in Clowder v2.")
+    else:
+        print(
+            f"Failed to register migration extractor in Clowder v2. Status code: {response.status_code}"
+        )


def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
    """Process user resources from Clowder v1 to Clowder v2."""
    user_v1_datasets = get_clowder_v1_user_datasets(user_id=user_v1["id"])
    user_v2_api_key = create_local_user(user_v1)
    USER_MAP[user_v1["id"]] = user_v2_api_key
-    base_user_headers_v2 = {
-        "x-api-key": user_v2_api_key
-    }
+    base_user_headers_v2 = {"x-api-key": user_v2_api_key}
    user_headers_v2 = {
        "x-api-key": user_v2_api_key,
        "content-type": "application/json",
@@ -301,6 +455,7 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
        print(f"Creating dataset in v2: {dataset['id']} - {dataset['name']}")
        dataset_v2_id = create_v2_dataset(dataset, user_headers_v2)
        DATASET_MAP[dataset["id"]] = dataset_v2_id
+        add_dataset_metadata(dataset, dataset_v2_id, base_headers_v1, user_headers_v2)
        add_dataset_folders(dataset, dataset_v2_id, user_headers_v2)
        print("Created folders in the new dataset")

@@ -316,34 +471,33 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
        files_result = files_response.json()

        for file in files_result:
-            download_and_upload_file(
+            file_v2_id = download_and_upload_file(
                file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
            )
+            if file_v2_id is not None:
+                add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
+
    return [USER_MAP, DATASET_MAP]


if __name__ == "__main__":
-    # users_v1 = get_clowder_v1_users()
+    ############################################################################################
+    # Migrate metadata definitions
+    v1_md_definitions = get_clowder_v1_metadata_definitions(CLOWDER_V1, base_headers_v1)
+    posted_ids = []
+    for v1_md in v1_md_definitions:
+        definition_id = post_metadata_definition(v1_md, CLOWDER_V2, clowder_headers_v2)
+        if definition_id:
+            posted_ids.append(definition_id)
+
+    ############################################################################################
+    # Register the migration extractor in Clowder v2
+    register_migration_extractor()
+
+    ############################################################################################
+    # Migrate users and resources
    USER_MAP = {}
    DATASET_MAP = {}
-    users_v1 = [
-        {
-            "@context": {
-                "firstName": "http://schema.org/Person/givenName",
-                "lastName": "http://schema.org/Person/familyName",
-                "email": "http://schema.org/Person/email",
-                "affiliation": "http://schema.org/Person/affiliation",
-            },
-            "id": "576313ce1407b25fe19fc381",
-            "firstName": "Chen",
-            "lastName": "Wang",
-            "fullName": "Chen Wang",
-            "email": "[email protected]",
-            "avatar": "http://www.gravatar.com/avatar/2f97a52f2214949c4172d7fb796f173e?d=404",
-            "profile": {},
-            "identityProvider": "Chen Wang ([email protected]) [Local Account]",
-        }
-    ]
    users_v1 = get_clowder_v1_users()
    for user_v1 in users_v1:
        if (
@@ -357,21 +511,23 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
        else:
            print(f"Skipping user {user_v1['email']} as it is not a local account.")

-    print("Now migrating spaces.")
-    for user_v1 in users_v1:
-        print(f"Migrating spaces of user {user_v1['email']}")
-        user_v1_spaces = get_clowder_v1_user_spaces(user_v1)
-        user_v2_api_key = USER_MAP[user_v1["id"]]
-        for space in user_v1_spaces:
-            group_id = create_v2_group(space, headers={"X-API-key": user_v2_api_key})
-            add_v1_space_members_to_v2_group(
-                space, group_id, headers={"X-API-key": user_v2_api_key}
-            )
-            space_datasets = get_clowder_v2_space_datasets(space["id"])
-            for space_dataset in space_datasets:
-                dataset_v2_id = DATASET_MAP[space_dataset["id"]]
-                share_dataset_with_group(
-                    group_id, space, headers={"X-API-key": user_v2_api_key}
-                )
-        print(f"Migrated spaces of user {user_v1['email']}")
+    ############################################################################################
+    # Migrate spaces
+    # print("Now migrating spaces.")
+    # for user_v1 in users_v1:
+    #     print(f"Migrating spaces of user {user_v1['email']}")
+    #     user_v1_spaces = get_clowder_v1_user_spaces(user_v1)
+    #     user_v2_api_key = USER_MAP[user_v1["id"]]
+    #     for space in user_v1_spaces:
+    #         group_id = create_v2_group(space, headers={"X-API-key": user_v2_api_key})
+    #         add_v1_space_members_to_v2_group(
+    #             space, group_id, headers={"X-API-key": user_v2_api_key}
+    #         )
+    #         space_datasets = get_clowder_v2_space_datasets(space["id"])
+    #         for space_dataset in space_datasets:
+    #             dataset_v2_id = DATASET_MAP[space_dataset["id"]]
+    #             share_dataset_with_group(
+    #                 group_id, space, headers={"X-API-key": user_v2_api_key}
+    #             )
+    #     print(f"Migrated spaces of user {user_v1['email']}")
    print("Migration complete.")