@@ -173,6 +173,22 @@ def get_clowder_v1_user_collections(headers, user_v1):
     response = requests.get(endpoint, headers=headers)
     return [col for col in response.json() if col["authorId"] == user_v1["id"]]

+
+def get_clowder_v1_user_collections_top_level(headers, user_v1):
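+    """Return the top-level v1 collections authored by the given user."""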
+    top_level_collections = []
+
+    endpoint = f"{CLOWDER_V1}/api/collections/topLevelCollections"
+    response = requests.get(endpoint, headers=headers)
+    response_json = response.json()
+    for col in response_json:
+        author = col["author"]
+        # "author" is serialized as "MiniUser(<id>, ...)": slice the id out.
+        # (str.lstrip removes a character *set*, not a prefix, so it could also
+        # eat leading id characters that happen to appear in "MiniUser(".)
+        author_id = author[len("MiniUser("):]
+        author_id = author_id[:author_id.index(",")]
+        if author_id == user_v1["id"]:
+            top_level_collections.append(col)
+    return top_level_collections
+
+
 # TODO this is too slow, we need to optimize it
 def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
     matching_collections = []
@@ -769,6 +785,117 @@ def build_collection_space_metadata_for_v1_dataset(dataset, user_v1, headers):
     print(f"Got space and collection metadata from dataset {dataset_id}")
     return metadata

+def process_user_and_resources_collections(user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP):
+    """Migrate a v1 user's datasets, files, and collection hierarchy to Clowder v2."""
+
+    # create the v2 user, then fetch the user's v1 datasets and top-level collections
+
+    user_v1_datasets = get_clowder_v1_user_datasets(user_id=user_v1["id"])
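+    # NOTE: only the first two datasets are migrated; this looks like a leftover testing limit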
+    user_v1_datasets = user_v1_datasets[:2]
+    user_v2_api_key = create_local_user(user_v1)
+    USER_MAP[user_v1["id"]] = user_v2_api_key
+    base_user_headers_v2 = {"x-api-key": user_v2_api_key}
+    user_headers_v2 = {
+        "x-api-key": user_v2_api_key,
+        "content-type": "application/json",
+        "accept": "application/json",
+    }
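+    # NOTE: base_user_headers_v2 omits content-type, presumably so requests can
+    # set its own multipart boundary when uploading files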
+
+    user_v1_collections = get_clowder_v1_user_collections_top_level(
+        headers=clowder_headers_v1, user_v1=user_v1
+    )
+
+    print(f"Got {len(user_v1_collections)} top-level user collections")
+
+    for dataset in user_v1_datasets:
+        print(f"Creating dataset in v2: {dataset['id']} - {dataset['name']}")
+        dataset_v1_id = dataset["id"]
+        dataset_v1_spaces = dataset["spaces"]
+        MIGRATE_DATASET = True
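+        # debug output: show the include/exclude filters read from the TOML config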
+        print(toml_exclude_dataset_ids)
+        print(toml_space_ids)
+        print(toml_exclude_space_ids)
+        # Check if dataset is in the excluded dataset list
+        if dataset_v1_id in toml_exclude_dataset_ids:
+            print(f"Skipping dataset {dataset_v1_id} as it is in the exclude list.")
+            MIGRATE_DATASET = False
+        # Check if dataset is in the specified space list
+        if toml_space_ids is not None and len(toml_space_ids) > 0:
+            if not any(space_id in dataset_v1_spaces for space_id in toml_space_ids):
+                print(
+                    f"Skipping dataset {dataset_v1_id} as it is not in the specified spaces."
+                )
+                MIGRATE_DATASET = False
+        if toml_exclude_space_ids is not None and len(toml_exclude_space_ids) > 0:
+            if any(space_id in dataset_v1_spaces for space_id in toml_exclude_space_ids):
+                print(
+                    f"Skipping dataset {dataset_v1_id} as it is in the excluded spaces."
+                )
+                MIGRATE_DATASET = False
+        if MIGRATE_DATASET:
+            dataset_v2_id = create_v2_dataset(dataset, user_headers_v2)
+            DATASET_MAP[dataset["id"]] = dataset_v2_id
+            add_dataset_metadata(dataset, dataset_v2_id, base_headers_v1, user_headers_v2)
+            add_dataset_folders(dataset, dataset_v2_id, user_headers_v2)
+            print("Created folders in the new dataset")
+
+            all_dataset_folders = get_folder_and_subfolders(dataset_v2_id, user_headers_v2)
+
+            # Retrieve files for the dataset in Clowder v1
+            dataset_files_endpoint = (
+                f"{CLOWDER_V1}/api/datasets/{dataset['id']}/files?superAdmin=true"
+            )
+            files_response = requests.get(
+                dataset_files_endpoint, headers=clowder_headers_v1, verify=False
+            )
+            files_result = files_response.json()
+
+            for file in files_result:
+                file_v2_id = download_and_upload_file(
+                    file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
+                )
+                if file_v2_id is not None:
+                    add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
+            # Post the collection/space hierarchy as dataset metadata in v2;
+            # default to an empty dict so a failure here cannot raise a
+            # NameError when the payload is built below
+            collection_space_metadata_dict = {}
+            try:
+                collection_space_metadata_dict = build_collection_space_metadata_for_v1_dataset(
+                    dataset=dataset, user_v1=user_v1, headers=clowder_headers_v1
+                )
+            except Exception as e:
+                print(e)
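+            # The payload mimics extractor metadata: a synthetic "migration"
+            # listener tags these entries as carried over from Clowder v1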
+            migration_extractor_collection_metadata = {
+                "listener": {
+                    "name": "migration",
+                    "version": "1",
+                    "description": "Migration of metadata from Clowder v1 to Clowder v2",
+                },
+                "context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
+                "content": collection_space_metadata_dict,
+                "contents": collection_space_metadata_dict,
+            }
+            v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
+            response = requests.post(
+                v2_metadata_endpoint,
+                json=migration_extractor_collection_metadata,
+                headers=clowder_headers_v2,
+            )
+            if response.status_code == 200:
+                print("Successfully added collection info as metadata in v2.")
+            else:
+                print(
+                    f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
+                )
+        else:
+            print(f"Skipping dataset {dataset_v1_id} as it does not meet the criteria.")
+
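+    # COLLECTIONS_MAP is accepted but not yet populated here; the collection
+    # hierarchy is instead recorded as dataset metadata above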
+    return [USER_MAP, DATASET_MAP]
+

 def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
     """Process user resources from Clowder v1 to Clowder v2."""
@@ -891,6 +1018,7 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
 # migrate users and resources
 USER_MAP = {}
 DATASET_MAP = {}
+COLLECTIONS_MAP = {}
 users_v1 = get_clowder_v1_users()
 # TODO filter if toml users
 if toml_users is not None and len(toml_users) > 0:
@@ -905,8 +1033,8 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
         "[Local Account]" in user_v1["identityProvider"]
         and user_v1["email"] != admin_user["email"]
     ):
-        [USER_MAP, DATASET_MAP] = process_user_and_resources(
-            user_v1, USER_MAP, DATASET_MAP
+        [USER_MAP, DATASET_MAP] = process_user_and_resources_collections(
+            user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP
         )
         print(f"Migrated user {user_v1['email']} and associated resources.")
     else: