@@ -735,55 +735,77 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
 
     for dataset in user_v1_datasets:
         print(f"Creating dataset in v2: {dataset['id']} - {dataset['name']}")
-        dataset_v2_id = create_v2_dataset(dataset, user_headers_v2)
-        DATASET_MAP[dataset["id"]] = dataset_v2_id
-        add_dataset_metadata(dataset, dataset_v2_id, base_headers_v1, user_headers_v2)
-        add_dataset_folders(dataset, dataset_v2_id, user_headers_v2)
-        print("Created folders in the new dataset")
-
-        all_dataset_folders = get_folder_and_subfolders(dataset_v2_id, user_headers_v2)
-
-        # Retrieve files for the dataset in Clowder v1
-        dataset_files_endpoint = (
-            f"{CLOWDER_V1}/api/datasets/{dataset['id']}/files?superAdmin=true"
-        )
-        files_response = requests.get(
-            dataset_files_endpoint, headers=clowder_headers_v1, verify=False
-        )
-        files_result = files_response.json()
+        dataset_v1_id = dataset["id"]
+        dataset_v1_spaces = dataset["spaces"]
+        migrate_dataset = True
+        # Skip datasets that appear in the exclude list
+        if dataset_v1_id in toml_exclude_dataset_ids:
+            print(f"Skipping dataset {dataset_v1_id} as it is in the exclude list.")
+            migrate_dataset = False
+        # If specific spaces were requested, skip datasets outside them
+        if toml_space_ids:
+            if not any(space_id in dataset_v1_spaces for space_id in toml_space_ids):
+                print(
+                    f"Skipping dataset {dataset_v1_id} as it is not in the specified spaces."
+                )
+                migrate_dataset = False
+        # Skip datasets that belong to an excluded space
+        if toml_exclude_space_ids:
+            if any(space_id in dataset_v1_spaces for space_id in toml_exclude_space_ids):
+                print(
+                    f"Skipping dataset {dataset_v1_id} as it is in the excluded spaces."
+                )
+                migrate_dataset = False
+        if migrate_dataset:
+            dataset_v2_id = create_v2_dataset(dataset, user_headers_v2)
+            DATASET_MAP[dataset["id"]] = dataset_v2_id
+            add_dataset_metadata(dataset, dataset_v2_id, base_headers_v1, user_headers_v2)
+            add_dataset_folders(dataset, dataset_v2_id, user_headers_v2)
+            print("Created folders in the new dataset")
+
+            all_dataset_folders = get_folder_and_subfolders(dataset_v2_id, user_headers_v2)
+
+            # Retrieve files for the dataset in Clowder v1
+            dataset_files_endpoint = (
+                f"{CLOWDER_V1}/api/datasets/{dataset['id']}/files?superAdmin=true"
+            )
+            files_response = requests.get(
+                dataset_files_endpoint, headers=clowder_headers_v1, verify=False
+            )
+            files_result = files_response.json()
 
-        for file in files_result:
-            file_v2_id = download_and_upload_file(
-                file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
+            for file in files_result:
+                file_v2_id = download_and_upload_file(
+                    file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
+                )
+                if file_v2_id is not None:
+                    add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
+            # posting the collection hierarchy as metadata
+            collection_space_metadata_dict = build_collection_space_metadata_for_v1_dataset(
+                dataset=dataset, user_v1=user_v1, headers=clowder_headers_v1
             )
-        if file_v2_id is not None:
-            add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
-        # posting the collection hierarchy as metadata
-        collection_space_metadata_dict = build_collection_space_metadata_for_v1_dataset(
-            dataset=dataset, user_v1=user_v1, headers=clowder_headers_v1
-        )
-        migration_extractor_collection_metadata = {
-            "listener": {
-                "name": "migration",
-                "version": "1",
-                "description": "Migration of metadata from Clowder v1 to Clowder v2",
-            },
-            "context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
-            "content": collection_space_metadata_dict,
-            "contents": collection_space_metadata_dict,
-        }
-        v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
-        response = requests.post(
-            v2_metadata_endpoint,
-            json=migration_extractor_collection_metadata,
-            headers=clowder_headers_v2,
-        )
-        if response.status_code == 200:
-            print("Successfully added collection info as metadata in v2.")
-        else:
-            print(
-                f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
+            migration_extractor_collection_metadata = {
+                "listener": {
+                    "name": "migration",
+                    "version": "1",
+                    "description": "Migration of metadata from Clowder v1 to Clowder v2",
+                },
+                "context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
+                "content": collection_space_metadata_dict,
+                "contents": collection_space_metadata_dict,
+            }
+            v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
+            response = requests.post(
+                v2_metadata_endpoint,
+                json=migration_extractor_collection_metadata,
+                headers=clowder_headers_v2,
             )
+            if response.status_code == 200:
+                print("Successfully added collection info as metadata in v2.")
+            else:
+                print(
+                    f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
+                )
 
     return [USER_MAP, DATASET_MAP]
 
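The new `toml_exclude_dataset_ids`, `toml_space_ids`, and `toml_exclude_space_ids` lists are presumably read from a TOML config elsewhere in the script; the file layout is not shown in this diff. A minimal loading sketch, with key names guessed from the variable names and placeholder IDs:

```python
import tomllib  # Python 3.11+; the third-party "toml" package works on older versions

# Inline stand-in for the real config file; the key names are assumptions
# inferred from the toml_* variable names above, and the IDs are placeholders.
EXAMPLE_CONFIG = """
exclude_dataset_ids = ["000000000000000000000001"]
space_ids = []  # an empty list means "do not restrict by space"
exclude_space_ids = ["000000000000000000000002"]
"""

config = tomllib.loads(EXAMPLE_CONFIG)
toml_exclude_dataset_ids = config.get("exclude_dataset_ids", [])
toml_space_ids = config.get("space_ids", [])
toml_exclude_space_ids = config.get("exclude_space_ids", [])
```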
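The three skip checks are independent, so they could also be factored into a single predicate. A sketch (not part of this PR) that keeps the same semantics, including exclusion winning over inclusion:

```python
def should_migrate_dataset(dataset_v1_id, dataset_v1_spaces,
                           exclude_dataset_ids, space_ids, exclude_space_ids):
    """Mirror the inline checks above: drop excluded datasets, require
    membership in space_ids when any are given, drop excluded spaces."""
    if dataset_v1_id in exclude_dataset_ids:
        return False
    if space_ids and not any(s in dataset_v1_spaces for s in space_ids):
        return False
    if exclude_space_ids and any(s in dataset_v1_spaces for s in exclude_space_ids):
        return False
    return True

# A dataset whose space is both included and excluded is skipped:
# the exclusion check runs last and wins, matching the diff's behavior.
assert not should_migrate_dataset(
    "000000000000000000000001",
    ["000000000000000000000002"],
    exclude_dataset_ids=[],
    space_ids=["000000000000000000000002"],
    exclude_space_ids=["000000000000000000000002"],
)
```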
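The metadata POST only inspects `status_code` and never times out. A slightly more defensive variant, where the `timeout`, `raise_for_status()`, and the helper itself are additions for illustration rather than part of the PR:

```python
import requests

def post_dataset_metadata(clowder_v2_url, dataset_v2_id, payload, headers):
    """POST a metadata document to a v2 dataset and report success."""
    endpoint = f"{clowder_v2_url}/api/v2/datasets/{dataset_v2_id}/metadata"
    try:
        response = requests.post(endpoint, json=payload, headers=headers, timeout=30)
        response.raise_for_status()  # any non-2xx status raises HTTPError
    except requests.RequestException as exc:
        print(f"Failed to add collection info as metadata in Clowder v2: {exc}")
        return False
    print("Successfully added collection info as metadata in v2.")
    return True
```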