@@ -197,7 +197,7 @@ def process_collection_descendants(collection, headers_v1, base_headers_v2, head
     child_col_json = child_col_response.json()
     dataset_json = dataset_response.json()

-    print(f"Got child collections and datasets")
+    # the below handles creating folders for child collections
     for child in child_col_json:
         if v2_parent_type == "dataset":
             print(f"Add folder to the dataset")
@@ -211,20 +211,14 @@ def process_collection_descendants(collection, headers_v1, base_headers_v2, head
             new_folder = create_folder_if_not_exists_or_get(folder_name, v2_parent_id, v2_dataset_id, headers_v2)
             process_collection_descendants(child, headers_v1, base_headers_v2, headers_v2, new_folder['id'], 'folder', v2_dataset_id)

+    # this handles uploading the datasets of the collection as folders
     for dataset in dataset_json:
         if v2_parent_type == "dataset":
-            print(f"Parent is a dataset")
-            new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_dataset_id, headers_v2)
-            print(f"Now we need to add the sub folders of this dataset")
-            # TODO get DATASET FOLDERS HERE FROM v1
-            process_dataset_folders(dataset, headers_v1, headers_v2, new_folder['id'], v2_dataset_id)
-            process_dataset_files(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id)
+            new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_parent_type, v2_dataset_id, headers_v2)
+            process_dataset_files_and_folders(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id, new_folder)
         else:
-            print(f"Parent is a folder")
-            new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_dataset_id, headers_v2)
-            # TODO GET DATASET FOLDERS HERE FROM v1
-            process_dataset_folders(dataset, headers_v1, headers_v2, new_folder['id'], v2_dataset_id)
-            process_dataset_files(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id)
+            new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_parent_type, v2_dataset_id, headers_v2)
+            process_dataset_files_and_folders(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id, new_folder)



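Review note: the first context line in the `@@ -211,20 +211,14 @@` hunk above still calls `create_folder_if_not_exists_or_get` with four positional arguments, but this PR widens the signature to five by adding `parent_type` (see the hunk at v1 line 582 below). Left as-is, that call site would raise a `TypeError` at runtime. A minimal sketch of the updated call, assuming `v2_parent_type` should be forwarded there as it is at the other call sites:

```python
# Sketch only: bring the stale call site in line with the new
# (folder, parent, parent_type, dataset_v2, headers) signature.
new_folder = create_folder_if_not_exists_or_get(
    folder_name, v2_parent_id, v2_parent_type, v2_dataset_id, headers_v2
)
```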
@@ -240,7 +234,8 @@ def get_v1_dataset_folders(dataset, headers_v1, headers_v2, parent_type, parent_
     folder_json = folder_response.json()
     return folder_json

-def process_dataset_files(dataset, headers_v1, headers_v2, parent_type, parent_id, dataset_v2_id):
+# processes a dataset: creates its folders and uploads its files
+def process_dataset_files_and_folders(dataset, headers_v1, headers_v2, parent_type, parent_id, dataset_v2_id, dataset_v2_folder):
     dataset_v1_folders = get_v1_dataset_folders(dataset, headers_v1, headers_v2, parent_type, parent_id)

     for folder_v1 in dataset_v1_folders:
@@ -255,24 +250,19 @@ def process_dataset_files(dataset, headers_v1, headers_v2, parent_type, parent_i
     files_endpoint = f"{CLOWDER_V1}/api/datasets/{dataset['id']}/files"
     files_response = requests.get(files_endpoint, headers=headers_v1)
     files_json = files_response.json()
-    # TODO WORK HERE
+    # go through files and upload them to the correct folder if they have one
     for file in files_json:
         if 'folders' in file:
-            print(f"This file is in a folder")
-            current_file_folder_name = file['folders']['name']
-            matching_folder = None
             for folder_v2 in all_v2_dataset_folders:
                 if folder_v2['name'] == file['folders']['name']:
                     print(f"Upload this file to a folder")
                     matching_folder = folder_v2
                     download_and_upload_file_to_matching_folder(file, dataset_v2_id, base_headers_v2, matching_folder)
         else:
-            print(f"This file is not in a folder")
-            # TODO upload it to the folder
             if parent_type == "dataset":
                 print(f"Upload to a dataset")
             if parent_type == "folder":
-                print(f"Upload to a folder")
+                download_and_upload_file_to_matching_folder(file, dataset_v2_id, base_headers_v2, dataset_v2_folder)
     print(f"Got dataset files")


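Review note on the matching loop above: with the `matching_folder = None` initialization removed, `matching_folder` is only ever assigned inside the name-comparison branch, and files in the `parent_type == "dataset"` branch are still only logged rather than uploaded. A hedged sketch of an equivalent lookup that avoids relying on a possibly-unset variable, in the same Python style as the script:

```python
# Sketch only: find the v2 folder whose name matches the file's v1 folder,
# then fall back to the dataset-level folder when nothing matches.
matching_folder = next(
    (f for f in all_v2_dataset_folders if f['name'] == file['folders']['name']),
    None,
)
if matching_folder is not None:
    download_and_upload_file_to_matching_folder(file, dataset_v2_id, base_headers_v2, matching_folder)
else:
    download_and_upload_file_to_matching_folder(file, dataset_v2_id, base_headers_v2, dataset_v2_folder)
```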
@@ -295,7 +285,10 @@ def create_v2_dataset_from_collection(collection, user_v1, headers_v1, headers_v
     new_dataset_json = response.json()
     v2_dataset_id = new_dataset_json["id"]

-    process_collection_descendants(collection, headers_v1, base_headers_v2, headers_v2, new_dataset_json["id"], "dataset", v2_dataset_id)
+    process_collection_descendants(collection=collection, headers_v1=headers_v1,
+                                   base_headers_v2=base_headers_v2, headers_v2=headers_v2,
+                                   v2_parent_id=new_dataset_json["id"],
+                                   v2_parent_type="dataset", v2_dataset_id=v2_dataset_id)

     return response.json()["id"]

@@ -560,7 +553,7 @@ def add_folder_hierarchy_to_migration_folder(folder_hierarchy, dataset_v2, folde
     current_parent = folder_id_v2
     for part in hierarchy_parts:
         result = create_folder_if_not_exists_or_get(
-            part, current_parent, dataset_v2, headers
+            part, current_parent, 'folder', dataset_v2, headers
         )
         if result:
             current_parent = result["id"]
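For context, the hunk above just threads the new `parent_type` argument through the hierarchy walk, which creates each path segment under the one before it. A small worked example of what that walk does, assuming `hierarchy_parts` comes from splitting a v1 folder path on `/`:

```python
# Sketch: "/raw/2021/images" becomes three nested v2 folders.
hierarchy_parts = [p for p in "/raw/2021/images".split("/") if p]  # ['raw', '2021', 'images']
current_parent = folder_id_v2
for part in hierarchy_parts:
    result = create_folder_if_not_exists_or_get(part, current_parent, 'folder', dataset_v2, headers)
    if result:
        current_parent = result["id"]  # the next segment nests under this one
```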
@@ -579,13 +572,16 @@ def add_folder_hierarchy(folder_hierarchy, dataset_v2, headers):
             current_parent = result["id"]


-def create_folder_if_not_exists_or_get(folder, parent, dataset_v2, headers):
+def create_folder_if_not_exists_or_get(folder, parent, parent_type, dataset_v2, headers):
     """Create a folder if it does not exist or return the existing folder."""
     # current_folders = get_folder_and_subfolders(dataset_v2, headers)
     current_all_folders = get_all_folder_and_subfolders(dataset_v2, headers)
-    folder_data = (
-        {"name": folder, "parent_folder": parent} if parent else {"name": folder}
-    )
+    if parent_type == 'folder':
+        folder_data = (
+            {"name": folder, "parent_folder": parent} if parent else {"name": folder}
+        )
+    else:
+        folder_data = {"name": folder}

     for existing_folder in current_all_folders:
         if existing_folder["name"] == folder:
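With the new `parent_type` parameter, `parent_folder` is only sent when the parent is itself a folder; dataset-level folders are created with just a name. The two payload shapes the branch above produces (a sketch, with placeholder values):

```python
# parent_type == 'folder': nest under an existing v2 folder
folder_data = {"name": "images", "parent_folder": "<v2-folder-id>"}
# anything else (e.g. 'dataset'): create at the top level of the dataset
folder_data = {"name": "images"}
```

One caveat worth noting: the existing-folder check that follows matches on name alone, so two same-named folders under different parents would be treated as the same folder.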
@@ -838,6 +834,7 @@ def download_and_upload_file_to_matching_folder(file, dataset_v2_id, headers_v2,
     )

     # Clean up the local file after upload
+    print(f"Type response {type(response)}")
     try:
         os.remove(filename)
     except Exception as e:
@@ -846,7 +843,11 @@ def download_and_upload_file_to_matching_folder(file, dataset_v2_id, headers_v2,

     if response.status_code == 200:
         print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
-        return response.json().get("id")
+        response_json = response.json()
+        if type(response_json) == dict:
+            return response.json().get("id")
+        elif type(response_json) == list:
+            return response_json[0].get("id")
     else:
         print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")

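The new branch above handles the v2 upload endpoint apparently returning either a single object or a list of objects. `isinstance` is the more idiomatic check than `type(...) ==`, and an empty list would currently raise an `IndexError`; a hedged, equivalent sketch:

```python
# Sketch only: same behavior with isinstance and an empty-list guard.
response_json = response.json()
if isinstance(response_json, dict):
    return response_json.get("id")
if isinstance(response_json, list) and response_json:
    return response_json[0].get("id")
return None  # unexpected response shape
```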
@@ -1249,9 +1250,25 @@ def process_user_and_resources_collections(user_v1, USER_MAP, DATASET_MAP, COLLE

     print(f"Got {len(user_v1_collections)} user collections in the top level")

-    for top_level_col in user_v1_collections:
-        dataset_v2 = create_v2_dataset_from_collection(top_level_col, user_v1, clowder_headers_v1, user_headers_v2, base_headers_v2)
-        print('did this')
+    # filter the collections by space
+    migrate_top_level_collections = []
+    for col in user_v1_collections:
+        collection_spaces = col["spaces"]
+        collection_spaces = collection_spaces.lstrip('List(')
+        collection_spaces = collection_spaces.rstrip(')')
+        collection_spaces = collection_spaces.split(',')
+        for space in collection_spaces:
+            if space in toml_space_ids:
+                migrate_top_level_collections.append(col)
+                break
+
+    # create datasets from the top level collections
+    for top_level_col in migrate_top_level_collections:
+        dataset_v2 = create_v2_dataset_from_collection(collection=top_level_col, user_v1=user_v1,
+                                                       headers_v1=clowder_headers_v1, headers_v2=user_headers_v2,
+                                                       base_headers_v2=base_headers_v2)
+        print(f"Created dataset in v2 from collection: {top_level_col['id']} - {top_level_col['name']}")
+        COLLETIONS_MAP[top_level_col["id"]] = dataset_v2

     for dataset in user_v1_datasets:
         print(f"Creating dataset in v2: {dataset['id']} - {dataset['name']}")
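Review note on the space filter above: the v1 `spaces` field is apparently a stringified list like `"List(id1, id2)"`. Two pitfalls in the parsing as written: `str.lstrip('List(')` and `rstrip(')')` strip *character sets* rather than literal prefixes (so an id beginning with any of `L`, `i`, `s`, `t`, `(` would be mangled), and `split(',')` keeps the space after each comma, so a check like `' id2' in toml_space_ids` can silently fail. A more robust sketch of the same parse (the helper name is hypothetical):

```python
import re

def parse_spaces(spaces_field):
    """Sketch: turn 'List(id1, id2)' into ['id1', 'id2']."""
    # Match the literal List(...) wrapper instead of stripping character sets.
    match = re.fullmatch(r"List\((.*)\)", spaces_field.strip())
    inner = match.group(1) if match else spaces_field
    # strip() each id so membership tests against toml_space_ids match exactly
    return [s.strip() for s in inner.split(",") if s.strip()]
```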