@@ -265,7 +265,7 @@ def process_dataset_files(dataset, headers_v1, headers_v2, parent_type, parent_i
265265 if folder_v2 ['name' ] == file ['folders' ]['name' ]:
266266 print (f"Upload this file to a folder" )
267267 matching_folder = folder_v2
268- download_and_upload_file ( )
268+ download_and_upload_file_to_folder_id ( file , matching_folder , dataset_v2_id , headers_v2 )
269269 else :
270270 print (f"This file is not in a folder" )
271271 # TODO upload it to the folder
@@ -568,6 +568,8 @@ def add_folder_hierarchy_to_migration_folder(folder_hierarchy, dataset_v2, folde
568568def add_folder_hierarchy (folder_hierarchy , dataset_v2 , headers ):
569569 """Add folder hierarchy to a dataset in Clowder v2."""
570570 hierarchy_parts = folder_hierarchy .split ("/" )
571+ if hierarchy_parts [0 ] == '' :
572+ hierarchy_parts = hierarchy_parts [1 :]
571573 current_parent = None
572574 for part in hierarchy_parts :
573575 result = create_folder_if_not_exists_or_get (
@@ -642,11 +644,15 @@ def download_and_upload_file_to_folder(file, folder, dataset_v2_id, headers_v2):
642644 dataset_file_upload_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets/{ dataset_v2_id } /files"
643645 if folder is not None :
644646 # add folder if it is not None
645- dataset_file_upload_endpoint += f"Multiple?folder_id={ folder ['id' ]} "
647+ folder_id = folder ["id" ]
648+ dataset_file_upload_endpoint += f"Multiple?folder_id={ folder_id } "
649+ files = [
650+ ("files" , open (filename , "rb" )),
651+ ]
646652 response = requests .post (
647653 dataset_file_upload_endpoint ,
648654 headers = headers_v2 ,
649- files = { "file" : open ( filename , "rb" )} ,
655+ files = files ,
650656 )
651657 if response .status_code == 200 :
652658 print (f"Uploaded file: { filename } to dataset { dataset_v2_id } " )
@@ -662,6 +668,134 @@ def download_and_upload_file_to_folder(file, folder, dataset_v2_id, headers_v2):
662668 print (e )
663669 return None
664670
def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, headers_v2):
    """Download a file from Clowder v1 and upload it to a Clowder v2 dataset.

    Args:
        file: Clowder v1 file record; its ``"filename"`` and ``"id"`` keys
            are read.
        folder_v2: Clowder v2 folder record whose ``"id"`` is appended to the
            upload URL as the ``folder_id`` query parameter, or ``None`` to
            upload without a folder.
        dataset_v2_id: ID of the target Clowder v2 dataset.
        headers_v2: HTTP headers sent with the v2 upload request.

    Returns:
        The ``"id"`` field of the JSON upload response when the upload
        returns HTTP 200, otherwise ``None``.
    """
    filename = file["filename"]
    file_id = file["id"]

    # Download the file from Clowder v1
    v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
    print(f"Downloading file: {filename}")
    download_response = requests.get(v1_download_url, headers=clowder_headers_v1)

    with open(filename, "wb") as f:
        f.write(download_response.content)

    # Upload the file to Clowder v2
    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
    if folder_v2 is not None:
        dataset_file_upload_endpoint += f"?folder_id={folder_v2['id']}"

    # Open the upload handle in a context manager so it is always closed,
    # even when the request raises.
    with open(filename, "rb") as upload_handle:
        response = requests.post(
            dataset_file_upload_endpoint,
            headers=headers_v2,
            files={"file": upload_handle},
        )

    # NOTE: the locally downloaded copy is intentionally left on disk here;
    # this function does not remove it after the upload.

    if response.status_code == 200:
        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
        return response.json().get("id")
    else:
        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")

    return None
710+
711+ # def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, headers_v2):
712+ # """Download a file from Clowder v1 and upload it to Clowder v2."""
713+ # filename = file["filename"]
714+ # file_id = file["id"]
715+ #
716+ # # DEBUG: Print all inputs
717+ # print(f"=== DEBUG START ===")
718+ # print(f"File: {file}")
719+ # print(f"Folder_v2: {folder_v2}")
720+ # print(f"Dataset_v2_id: {dataset_v2_id}")
721+ # print(f"Headers_v2 keys: {list(headers_v2.keys()) if headers_v2 else 'None'}")
722+ #
723+ # # Download the file from Clowder v1
724+ # v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
725+ # print(f"Downloading file: {filename} from {v1_download_url}")
726+ # download_response = requests.get(v1_download_url, headers=clowder_headers_v1)
727+ # print(f"Download status: {download_response.status_code}")
728+ #
729+ # with open(filename, "wb") as f:
730+ # f.write(download_response.content)
731+ #
732+ # # Check file exists and has content
733+ # file_size = os.path.getsize(filename)
734+ # print(f"Local file size: {file_size} bytes")
735+ #
736+ # # Upload the file to Clowder v2
737+ # dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
738+ #
739+ # if folder_v2 is not None:
740+ # folder_id = folder_v2['id'] if isinstance(folder_v2, dict) else folder_v2.id
741+ # dataset_file_upload_endpoint += f"Multiple?folder_id={folder_id}"
742+ #
743+ # print(f"Upload endpoint: {dataset_file_upload_endpoint}")
744+ #
745+ # # Read file content to verify it's not corrupted
746+ # with open(filename, "rb") as f:
747+ # file_content = f.read()
748+ # print(f"File content length: {len(file_content)}")
749+ # print(f"File content starts with: {file_content[:100]}...")
750+ #
751+ # # Make the upload request with detailed debugging
752+ # with open(filename, "rb") as file_obj:
753+ # files = {"file": (filename, file_obj)}
754+ #
755+ # print(f"Final files dict: {files}")
756+ # # Create headers without content-type for file uploads
757+ # upload_headers = headers_v2.copy()
758+ # upload_headers.pop('content-type', None)
759+ # print(f"Final headers: {upload_headers}")
760+ #
761+ # # Use a session to see raw request
762+ # session = requests.Session()
763+ # prepared_request = requests.Request(
764+ # 'POST',
765+ # dataset_file_upload_endpoint,
766+ # headers=upload_headers,
767+ # files=files
768+ # ).prepare()
769+ #
770+ # print(f"Prepared request URL: {prepared_request.url}")
771+ # print(f"Prepared request headers: {dict(prepared_request.headers)}")
772+ # # Don't print body as it's binary, but we can check content-type
773+ # print(f"Content-Type header: {prepared_request.headers.get('Content-Type')}")
774+ #
775+ # response = session.send(prepared_request)
776+ #
777+ # # DEBUG: Full response analysis
778+ # print(f"Response status: {response.status_code}")
779+ # print(f"Response headers: {dict(response.headers)}")
780+ # print(f"Response text: {response.text}")
781+ # print(f"=== DEBUG END ===")
782+ #
783+ # # Clean up the local file after upload
784+ # try:
785+ # os.remove(filename)
786+ # except Exception as e:
787+ # print(f"Could not delete locally downloaded file: {filename}")
788+ # print(e)
789+ #
790+ # if response.status_code == 200:
791+ # print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
792+ # return response.json().get("id")
793+ # else:
794+ # print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
795+ #
796+ # return None
797+
798+
665799
666800def download_and_upload_file (file , all_dataset_folders , dataset_v2_id , headers_v2 ):
667801 """Download a file from Clowder v1 and upload it to Clowder v2."""
@@ -693,11 +827,17 @@ def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v
693827 dataset_file_upload_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets/{ dataset_v2_id } /files"
694828 if matching_folder :
695829 dataset_file_upload_endpoint += f"Multiple?folder_id={ matching_folder ['id' ]} "
696- response = requests .post (
697- dataset_file_upload_endpoint ,
698- headers = headers_v2 ,
699- files = {"file" : open (filename , "rb" )},
700- )
830+ response = requests .post (
831+ dataset_file_upload_endpoint ,
832+ headers = headers_v2 ,
833+ files = [("files" , (filename , open (filename , "rb" )))],
834+ )
835+ else :
836+ response = requests .post (
837+ dataset_file_upload_endpoint ,
838+ headers = headers_v2 ,
839+ files = {"file" : open (filename , "rb" )},
840+ )
701841
702842 # Clean up the local file after upload
703843 try :
@@ -714,6 +854,78 @@ def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v
714854
715855 return None
716856
def download_and_upload_file_1(file, all_dataset_folders, dataset_v2_id, headers_v2):
    """Download a file from Clowder v1 and upload it to Clowder v2, with debug output.

    Debugging variant of the upload routine: it builds the upload as a
    prepared request so the final URL and headers can be printed before the
    request is sent.

    Args:
        file: Clowder v1 file record; its ``"filename"``, ``"id"`` and
            optional ``"folders"`` keys are read.
        all_dataset_folders: Iterable of Clowder v2 folder records; the first
            one whose ``"name"`` equals the v1 folder's ``"name"`` is used as
            the upload target.
        dataset_v2_id: ID of the target Clowder v2 dataset.
        headers_v2: HTTP headers sent with the v2 upload request.

    Returns:
        The ``"id"`` field of the JSON upload response when the upload
        returns HTTP 200, otherwise ``None``.
    """
    filename = file["filename"]
    file_id = file["id"]
    file_folder = file.get("folders", None)

    # Download the file from Clowder v1
    v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
    print(f"Downloading file: {filename}")
    download_response = requests.get(v1_download_url, headers=clowder_headers_v1)

    with open(filename, "wb") as f:
        f.write(download_response.content)

    # Determine the correct folder in Clowder v2 for the upload
    matching_folder = None
    if file_folder:
        matching_folder = next(
            (
                folder
                for folder in all_dataset_folders
                if folder["name"] == file_folder["name"]
            ),
            None,
        )

    # Upload the file to Clowder v2
    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
    if matching_folder:
        dataset_file_upload_endpoint += f"Multiple?folder_id={matching_folder['id']}"

    # NOTE(review): the debug output below prints the request headers
    # verbatim; if they carry credentials these end up in the logs - confirm
    # this is acceptable before running outside a debugging session.
    print("=== WORKING METHOD DEBUG ===")
    print(f"Upload endpoint: {dataset_file_upload_endpoint}")
    print(f"Headers: {headers_v2}")

    # Keep both the file handle and the session inside context managers so
    # the handle and the connection pool are released even if the send fails.
    with open(filename, "rb") as file_obj, requests.Session() as session:
        files = {"file": (filename, file_obj)}

        # Prepare the request separately so the final URL/headers can be inspected
        prepared_request = requests.Request(
            "POST",
            dataset_file_upload_endpoint,
            headers=headers_v2,
            files=files,
        ).prepare()

        print(f"Prepared request URL: {prepared_request.url}")
        print(f"Prepared request headers: {dict(prepared_request.headers)}")
        print(f"Content-Type header: {prepared_request.headers.get('Content-Type')}")

        response = session.send(prepared_request)

    print(f"Response status: {response.status_code}")
    print(f"Response text: {response.text}")
    print("=== WORKING METHOD DEBUG END ===")

    # Clean up the local file after upload (best effort: a failure is only reported)
    try:
        os.remove(filename)
    except OSError as e:
        print(f"Could not delete locally downloaded file: {filename}")
        print(e)

    if response.status_code == 200:
        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
        return response.json().get("id")
    else:
        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")

    return None
717929
718930def add_file_metadata (file_v1 , file_v2_id , headers_v1 , headers_v2 ):
719931 # Get metadata from Clowder V1
@@ -1207,9 +1419,12 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
12071419 "[Local Account]" in user_v1 ["identityProvider" ]
12081420 and user_v1 ["email" ] != admin_user ["email" ]
12091421 ):
1210- [USER_MAP , DATASET_MAP ] = process_user_and_resources_collections (
1211- user_v1 , USER_MAP , DATASET_MAP , COLLECTIONS_MAP
1422+ [USER_MAP , DATASET_MAP ] = process_user_and_resources (
1423+ user_v1 , USER_MAP , DATASET_MAP
12121424 )
1425+ # [USER_MAP, DATASET_MAP] = process_user_and_resources_collections(
1426+ # user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP
1427+ # )
12131428 print (f"Migrated user { user_v1 ['email' ]} and associated resources." )
12141429 else :
12151430 print (f"Skipping user { user_v1 ['email' ]} as it is not a local account." )
0 commit comments