@@ -221,13 +221,6 @@ def process_collection_descendants(collection, headers_v1, base_headers_v2, head
221221 process_dataset_files_and_folders (dataset , headers_v1 , base_headers_v2 , 'folder' , new_folder ['id' ], v2_dataset_id , new_folder )
222222
223223
224-
225- def process_dataset_folders (dataset , headers_v1 , headers_v2 , parent_type , parent_id ):
226- folder_endpoint = f"{ CLOWDER_V1 } /api/datasets/{ dataset ['id' ]} /folders"
227- folder_response = requests .get (folder_endpoint , headers = headers_v1 )
228- folder_json = folder_response .json ()
229- print (f"Got dataset folders" )
230-
231224def get_v1_dataset_folders (dataset , headers_v1 , headers_v2 , parent_type , parent_id ):
232225 folder_endpoint = f"{ CLOWDER_V1 } /api/datasets/{ dataset ['id' ]} /folders"
233226 folder_response = requests .get (folder_endpoint , headers = headers_v1 )
@@ -293,9 +286,6 @@ def create_v2_dataset_from_collection(collection, user_v1, headers_v1, headers_v
293286 return response .json ()["id" ]
294287
295288
296- # go through sub collections creating folders
297- print ("Creating v2-dataset from collection" )
298-
299289
300290# TODO this is too slow, we need to optimize it
301291def get_clowder_v1_dataset_collections (headers , user_v1 , dataset_id ):
@@ -544,7 +534,7 @@ def create_v2_group(space, headers):
544534 response = requests .post (group_in_v2_endpoint , json = group , headers = headers )
545535 return response .json ()["id" ]
546536
547- # TODO try this
537+
548538def add_folder_hierarchy_to_migration_folder (folder_hierarchy , dataset_v2 , folder_id_v2 , headers ):
549539 """Add folder hierarchy to a dataset in Clowder v2."""
550540 hierarchy_parts = folder_hierarchy .split ("/" )
@@ -571,7 +561,7 @@ def add_folder_hierarchy(folder_hierarchy, dataset_v2, headers):
571561 if result :
572562 current_parent = result ["id" ]
573563
574- # for creating a folder for a dataset
564+ # for creating a folder for a dataset that is migrated to a dataset
575565def create_dataset_folder_if_not_exists_or_get (folder , parent , dataset_v2 , headers ):
576566 """Create a folder if it does not exist or return the existing folder."""
577567 # current_folders = get_folder_and_subfolders(dataset_v2, headers)
@@ -591,6 +581,7 @@ def create_dataset_folder_if_not_exists_or_get(folder, parent, dataset_v2, heade
591581 )
592582 return response .json ()
593583
584+ # used for creating folders and subfolders when a collection is migrated to a dataset
594585def create_folder_if_not_exists_or_get (folder , parent , parent_type , dataset_v2 , headers ):
595586 """Create a folder if it does not exist or return the existing folder."""
596587 # current_folders = get_folder_and_subfolders(dataset_v2, headers)
@@ -641,182 +632,6 @@ def add_dataset_folders(dataset_v1, dataset_v2, headers):
641632 add_folder_hierarchy (folder ["name" ], dataset_v2 , headers )
642633
643634
644- def download_and_upload_file_to_folder (file , folder , dataset_v2_id , headers_v2 ):
645- """Download a file from Clowder v1 and upload it to Clowder v2."""
646- filename = file ["filename" ]
647- file_id = file ["id" ]
648- file_folder = folder
649-
650- # Download the file from Clowder v1
651- v1_download_url = f"{ CLOWDER_V1 } /api/files/{ file_id } ?superAdmin=true"
652- print (f"Downloading file: { filename } " )
653- download_response = requests .get (v1_download_url , headers = clowder_headers_v1 )
654-
655- with open (filename , "wb" ) as f :
656- f .write (download_response .content )
657-
658- # Upload the file to Clowder v2
659- dataset_file_upload_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets/{ dataset_v2_id } /files"
660- if folder is not None :
661- # add folder if it is not None
662- folder_id = folder ["id" ]
663- dataset_file_upload_endpoint += f"Multiple?folder_id={ folder_id } "
664- files = [
665- ("files" , open (filename , "rb" )),
666- ]
667- response = requests .post (
668- dataset_file_upload_endpoint ,
669- headers = headers_v2 ,
670- files = files ,
671- )
672- if response .status_code == 200 :
673- print (f"Uploaded file: { filename } to dataset { dataset_v2_id } " )
674- return response .json ().get ("id" )
675- else :
676- print (f"Failed to upload file: { filename } to dataset { dataset_v2_id } " )
677-
678- # Clean up the local file after upload
679- try :
680- os .remove (filename )
681- except Exception as e :
682- print (f"Could not delete locally downloaded file: { filename } " )
683- print (e )
684- return None
685-
686- def download_and_upload_file_to_folder_id (file , folder_v2 , dataset_v2_id , headers_v2 ):
687- """Download a file from Clowder v1 and upload it to Clowder v2."""
688- filename = file ["filename" ]
689- file_id = file ["id" ]
690- file_folder = file .get ("folders" , None )
691-
692- # Download the file from Clowder v1
693- v1_download_url = f"{ CLOWDER_V1 } /api/files/{ file_id } ?superAdmin=true"
694- print (f"Downloading file: { filename } " )
695- download_response = requests .get (v1_download_url , headers = clowder_headers_v1 )
696-
697- with open (filename , "wb" ) as f :
698- f .write (download_response .content )
699-
700- file_exists = os .path .exists (filename )
701- # Upload the file to Clowder v2
702- dataset_file_upload_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets/{ dataset_v2_id } /files"
703- if folder_v2 :
704- dataset_file_upload_endpoint += f"Multiple?folder_id={ folder_v2 ['id' ]} "
705- response = requests .post (
706- dataset_file_upload_endpoint ,
707- headers = headers_v2 ,
708- files = [("files" , (filename , open (filename , "rb" )))],
709- )
710- else :
711- response = requests .post (
712- dataset_file_upload_endpoint ,
713- headers = headers_v2 ,
714- files = {"file" : open (filename , "rb" )},
715- )
716-
717- # Clean up the local file after upload
718- try :
719- os .remove (filename )
720- except Exception as e :
721- print (f"Could not delete locally downloaded file: { filename } " )
722- print (e )
723-
724- if response .status_code == 200 :
725- print (f"Uploaded file: { filename } to dataset { dataset_v2_id } " )
726- return response .json ().get ("id" )
727- else :
728- print (f"Failed to upload file: { filename } to dataset { dataset_v2_id } " )
729-
730- return None
731-
732- # def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, headers_v2):
733- # """Download a file from Clowder v1 and upload it to Clowder v2."""
734- # filename = file["filename"]
735- # file_id = file["id"]
736- #
737- # # DEBUG: Print all inputs
738- # print(f"=== DEBUG START ===")
739- # print(f"File: {file}")
740- # print(f"Folder_v2: {folder_v2}")
741- # print(f"Dataset_v2_id: {dataset_v2_id}")
742- # print(f"Headers_v2 keys: {list(headers_v2.keys()) if headers_v2 else 'None'}")
743- #
744- # # Download the file from Clowder v1
745- # v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
746- # print(f"Downloading file: {filename} from {v1_download_url}")
747- # download_response = requests.get(v1_download_url, headers=clowder_headers_v1)
748- # print(f"Download status: {download_response.status_code}")
749- #
750- # with open(filename, "wb") as f:
751- # f.write(download_response.content)
752- #
753- # # Check file exists and has content
754- # file_size = os.path.getsize(filename)
755- # print(f"Local file size: {file_size} bytes")
756- #
757- # # Upload the file to Clowder v2
758- # dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
759- #
760- # if folder_v2 is not None:
761- # folder_id = folder_v2['id'] if isinstance(folder_v2, dict) else folder_v2.id
762- # dataset_file_upload_endpoint += f"Multiple?folder_id={folder_id}"
763- #
764- # print(f"Upload endpoint: {dataset_file_upload_endpoint}")
765- #
766- # # Read file content to verify it's not corrupted
767- # with open(filename, "rb") as f:
768- # file_content = f.read()
769- # print(f"File content length: {len(file_content)}")
770- # print(f"File content starts with: {file_content[:100]}...")
771- #
772- # # Make the upload request with detailed debugging
773- # with open(filename, "rb") as file_obj:
774- # files = {"file": (filename, file_obj)}
775- #
776- # print(f"Final files dict: {files}")
777- # # Create headers without content-type for file uploads
778- # upload_headers = headers_v2.copy()
779- # upload_headers.pop('content-type', None)
780- # print(f"Final headers: {upload_headers}")
781- #
782- # # Use a session to see raw request
783- # session = requests.Session()
784- # prepared_request = requests.Request(
785- # 'POST',
786- # dataset_file_upload_endpoint,
787- # headers=upload_headers,
788- # files=files
789- # ).prepare()
790- #
791- # print(f"Prepared request URL: {prepared_request.url}")
792- # print(f"Prepared request headers: {dict(prepared_request.headers)}")
793- # # Don't print body as it's binary, but we can check content-type
794- # print(f"Content-Type header: {prepared_request.headers.get('Content-Type')}")
795- #
796- # response = session.send(prepared_request)
797- #
798- # # DEBUG: Full response analysis
799- # print(f"Response status: {response.status_code}")
800- # print(f"Response headers: {dict(response.headers)}")
801- # print(f"Response text: {response.text}")
802- # print(f"=== DEBUG END ===")
803- #
804- # # Clean up the local file after upload
805- # try:
806- # os.remove(filename)
807- # except Exception as e:
808- # print(f"Could not delete locally downloaded file: {filename}")
809- # print(e)
810- #
811- # if response.status_code == 200:
812- # print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
813- # return response.json().get("id")
814- # else:
815- # print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
816- #
817- # return None
818-
819-
820635def download_and_upload_file_to_matching_folder (file , dataset_v2_id , headers_v2 , matching_folder = None ):
821636 """Download a file from Clowder v1 and upload it to Clowder v2."""
822637 filename = file ["filename" ]
@@ -930,79 +745,6 @@ def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v
930745
931746 return None
932747
933- def download_and_upload_file_1 (file , all_dataset_folders , dataset_v2_id , headers_v2 ):
934- """Download a file from Clowder v1 and upload it to Clowder v2."""
935- filename = file ["filename" ]
936- file_id = file ["id" ]
937- file_folder = file .get ("folders" , None )
938-
939- # Download the file from Clowder v1
940- v1_download_url = f"{ CLOWDER_V1 } /api/files/{ file_id } ?superAdmin=true"
941- print (f"Downloading file: { filename } " )
942- download_response = requests .get (v1_download_url , headers = clowder_headers_v1 )
943-
944- with open (filename , "wb" ) as f :
945- f .write (download_response .content )
946-
947- # Determine the correct folder in Clowder v2 for the upload
948- matching_folder = None
949- if file_folder :
950- matching_folder = next (
951- (
952- folder
953- for folder in all_dataset_folders
954- if folder ["name" ] == file_folder ["name" ]
955- ),
956- None ,
957- )
958-
959- # Upload the file to Clowder v2
960- dataset_file_upload_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets/{ dataset_v2_id } /files"
961- if matching_folder :
962- dataset_file_upload_endpoint += f"Multiple?folder_id={ matching_folder ['id' ]} "
963-
964- # DEBUG: Add the same debugging as the new method
965- print (f"=== WORKING METHOD DEBUG ===" )
966- print (f"Upload endpoint: { dataset_file_upload_endpoint } " )
967- print (f"Headers: { headers_v2 } " )
968-
969- with open (filename , "rb" ) as file_obj :
970- files = {"file" : (filename , file_obj )}
971-
972- # Use a session to see raw request
973- session = requests .Session ()
974- prepared_request = requests .Request (
975- 'POST' ,
976- dataset_file_upload_endpoint ,
977- headers = headers_v2 ,
978- files = files
979- ).prepare ()
980-
981- print (f"Prepared request URL: { prepared_request .url } " )
982- print (f"Prepared request headers: { dict (prepared_request .headers )} " )
983- print (f"Content-Type header: { prepared_request .headers .get ('Content-Type' )} " )
984-
985- response = session .send (prepared_request )
986-
987- print (f"Response status: { response .status_code } " )
988- print (f"Response text: { response .text } " )
989- print (f"=== WORKING METHOD DEBUG END ===" )
990-
991- # Clean up the local file after upload
992- try :
993- os .remove (filename )
994- except Exception as e :
995- print (f"Could not delete locally downloaded file: { filename } " )
996- print (e )
997-
998- if response .status_code == 200 :
999- print (f"Uploaded file: { filename } to dataset { dataset_v2_id } " )
1000- return response .json ().get ("id" )
1001- else :
1002- print (f"Failed to upload file: { filename } to dataset { dataset_v2_id } " )
1003-
1004- return None
1005-
1006748def add_file_metadata (file_v1 , file_v2_id , headers_v1 , headers_v2 ):
1007749 # Get metadata from Clowder V1
1008750 endpoint = f"{ CLOWDER_V1 } /api/files/{ file_v1 ['id' ]} /metadata.jsonld?superAdmin=true"
0 commit comments