
Commit c76e75e

Commit message: "messy, but works, can post the data"
1 parent 04fc4b2 commit c76e75e

File tree: 1 file changed (+113, -27 lines)


scripts/migration/migrate.py

Lines changed: 113 additions & 27 deletions
@@ -188,7 +188,7 @@ def get_clowder_v1_user_collections_top_level(headers, user_v1):
             top_level_collections.append(col)
     return top_level_collections
 
-def process_collection_descendants(collection, headers_v1,headers_v2, v2_parent_id, v2_parent_type, v2_dataset_id):
+def process_collection_descendants(collection, headers_v1, base_headers_v2, headers_v2, v2_parent_id, v2_parent_type, v2_dataset_id):
     child_collections_endpoint = f"{CLOWDER_V1}/api/collections/{collection['id']}/getChildCollections"
     datasets_endpoint = f"{CLOWDER_V1}/api/collections/{collection['id']}/datasets"
 
@@ -203,13 +203,13 @@ def process_collection_descendants(collection, headers_v1,headers_v2, v2_parent_
             print(f"Add folder to the dataset")
             folder_name = child["name"]
             new_folder = create_folder_if_not_exists_or_get(folder_name, None, v2_dataset_id, headers_v2)
-            process_collection_descendants(child, headers_v1,headers_v2, new_folder['id'], 'folder', v2_dataset_id )
+            process_collection_descendants(child, headers_v1, base_headers_v2, headers_v2, new_folder['id'], 'folder', v2_dataset_id )
         else:
             print(f"parent was a folder")
             print(f"Add folder to the dataset")
             folder_name = child["name"]
             new_folder = create_folder_if_not_exists_or_get(folder_name, v2_parent_id, v2_dataset_id, headers_v2)
-            process_collection_descendants(child, headers_v1, headers_v2, new_folder['id'], 'folder', v2_dataset_id)
+            process_collection_descendants(child, headers_v1, base_headers_v2, headers_v2, new_folder['id'], 'folder', v2_dataset_id)
 
     for dataset in dataset_json:
         if v2_parent_type == "dataset":
@@ -218,13 +218,13 @@ def process_collection_descendants(collection, headers_v1,headers_v2, v2_parent_
             print(f"Now we need to add the sub folders of this dataset")
             # TODO get DATASET FOLDERS HERE FROM v1
             process_dataset_folders(dataset, headers_v1, headers_v2, new_folder['id'], v2_dataset_id)
-            process_dataset_files(dataset, headers_v1, headers_v2, 'folder', new_folder['id'], v2_dataset_id)
+            process_dataset_files(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id)
         else:
             print(f"Parent is a folder")
             new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_dataset_id, headers_v2)
             # TODO GET DATASET FOLDERS HERE FROM v1
             process_dataset_folders(dataset, headers_v1, headers_v2, new_folder['id'], v2_dataset_id)
-            process_dataset_files(dataset, headers_v1, headers_v2, 'folder', new_folder['id'], v2_dataset_id)
+            process_dataset_files(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id)
 
 
 
@@ -265,7 +265,7 @@ def process_dataset_files(dataset, headers_v1, headers_v2, parent_type, parent_i
             if folder_v2['name'] == file['folders']['name']:
                 print(f"Upload this file to a folder")
                 matching_folder = folder_v2
-                download_and_upload_file_to_folder_id(file, matching_folder, dataset_v2_id, headers_v2)
+                download_and_upload_file_to_matching_folder(file, dataset_v2_id, base_headers_v2, matching_folder)
         else:
             print(f"This file is not in a folder")
             # TODO upload it to the folder
@@ -277,7 +277,7 @@ def process_dataset_files(dataset, headers_v1, headers_v2, parent_type, parent_i
 
 
 
-def create_v2_dataset_from_collection(collection, user_v1, headers_v1, headers_v2):
+def create_v2_dataset_from_collection(collection, user_v1, headers_v1, headers_v2, base_headers_v2):
     # create the dataset
     collection_name = collection["name"]
     collection_description = collection["description"]
@@ -295,7 +295,7 @@ def create_v2_dataset_from_collection(collection, user_v1, headers_v1, headers_v
     new_dataset_json = response.json()
     v2_dataset_id = new_dataset_json["id"]
 
-    process_collection_descendants(collection, headers_v1, headers_v2, new_dataset_json["id"], "dataset", v2_dataset_id)
+    process_collection_descendants(collection, headers_v1, base_headers_v2, headers_v2, new_dataset_json["id"], "dataset", v2_dataset_id)
 
     return response.json()["id"]
 
@@ -685,20 +685,26 @@ def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, header
     file_exists = os.path.exists(filename)
     # Upload the file to Clowder v2
     dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
-    if folder_v2 is not None:
-        dataset_file_upload_endpoint += f"?folder_id={folder_v2['id']}"
-    response = requests.post(
-        dataset_file_upload_endpoint,
-        headers=headers_v2,
-        files={"file": open(filename, "rb")},
-    )
+    if folder_v2:
+        dataset_file_upload_endpoint += f"Multiple?folder_id={folder_v2['id']}"
+        response = requests.post(
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files=[("files", (filename, open(filename, "rb")))],
+        )
+    else:
+        response = requests.post(
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files={"file": open(filename, "rb")},
+        )
 
     # Clean up the local file after upload
-    # try:
-    #     os.remove(filename)
-    # except Exception as e:
-    #     print(f"Could not delete locally downloaded file: {filename}")
-    #     print(e)
+    try:
+        os.remove(filename)
+    except Exception as e:
+        print(f"Could not delete locally downloaded file: {filename}")
+        print(e)
 
     if response.status_code == 200:
         print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
@@ -796,6 +802,56 @@ def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, header
     # return None
 
 
+def download_and_upload_file_to_matching_folder(file, dataset_v2_id, headers_v2, matching_folder=None):
+    """Download a file from Clowder v1 and upload it to Clowder v2."""
+    filename = file["filename"]
+    file_id = file["id"]
+
+    # Download the file from Clowder v1
+    v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
+    print(f"Downloading file: {filename}")
+    download_response = requests.get(v1_download_url, headers=clowder_headers_v1)
+
+    with open(filename, "wb") as f:
+        f.write(download_response.content)
+
+    # Upload the file to Clowder v2
+    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
+    if matching_folder:
+        dataset_file_upload_endpoint += f"Multiple?folder_id={matching_folder['id']}"
+
+        # DEBUG: Print the exact request details
+        print(f"DEBUG: URL: {dataset_file_upload_endpoint}")
+        print(f"DEBUG: Headers: {headers_v2}")
+        print(f"DEBUG: Folder ID: {matching_folder['id']}")
+
+        response = requests.post(
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files=[("files", (filename, open(filename, "rb")))],
+        )
+    else:
+        response = requests.post(
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files={"file": open(filename, "rb")},
+        )
+
+    # Clean up the local file after upload
+    try:
+        os.remove(filename)
+    except Exception as e:
+        print(f"Could not delete locally downloaded file: {filename}")
+        print(e)
+
+    if response.status_code == 200:
+        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
+        return response.json().get("id")
+    else:
+        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
+
+        return None
+
 
 def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v2):
     """Download a file from Clowder v1 and upload it to Clowder v2."""
@@ -1194,7 +1250,7 @@ def process_user_and_resources_collections(user_v1, USER_MAP, DATASET_MAP, COLLE
     print(f"Got {len(user_v1_collections)} user collections in the top level")
 
     for top_level_col in user_v1_collections:
-        dataset_v2 = create_v2_dataset_from_collection(top_level_col, user_v1, clowder_headers_v1, user_headers_v2)
+        dataset_v2 = create_v2_dataset_from_collection(top_level_col, user_v1, clowder_headers_v1, user_headers_v2, base_headers_v2)
         print('did this')
 
     for dataset in user_v1_datasets:
@@ -1245,6 +1301,21 @@ def process_user_and_resources_collections(user_v1, USER_MAP, DATASET_MAP, COLLE
             files_result = files_response.json()
 
             for file in files_result:
+                file_folder = file.get("folders", None)
+                matching_folder = None
+                if file_folder:
+                    matching_folder = next(
+                        (
+                            folder
+                            for folder in all_dataset_folders
+                            if folder["name"] == file_folder["name"]
+                        ),
+                        None,
+                    )
+                print('did we get matching folder?')
+                file_v2_id = download_and_upload_file_to_matching_folder(
+                    file, dataset_v2_id, base_user_headers_v2, matching_folder
+                )
                 file_v2_id = download_and_upload_file(
                     file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
                 )
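
The lookup added in this hunk is the first-match-or-None idiom: next() over a generator expression with a default. A self-contained sketch with made-up data follows; note also that the unchanged context lines still call download_and_upload_file afterwards, so in this revision both upload paths run for each file.

# Made-up folders shaped as this diff uses them.
all_dataset_folders = [
    {"id": "a1", "name": "raw"},
    {"id": "b2", "name": "results"},
]
file_folder = {"name": "results"}

# First v2 folder whose name matches, or None if the generator is exhausted.
matching_folder = next(
    (folder for folder in all_dataset_folders if folder["name"] == file_folder["name"]),
    None,
)
print(matching_folder)  # -> {'id': 'b2', 'name': 'results'}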
@@ -1344,8 +1415,23 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
                 dataset_files_endpoint, headers=clowder_headers_v1, verify=False
             )
             files_result = files_response.json()
-
+            # TODO test folder here
             for file in files_result:
+                file_folder = file.get("folders", None)
+                matching_folder = None
+                if file_folder:
+                    matching_folder = next(
+                        (
+                            folder
+                            for folder in all_dataset_folders
+                            if folder["name"] == file_folder["name"]
+                        ),
+                        None,
+                    )
+                print('did we get matching folder?')
+                file_v2_id = download_and_upload_file_to_matching_folder(
+                    file, dataset_v2_id, base_user_headers_v2, matching_folder
+                )
                 file_v2_id = download_and_upload_file(
                     file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
                 )
@@ -1419,12 +1505,12 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
             "[Local Account]" in user_v1["identityProvider"]
             and user_v1["email"] != admin_user["email"]
         ):
-            [USER_MAP, DATASET_MAP] = process_user_and_resources(
-                user_v1, USER_MAP, DATASET_MAP
-            )
-            # [USER_MAP, DATASET_MAP] = process_user_and_resources_collections(
-            #     user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP
+            # [USER_MAP, DATASET_MAP] = process_user_and_resources(
+            #     user_v1, USER_MAP, DATASET_MAP
             # )
+            [USER_MAP, DATASET_MAP] = process_user_and_resources_collections(
+                user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP
+            )
             print(f"Migrated user {user_v1['email']} and associated resources.")
         else:
             print(f"Skipping user {user_v1['email']} as it is not a local account.")
