
Commit 04fc4b2

partial fix?
1 parent 1a5e2b7 commit 04fc4b2

2 files changed: +297, -10 lines


scripts/migration/migrate.py

Lines changed: 225 additions & 10 deletions
@@ -265,7 +265,7 @@ def process_dataset_files(dataset, headers_v1, headers_v2, parent_type, parent_i
             if folder_v2['name'] == file['folders']['name']:
                 print(f"Upload this file to a folder")
                 matching_folder = folder_v2
-                download_and_upload_file()
+                download_and_upload_file_to_folder_id(file, matching_folder, dataset_v2_id, headers_v2)
         else:
             print(f"This file is not in a folder")
             # TODO upload it to the folder
@@ -568,6 +568,8 @@ def add_folder_hierarchy_to_migration_folder(folder_hierarchy, dataset_v2, folde
 def add_folder_hierarchy(folder_hierarchy, dataset_v2, headers):
     """Add folder hierarchy to a dataset in Clowder v2."""
     hierarchy_parts = folder_hierarchy.split("/")
+    if hierarchy_parts[0] == '':
+        hierarchy_parts = hierarchy_parts[1:]
     current_parent = None
     for part in hierarchy_parts:
         result = create_folder_if_not_exists_or_get(
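The two added lines guard against hierarchies that start with a slash: Python's str.split keeps the empty leading field, so without the guard the first "folder" created would have an empty name. A quick illustration (the path is hypothetical):

    "/raw/2018".split("/")   # ['', 'raw', '2018']
    "raw/2018".split("/")    # ['raw', '2018']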
@@ -642,11 +644,15 @@ def download_and_upload_file_to_folder(file, folder, dataset_v2_id, headers_v2):
     dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
     if folder is not None:
         # add folder if it is not None
-        dataset_file_upload_endpoint += f"Multiple?folder_id={folder['id']}"
+        folder_id = folder["id"]
+        dataset_file_upload_endpoint += f"Multiple?folder_id={folder_id}"
+        files = [
+            ("files", open(filename, "rb")),
+        ]
     response = requests.post(
         dataset_file_upload_endpoint,
         headers=headers_v2,
-        files={"file": open(filename, "rb")},
+        files=files,
     )
     if response.status_code == 200:
         print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
@@ -662,6 +668,134 @@ def download_and_upload_file_to_folder(file, folder, dataset_v2_id, headers_v2):
         print(e)
     return None
 
+def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, headers_v2):
+    """Download a file from Clowder v1 and upload it to Clowder v2."""
+    filename = file["filename"]
+    file_id = file["id"]
+    file_folder = file.get("folders", None)
+
+    # Download the file from Clowder v1
+    v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
+    print(f"Downloading file: {filename}")
+    download_response = requests.get(v1_download_url, headers=clowder_headers_v1)
+
+    with open(filename, "wb") as f:
+        f.write(download_response.content)
+
+    file_exists = os.path.exists(filename)
+    # Upload the file to Clowder v2
+    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
+    if folder_v2 is not None:
+        dataset_file_upload_endpoint += f"?folder_id={folder_v2['id']}"
+    response = requests.post(
+        dataset_file_upload_endpoint,
+        headers=headers_v2,
+        files={"file": open(filename, "rb")},
+    )
+
+    # Clean up the local file after upload
+    # try:
+    #     os.remove(filename)
+    # except Exception as e:
+    #     print(f"Could not delete locally downloaded file: {filename}")
+    #     print(e)
+
+    if response.status_code == 200:
+        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
+        return response.json().get("id")
+    else:
+        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
+
+    return None
+
+# def download_and_upload_file_to_folder_id(file, folder_v2, dataset_v2_id, headers_v2):
+#     """Download a file from Clowder v1 and upload it to Clowder v2."""
+#     filename = file["filename"]
+#     file_id = file["id"]
+#
+#     # DEBUG: Print all inputs
+#     print(f"=== DEBUG START ===")
+#     print(f"File: {file}")
+#     print(f"Folder_v2: {folder_v2}")
+#     print(f"Dataset_v2_id: {dataset_v2_id}")
+#     print(f"Headers_v2 keys: {list(headers_v2.keys()) if headers_v2 else 'None'}")
+#
+#     # Download the file from Clowder v1
+#     v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
+#     print(f"Downloading file: {filename} from {v1_download_url}")
+#     download_response = requests.get(v1_download_url, headers=clowder_headers_v1)
+#     print(f"Download status: {download_response.status_code}")
+#
+#     with open(filename, "wb") as f:
+#         f.write(download_response.content)
+#
+#     # Check file exists and has content
+#     file_size = os.path.getsize(filename)
+#     print(f"Local file size: {file_size} bytes")
+#
+#     # Upload the file to Clowder v2
+#     dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
+#
+#     if folder_v2 is not None:
+#         folder_id = folder_v2['id'] if isinstance(folder_v2, dict) else folder_v2.id
+#         dataset_file_upload_endpoint += f"Multiple?folder_id={folder_id}"
+#
+#     print(f"Upload endpoint: {dataset_file_upload_endpoint}")
+#
+#     # Read file content to verify it's not corrupted
+#     with open(filename, "rb") as f:
+#         file_content = f.read()
+#     print(f"File content length: {len(file_content)}")
+#     print(f"File content starts with: {file_content[:100]}...")
+#
+#     # Make the upload request with detailed debugging
+#     with open(filename, "rb") as file_obj:
+#         files = {"file": (filename, file_obj)}
+#
+#         print(f"Final files dict: {files}")
+#         # Create headers without content-type for file uploads
+#         upload_headers = headers_v2.copy()
+#         upload_headers.pop('content-type', None)
+#         print(f"Final headers: {upload_headers}")
+#
+#         # Use a session to see raw request
+#         session = requests.Session()
+#         prepared_request = requests.Request(
+#             'POST',
+#             dataset_file_upload_endpoint,
+#             headers=upload_headers,
+#             files=files
+#         ).prepare()
+#
+#         print(f"Prepared request URL: {prepared_request.url}")
+#         print(f"Prepared request headers: {dict(prepared_request.headers)}")
+#         # Don't print body as it's binary, but we can check content-type
+#         print(f"Content-Type header: {prepared_request.headers.get('Content-Type')}")
+#
+#         response = session.send(prepared_request)
+#
+#     # DEBUG: Full response analysis
+#     print(f"Response status: {response.status_code}")
+#     print(f"Response headers: {dict(response.headers)}")
+#     print(f"Response text: {response.text}")
+#     print(f"=== DEBUG END ===")
+#
+#     # Clean up the local file after upload
+#     try:
+#         os.remove(filename)
+#     except Exception as e:
+#         print(f"Could not delete locally downloaded file: {filename}")
+#         print(e)
+#
+#     if response.status_code == 200:
+#         print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
+#         return response.json().get("id")
+#     else:
+#         print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
+#
+#     return None
+
+
 
 def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v2):
     """Download a file from Clowder v1 and upload it to Clowder v2."""
@@ -693,11 +827,17 @@ def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v
     dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
     if matching_folder:
         dataset_file_upload_endpoint += f"Multiple?folder_id={matching_folder['id']}"
-    response = requests.post(
-        dataset_file_upload_endpoint,
-        headers=headers_v2,
-        files={"file": open(filename, "rb")},
-    )
+        response = requests.post(
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files=[("files", (filename, open(filename, "rb")))],
+        )
+    else:
+        response = requests.post(
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files={"file": open(filename, "rb")},
+        )
 
     # Clean up the local file after upload
     try:
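The branch split mirrors the two upload endpoints: with a matching folder the request goes to .../filesMultiple with a repeated "files" part, otherwise to the plain .../files endpoint with a single "file" part. The inner 2-tuple ("files", (filename, fileobj)) also pins the multipart filename explicitly; with a bare file object, requests derives it from the handle's .name attribute.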
@@ -714,6 +854,78 @@ def download_and_upload_file(file, all_dataset_folders, dataset_v2_id, headers_v
 
     return None
 
+def download_and_upload_file_1(file, all_dataset_folders, dataset_v2_id, headers_v2):
+    """Download a file from Clowder v1 and upload it to Clowder v2."""
+    filename = file["filename"]
+    file_id = file["id"]
+    file_folder = file.get("folders", None)
+
+    # Download the file from Clowder v1
+    v1_download_url = f"{CLOWDER_V1}/api/files/{file_id}?superAdmin=true"
+    print(f"Downloading file: {filename}")
+    download_response = requests.get(v1_download_url, headers=clowder_headers_v1)
+
+    with open(filename, "wb") as f:
+        f.write(download_response.content)
+
+    # Determine the correct folder in Clowder v2 for the upload
+    matching_folder = None
+    if file_folder:
+        matching_folder = next(
+            (
+                folder
+                for folder in all_dataset_folders
+                if folder["name"] == file_folder["name"]
+            ),
+            None,
+        )
+
+    # Upload the file to Clowder v2
+    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
+    if matching_folder:
+        dataset_file_upload_endpoint += f"Multiple?folder_id={matching_folder['id']}"
+
+    # DEBUG: Add the same debugging as the new method
+    print(f"=== WORKING METHOD DEBUG ===")
+    print(f"Upload endpoint: {dataset_file_upload_endpoint}")
+    print(f"Headers: {headers_v2}")
+
+    with open(filename, "rb") as file_obj:
+        files = {"file": (filename, file_obj)}
+
+        # Use a session to see raw request
+        session = requests.Session()
+        prepared_request = requests.Request(
+            'POST',
+            dataset_file_upload_endpoint,
+            headers=headers_v2,
+            files=files
+        ).prepare()
+
+        print(f"Prepared request URL: {prepared_request.url}")
+        print(f"Prepared request headers: {dict(prepared_request.headers)}")
+        print(f"Content-Type header: {prepared_request.headers.get('Content-Type')}")
+
+        response = session.send(prepared_request)
+
+    print(f"Response status: {response.status_code}")
+    print(f"Response text: {response.text}")
+    print(f"=== WORKING METHOD DEBUG END ===")
+
+    # Clean up the local file after upload
+    try:
+        os.remove(filename)
+    except Exception as e:
+        print(f"Could not delete locally downloaded file: {filename}")
+        print(e)
+
+    if response.status_code == 200:
+        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
+        return response.json().get("id")
+    else:
+        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
+
+    return None
 
 def add_file_metadata(file_v1, file_v2_id, headers_v1, headers_v2):
     # Get metadata from Clowder V1
@@ -1207,9 +1419,12 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
         "[Local Account]" in user_v1["identityProvider"]
         and user_v1["email"] != admin_user["email"]
     ):
-        [USER_MAP, DATASET_MAP] = process_user_and_resources_collections(
-            user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP
+        [USER_MAP, DATASET_MAP] = process_user_and_resources(
+            user_v1, USER_MAP, DATASET_MAP
         )
+        # [USER_MAP, DATASET_MAP] = process_user_and_resources_collections(
+        #     user_v1, USER_MAP, DATASET_MAP, COLLECTIONS_MAP
+        # )
         print(f"Migrated user {user_v1['email']} and associated resources.")
     else:
         print(f"Skipping user {user_v1['email']} as it is not a local account.")

scripts/migration/test.py

Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
+import requests
+from dotenv import dotenv_values
+import os
+
+path_to_env = os.path.join(os.getcwd(),"scripts","migration", ".env")
+path_to_toml = os.path.join(os.getcwd(),"scripts","migration", "config.toml")
+config = dotenv_values(dotenv_path=path_to_env)
+
+CLOWDER_V1 = config["CLOWDER_V1"]
+ADMIN_KEY_V1 = config["ADMIN_KEY_V1"]
+CLOWDER_V2 = config["CLOWDER_V2"]
+ADMIN_KEY_V2 = config["ADMIN_KEY_V2"]
+
+base_headers_v1 = {"X-API-key": ADMIN_KEY_V1}
+base_headers_v2 = {"X-API-key": ADMIN_KEY_V2}
+
+clowder_headers_v2 = {
+    **base_headers_v2,
+    "Content-type": "application/json",
+    "accept": "application/json",
+}
+
+url = 'http://127.0.0.1:8000/api/v2'
+
+def get_new_dataset_folders(dataset_id, headers):
+    endpoint = f"{url}/datasets/{dataset_id}/all_folders"
+    r = requests.get(endpoint, headers=headers)
+    foldesr_json = r.json()
+    print(r.json())
+
+def download_and_upload_file_to_folder(file, folder_id, dataset_v2_id, headers_v2):
+    """Download a file from Clowder v1 and upload it to Clowder v2."""
+
+
+    # Download the file from Clowder v1
+    filename = 'test.txt'
+
+
+
+    # Upload the file to Clowder v2
+    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/files"
+    if folder_id is not None:
+        # add folder if it is not None
+        # folder_id = folder["id"]
+        dataset_file_upload_endpoint += f"Multiple?folder_id={folder_id}"
+    file_data = {"file": open(filename, "rb")}
+    response = requests.post(
+        dataset_file_upload_endpoint,
+        headers=headers_v2,
+        files=file_data,
+    )
+    if response.status_code == 200:
+        print(f"Uploaded file: {filename} to dataset {dataset_v2_id}")
+        return response.json().get("id")
+    else:
+        print(f"Failed to upload file: {filename} to dataset {dataset_v2_id}")
+
+    return None
+
+
+
+
+test_file = 'july-2018-temperature-precip.csv'
+if os.path.exists(test_file):
+    print('it exists')
+
+test_folder_id = '68b206b0fb9e6c77930beaab'
+test_dataset_id = '68b206a4fb9e6c77930beaa8'
+
+download_and_upload_file_to_folder(test_file, None, test_dataset_id, clowder_headers_v2)
+
+# new_folders = get_new_dataset_folders('68b080ee03137d5052c0872c', headers=clowder_headers_v2)
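test.py is a standalone harness for the upload path: it loads the same .env as migrate.py, checks that a local CSV exists, and posts it to a hard-coded v2 dataset. Run from the repository root, it expects an env file like the following (keys taken from the script; values are placeholders):

    CLOWDER_V1=http://localhost:9000
    ADMIN_KEY_V1=<v1-admin-api-key>
    CLOWDER_V2=http://127.0.0.1:8000
    ADMIN_KEY_V2=<v2-admin-api-key>

One thing the harness reproduces faithfully from the migration script: clowder_headers_v2 carries Content-type: application/json. requests keeps an explicitly supplied Content-Type header even when files= builds a multipart body, so a JSON content type on a file upload can itself cause the failure being debugged, which is presumably why the commented-out debug variant in migrate.py pops 'content-type' before posting.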
