Skip to content

Commit 0a8e764

Browse files
committed
migrate now adds a metadata file to a folder
1 parent 5b2562d commit 0a8e764

File tree

2 files changed

+86
-2
lines changed

2 files changed

+86
-2
lines changed

scripts/migration/dataset_collection_json.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,46 @@
99
except ImportError:
1010
import tomli as tomllib
1111

12+
# Resolve the migration .env file relative to the current working directory
# (the script is expected to be launched from the repository root).
path_to_env = os.path.join(os.getcwd(), "scripts", "migration", ".env")
config = dotenv_values(dotenv_path=path_to_env)

# Endpoints and admin API keys for the source (v1) and target (v2) instances.
CLOWDER_V1 = config["CLOWDER_V1"]
ADMIN_KEY_V1 = config["ADMIN_KEY_V1"]
CLOWDER_V2 = config["CLOWDER_V2"]
ADMIN_KEY_V2 = config["ADMIN_KEY_V2"]

# Bare auth headers, one per instance.
base_headers_v1 = {"X-API-key": ADMIN_KEY_V1}
base_headers_v2 = {"X-API-key": ADMIN_KEY_V2}

# v1 headers for endpoints that exchange JSON bodies.
clowder_headers_v1 = dict(base_headers_v1)
clowder_headers_v1.update(
    {
        "Content-type": "application/json",
        "accept": "application/json",
    }
)

DEFAULT_PASSWORD = "Password123&"

# Get the current timestamp (tags artifacts produced by this run).
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
COLLECTIONS_FILE = "collections_datasets.json"
1934

35+
def get_all_datasets(header):
    """Return every dataset in Clowder v1 (admin view, paging disabled)."""
    endpoint = f"{CLOWDER_V1}/api/datasets?superAdmin=true&limit=0"
    response = requests.get(endpoint, headers=header)
    return response.json()
39+
40+
def get_dataset_metadata(dataset_v1_id, headers_v1, collected_ids=None):
    """Fetch the JSON-LD metadata of a Clowder v1 dataset.

    Args:
        dataset_v1_id: id of the dataset in Clowder v1.
        headers_v1: request headers carrying the v1 admin API key.
        collected_ids: optional list; when provided, ids of datasets that
            turn out to have metadata are appended to it. (Backward-compatible
            fix: `get_datasets_in_collections` already calls this function
            with a third accumulator argument, which previously raised
            TypeError.)

    Returns:
        The metadata payload returned by v1 (empty when the dataset has none).

    Side effect: appends each id with non-empty metadata to
    'datasets_with_metadata.txt' as an audit trail.
    """
    # Get metadata from Clowder V1
    endpoint = (
        f"{CLOWDER_V1}/api/datasets/{dataset_v1_id}/metadata.jsonld?superAdmin=true"
    )
    metadata_v1 = requests.get(endpoint, headers=headers_v1).json()
    if len(metadata_v1) > 0:
        print('we got some metadata')
        if collected_ids is not None:
            collected_ids.append(dataset_v1_id)
        with open('datasets_with_metadata.txt', 'a') as f:
            f.write(dataset_v1_id + '\n')
    return metadata_v1
51+
2052
def get_dataset_collections_map():
2153
print("Getting collections and datasets from Clowder v1...")
2254

@@ -38,5 +70,16 @@ def get_dataset_collections_map():
3870
def get_datasets_in_collections():
    """Return the ids of all v1 datasets that belong to at least one collection.

    As a side effect, each dataset's metadata is fetched, which records
    datasets that have metadata into 'datasets_with_metadata.txt'.
    """
    # Renamed from `map` to avoid shadowing the builtin.
    dataset_collection_map = get_dataset_collections_map()
    datasets_in_collections = list(dataset_collection_map.keys())
    # Bug fix: the original passed a third argument to get_dataset_metadata
    # (which accepts only two), raising TypeError on the first iteration;
    # also iterate directly instead of range(len(...)).
    for dataset_id in datasets_in_collections:
        get_dataset_metadata(dataset_id, base_headers_v1)
    return datasets_in_collections
4278

79+
if __name__ == "__main__":
    # Record metadata presence for every v1 dataset, then repeat the pass
    # for the subset of datasets that live inside collections.
    all_datasets = get_all_datasets(base_headers_v1)
    # Iterate the datasets directly instead of indexing with range(len(...)).
    for dataset in all_datasets:
        get_dataset_metadata(dataset['id'], base_headers_v1)
    get_datasets_in_collections()
85+

scripts/migration/migrate.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from datetime import datetime
3-
3+
import json
44
import requests
55
from dotenv import dotenv_values
66

@@ -217,9 +217,11 @@ def process_collection_descendants(collection, headers_v1, base_headers_v2, head
217217
new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_parent_type, v2_dataset_id, headers_v2)
218218
process_dataset_files_and_folders(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id, new_folder)
219219
# TODO add dataset metadata to the folder
220+
add_dataset_metadata_to_folder(dataset, v2_dataset_id, new_folder['id'], headers_v1, base_headers_v2)
220221
else:
221222
new_folder = create_folder_if_not_exists_or_get(dataset["name"], v2_parent_id, v2_parent_type, v2_dataset_id, headers_v2)
222223
process_dataset_files_and_folders(dataset, headers_v1, base_headers_v2, 'folder', new_folder['id'], v2_dataset_id, new_folder)
224+
add_dataset_metadata_to_folder(dataset, v2_dataset_id, new_folder['id'], headers_v1, base_headers_v2)
223225
# TODO add dataset metadata to the folder
224226

225227

@@ -798,7 +800,6 @@ def add_file_metadata(file_v1, file_v2_id, headers_v1, headers_v2):
798800
print("Successfully posted file machine metadata to V2")
799801
break # machine metadata no need to iterate through all the keys
800802

801-
802803
def add_dataset_metadata(dataset_v1, dataset_v2_id, headers_v1, headers_v2):
803804
# Get metadata from Clowder V1
804805
endpoint = (
@@ -853,6 +854,46 @@ def add_dataset_metadata(dataset_v1, dataset_v2_id, headers_v1, headers_v2):
853854
break # machine metadata no need to iterate through all the keys
854855

855856

857+
def add_dataset_metadata_to_folder(dataset_v1, dataset_v2_id, folder_v2_id, headers_v1, headers_v2):
    """Export a v1 dataset's JSON-LD metadata and upload it as a file into a v2 folder.

    Args:
        dataset_v1: v1 dataset dict; must contain 'id' and 'name'.
        dataset_v2_id: id of the v2 dataset that owns the target folder.
        folder_v2_id: id of the v2 folder receiving the metadata file.
        headers_v1: auth headers for Clowder v1.
        headers_v2: auth headers for Clowder v2.

    Returns:
        The id of the uploaded metadata file, or None when the upload failed
        (or when the response body has an unexpected shape).
    """
    # Get metadata from Clowder V1
    endpoint = (
        f"{CLOWDER_V1}/api/datasets/{dataset_v1['id']}/metadata.jsonld?superAdmin=true"
    )
    dataset_name = dataset_v1['name']
    # NOTE(review): a dataset name containing a path separator would break
    # this local temp-file path — confirm v1 names are filesystem-safe.
    metadata_file_name = dataset_name + '_metadata.json'
    metadata_v1 = requests.get(endpoint, headers=headers_v1).json()
    with open(metadata_file_name, "w") as metadata_file:
        json.dump(metadata_v1, metadata_file)

    # upload the file to the folder in v2
    dataset_file_upload_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/filesMultiple?folder_id={folder_v2_id}"

    # Bug fix: the upload handle was opened inline and never closed (resource
    # leak; the os.remove below fails on Windows while the file is open).
    with open(metadata_file_name, "rb") as upload_handle:
        response = requests.post(
            dataset_file_upload_endpoint,
            headers=headers_v2,
            files=[("files", (metadata_file_name, upload_handle))],
        )

    # Clean up the local file after upload; only filesystem errors are expected.
    try:
        os.remove(metadata_file_name)
    except OSError as e:
        print(f"Could not delete locally created metadata file: {metadata_file_name}")
        print(e)

    if response.status_code == 200:
        print(f"Uploaded file: {metadata_file_name} to dataset {dataset_v2_id} and folder {folder_v2_id}")
        response_json = response.json()
        # The endpoint may answer with a single object or a list of objects.
        if isinstance(response_json, dict):
            return response_json.get("id")
        elif isinstance(response_json, list):
            return response_json[0].get("id")
    else:
        # Bug fix: the original interpolated `metadata_file` (the closed file
        # object) instead of the file name in this message.
        print(f"Failed to upload file: {metadata_file_name} to dataset {dataset_v2_id} and folder {folder_v2_id}")
        return None
895+
896+
856897
def register_migration_extractor():
857898
"""Register the migration extractor in Clowder v2."""
858899
migration_extractor = {

0 commit comments

Comments
 (0)