Skip to content

Commit c7e218c

Browse files
committed
check to see if we should migrate a dataset
1 parent c64be06 commit c7e218c

File tree

1 file changed

+76
-46
lines changed

1 file changed

+76
-46
lines changed

scripts/migration/migrate.py

Lines changed: 76 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -735,55 +735,85 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
735735

736736
for dataset in user_v1_datasets:
737737
print(f"Creating dataset in v2: {dataset['id']} - {dataset['name']}")
738-
dataset_v2_id = create_v2_dataset(dataset, user_headers_v2)
739-
DATASET_MAP[dataset["id"]] = dataset_v2_id
740-
add_dataset_metadata(dataset, dataset_v2_id, base_headers_v1, user_headers_v2)
741-
add_dataset_folders(dataset, dataset_v2_id, user_headers_v2)
742-
print("Created folders in the new dataset")
743-
744-
all_dataset_folders = get_folder_and_subfolders(dataset_v2_id, user_headers_v2)
745-
746-
# Retrieve files for the dataset in Clowder v1
747-
dataset_files_endpoint = (
748-
f"{CLOWDER_V1}/api/datasets/{dataset['id']}/files?superAdmin=true"
749-
)
750-
files_response = requests.get(
751-
dataset_files_endpoint, headers=clowder_headers_v1, verify=False
752-
)
753-
files_result = files_response.json()
738+
# TODO: check if dataset is in toml_exclude_dataset_id
739+
dataset_v1_id = dataset["id"]
740+
dataset_v1_spaces = dataset["spaces"]
741+
# TODO check if dataset is in toml_space_ids or exclude_space_ids
742+
MIGRATE_DATASET = True
743+
print(toml_exclude_dataset_ids)
744+
print(toml_space_ids)
745+
print(toml_exclude_space_ids)
746+
# Check if dataset is in the excluded dataset list
747+
if dataset_v1_id in toml_exclude_dataset_ids:
748+
print(f"Skipping dataset {dataset_v1_id} as it is in the exclude list.")
749+
MIGRATE_DATASET = False
750+
# Check if dataset is in the specified space list
751+
if toml_space_ids is not None and len(toml_space_ids) > 0:
752+
if not any(
753+
space_id in dataset_v1_spaces for space_id in toml_space_ids
754+
):
755+
print(
756+
f"Skipping dataset {dataset_v1_id} as it is not in the specified spaces."
757+
)
758+
MIGRATE_DATASET = False
759+
if toml_exclude_space_ids is not None and len(toml_exclude_space_ids) > 0:
760+
if any(
761+
space_id in dataset_v1_spaces for space_id in toml_exclude_space_ids
762+
):
763+
print(
764+
f"Skipping dataset {dataset_v1_id} as it is in the excluded spaces."
765+
)
766+
MIGRATE_DATASET = False
767+
if MIGRATE_DATASET:
768+
dataset_v2_id = create_v2_dataset(dataset, user_headers_v2)
769+
DATASET_MAP[dataset["id"]] = dataset_v2_id
770+
add_dataset_metadata(dataset, dataset_v2_id, base_headers_v1, user_headers_v2)
771+
add_dataset_folders(dataset, dataset_v2_id, user_headers_v2)
772+
print("Created folders in the new dataset")
773+
774+
all_dataset_folders = get_folder_and_subfolders(dataset_v2_id, user_headers_v2)
775+
776+
# Retrieve files for the dataset in Clowder v1
777+
dataset_files_endpoint = (
778+
f"{CLOWDER_V1}/api/datasets/{dataset['id']}/files?superAdmin=true"
779+
)
780+
files_response = requests.get(
781+
dataset_files_endpoint, headers=clowder_headers_v1, verify=False
782+
)
783+
files_result = files_response.json()
754784

755-
for file in files_result:
756-
file_v2_id = download_and_upload_file(
757-
file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
785+
for file in files_result:
786+
file_v2_id = download_and_upload_file(
787+
file, all_dataset_folders, dataset_v2_id, base_user_headers_v2
788+
)
789+
if file_v2_id is not None:
790+
add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
791+
# posting the collection hierarchy as metadata
792+
collection_space_metadata_dict = build_collection_space_metadata_for_v1_dataset(
793+
dataset=dataset, user_v1=user_v1, headers=clowder_headers_v1
758794
)
759-
if file_v2_id is not None:
760-
add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
761-
# posting the collection hierarchy as metadata
762-
collection_space_metadata_dict = build_collection_space_metadata_for_v1_dataset(
763-
dataset=dataset, user_v1=user_v1, headers=clowder_headers_v1
764-
)
765-
migration_extractor_collection_metadata = {
766-
"listener": {
767-
"name": "migration",
768-
"version": "1",
769-
"description": "Migration of metadata from Clowder v1 to Clowder v2",
770-
},
771-
"context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
772-
"content": collection_space_metadata_dict,
773-
"contents": collection_space_metadata_dict,
774-
}
775-
v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
776-
response = requests.post(
777-
v2_metadata_endpoint,
778-
json=migration_extractor_collection_metadata,
779-
headers=clowder_headers_v2,
780-
)
781-
if response.status_code == 200:
782-
print("Successfully added collection info as metadata in v2.")
783-
else:
784-
print(
785-
f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
795+
migration_extractor_collection_metadata = {
796+
"listener": {
797+
"name": "migration",
798+
"version": "1",
799+
"description": "Migration of metadata from Clowder v1 to Clowder v2",
800+
},
801+
"context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
802+
"content": collection_space_metadata_dict,
803+
"contents": collection_space_metadata_dict,
804+
}
805+
v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
806+
response = requests.post(
807+
v2_metadata_endpoint,
808+
json=migration_extractor_collection_metadata,
809+
headers=clowder_headers_v2,
786810
)
811+
if response.status_code == 200:
812+
print("Successfully added collection info as metadata in v2.")
813+
else:
814+
print(
815+
f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
816+
)
787817

788818
return [USER_MAP, DATASET_MAP]
789819

0 commit comments

Comments
 (0)