this seems to be working

tcnichol · tcnichol · commit 734c9f1e0574 · 2025-08-18T14:46:59.000-05:00
The map for the dataset collections seems to work; will add documentation in the next commit.
diff --git a/scripts/migration/dataset_collection_json.py b/scripts/migration/dataset_collection_json.py
@@ -27,6 +27,11 @@ def get_dataset_collections_map():
 
     for collection, datasets in data.items():
         for dataset in datasets:
-            dataset_to_collection[dataset] = collection
+            if dataset not in dataset_to_collection:
+                dataset_to_collection[dataset] = [collection]
+            else:
+                current_value = dataset_to_collection[dataset]
+                current_value.append(collection)
+                dataset_to_collection[dataset] = current_value
     return dataset_to_collection
 
diff --git a/scripts/migration/migrate.py b/scripts/migration/migrate.py
@@ -16,6 +16,10 @@
     post_metadata_definition,
 )
 
+from scripts.migration.dataset_collection_json import get_dataset_collections_map
+
+DATASET_COLLECTIONS_MAP = get_dataset_collections_map()
+
 # Configuration and Constants
 DEFAULT_PASSWORD = "Password123&"
 
@@ -725,10 +729,12 @@ def build_collection_metadata_for_v1_dataset(dataset_id, user_v1, headers):
 # TODO test this method
 def build_collection_space_metadata_for_v1_dataset(dataset, user_v1, headers):
     dataset_id = dataset["id"]
-    # TODO this is too slow we need a way to sort through collection hierarchy better
-    dataset_collections = get_clowder_v1_dataset_collections(
-        headers=headers, user_v1=user_v1, dataset_id=dataset_id
-    )
+    dataset_collections = []
+    if dataset_id in DATASET_COLLECTIONS_MAP:
+        dataset_collections_ids = DATASET_COLLECTIONS_MAP[dataset_id]
+        for col_id in dataset_collections_ids:
+            collection = get_clowder_v1_collection(col_id, headers=headers)
+            dataset_collections.append(collection)
     dataset_spaces = dataset["spaces"]
     space_entries = []
     for space_id in dataset_spaces:
@@ -739,7 +745,8 @@ def build_collection_space_metadata_for_v1_dataset(dataset, user_v1, headers):
             space_entry = {
                 "id": space["id"],
                 "name": space["name"],
-                "creator": space["creator"],
+                # TODO this is not part of the json
+                # "creator": space["creator"],
             }
             space_entries.append(space_entry)
         except Exception as e: