Skip to content

Commit f3edf58

Browse files
committed
posts collection name and id
1 parent e92aac9 commit f3edf58

File tree

1 file changed

+154
-37
lines changed

1 file changed

+154
-37
lines changed

scripts/migration/migrate.py

Lines changed: 154 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import requests
55
from dotenv import dotenv_values
66

7+
V1_TEST_DATASET_ID = "66d0a6e1e4b09db0f11b24ef"
8+
79
# Configuration and Constants
810
DEFAULT_PASSWORD = "Password123&"
911

@@ -136,49 +138,77 @@ def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
136138
matching_collections.append(collection)
137139
return matching_collections
138140

141+
139142
def get_clowder_v1_collection(collection_id, headers):
    """Fetch a single collection record from the Clowder v1 API.

    Args:
        collection_id: Id of the collection, interpolated into the URL path.
        headers: HTTP headers passed through to ``requests.get``.

    Returns:
        The decoded JSON body of the response.
    """
    return requests.get(
        f"{CLOWDER_V1}/api/collections/{collection_id}", headers=headers
    ).json()
145146

146-
def get_clowder_v1_collection_self_and_ancestors(collection_id, self_and_ancestors, headers):
147-
endpoint = (
148-
f"{CLOWDER_V1}/api/collections/{collection_id}"
149-
)
147+
148+
def get_clowder_v1_collections(collection_ids, headers):
    """Fetch several collection records from the Clowder v1 API.

    One GET request is issued per id, in the order the ids are given.

    Args:
        collection_ids: Iterable of collection ids.
        headers: HTTP headers passed through to ``requests.get``.

    Returns:
        A list with the decoded JSON body of each response, in input order.
    """
    return [
        requests.get(
            f"{CLOWDER_V1}/api/collections/{one_id}", headers=headers
        ).json()
        for one_id in collection_ids
    ]
155+
156+
157+
def _parse_v1_id_list(raw):
    """Split a v1 ``List(id1, id2)`` string into a list of stripped ids."""
    text = raw.strip()
    # Remove the exact wrapper. str.lstrip("List(") strips a *character set*
    # and would also eat leading characters of the first id if they happened
    # to be one of "L", "i", "s", "t" or "(".
    if text.startswith("List("):
        text = text[len("List("):]
    if text.endswith(")"):
        text = text[:-1]
    return [part.strip() for part in text.split(",") if part.strip()]


def get_clowder_v1_collection_self_and_ancestors(
    collection_id, self_and_ancestors, headers
):
    """Collect the ids of a v1 collection and of all its ancestors.

    The collection is fetched from the v1 API, its ``parent_collection_ids``
    field is parsed, and each parent is then visited recursively.

    Args:
        collection_id: Id of the collection to start from.
        self_and_ancestors: List of ids gathered so far. It is mutated in
            place; pass ``[]`` for a fresh traversal. Ids already present are
            treated as visited and are not fetched again.
        headers: HTTP headers passed through to ``requests.get``.

    Returns:
        The same ``self_and_ancestors`` list, without duplicates.

    Raises:
        KeyError: If the response for ``collection_id`` has no ``"id"`` field
            (for example, when the API returned an error payload).
    """
    endpoint = f"{CLOWDER_V1}/api/collections/{collection_id}"
    response = requests.get(endpoint, headers=headers)
    collection = response.json()
    if "id" not in collection:
        raise KeyError(
            f"Clowder v1 response for collection {collection_id!r} "
            "has no 'id' field"
        )
    if collection["id"] not in self_and_ancestors:
        self_and_ancestors.append(collection["id"])

    # Record every unseen parent first, so siblings keep their position
    # ahead of deeper ancestors in the resulting list.
    unvisited_parents = []
    for parent_id in _parse_v1_id_list(collection["parent_collection_ids"]):
        if parent_id not in self_and_ancestors:
            self_and_ancestors.append(parent_id)
            unvisited_parents.append(parent_id)

    # Only recurse into parents discovered by this call: anything already in
    # the list is handled by the call that recorded it. This avoids repeated
    # requests for shared ancestors and unbounded recursion on a cycle.
    for parent_id in unvisited_parents:
        get_clowder_v1_collection_self_and_ancestors(
            parent_id, self_and_ancestors, headers=headers
        )
    return self_and_ancestors
160192

161-
def get_clowder_v1_parent_collection(current_collection, headers):
193+
194+
def get_clowder_v1_parent_collection_ids(current_collection_id, headers):
    """Return the ids of every v1 collection that lists the given one as a child.

    All collections are fetched from the v1 ``allCollections`` endpoint and
    each one's ``child_collection_ids`` field (a ``List(id1, id2)`` string)
    is scanned for ``current_collection_id``.

    Args:
        current_collection_id: Id of the collection whose parents are wanted.
        headers: HTTP headers passed through to ``requests.get``.

    Returns:
        A list of parent collection ids; empty when no collection lists
        ``current_collection_id`` as a child.
    """
    all_collections_v1_endpoint = (
        f"{CLOWDER_V1}/api/collections/allCollections?limit=0&showAll=true"
    )
    response = requests.get(all_collections_v1_endpoint, headers=headers)
    parents = []
    for collection in response.json():
        children_entry = collection["child_collection_ids"].strip()
        # Remove the exact "List(" ... ")" wrapper; str.lstrip("List(") would
        # strip a character set rather than the prefix.
        if children_entry.startswith("List("):
            children_entry = children_entry[len("List("):]
        if children_entry.endswith(")"):
            children_entry = children_entry[:-1]
        # Strip each id so that entries after the first one still compare
        # equal when the separator is ", ".
        child_ids = [child.strip() for child in children_entry.split(",")]
        if current_collection_id in child_ids:
            parents.append(collection["id"])
    return parents
183213

184214

@@ -414,20 +444,107 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
414444
print("Successfully uploaded collection metadata")
415445
return [USER_MAP, DATASET_MAP]
416446

417-
migration_listener_info = {'name':'clowder.v1.migration',
418-
'version':'1.0',
419-
'description': 'migration script to migrate data from v1 to v2',
420-
"content":"STUFF HERE,",
421-
'contents':"STUFF HERE"}
422447

423-
{'context_url': 'https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld', 'content': {'lines': '47', 'words': '225', 'characters': '2154'}, 'contents': {'lines': '47', 'words': '225', 'characters': '2154'}, 'listener': {'name': 'ncsa.wordcount', 'version': '2.0', 'description': '2.0'}}
448+
# Listener descriptor for this migration script.
# NOTE(review): "content" and "contents" still hold placeholder text.
migration_listener_info = {
    "name": "clowder.v1.migration",
    "version": "1.0",
    "description": "migration script to migrate data from v1 to v2",
    "content": "STUFF HERE,",
    "contents": "STUFF HERE",
}

# Reference only -- presumably a sample metadata record (here from the
# ncsa.wordcount listener); confirm before relying on its shape. It used to be
# a bare dict expression that was evaluated and discarded, so it is kept as a
# comment instead of a no-op statement.
# {
#     "context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
#     "content": {"lines": "47", "words": "225", "characters": "2154"},
#     "contents": {"lines": "47", "words": "225", "characters": "2154"},
#     "listener": {"name": "ncsa.wordcount", "version": "2.0", "description": "2.0"},
# }
462+
463+
464+
def _parent_id_set(raw_parent_ids):
    """Normalize a ``parent_collection_ids`` value into a set of ids."""
    if not isinstance(raw_parent_ids, str):
        # Already a collection of ids; membership was exact before as well.
        return set(raw_parent_ids)
    text = raw_parent_ids.strip()
    if text.startswith("List("):
        text = text[len("List("):]
    if text.endswith(")"):
        text = text[:-1]
    return {part.strip() for part in text.split(",") if part.strip()}


def add_children(collection_hierarchy_json, remaining_collections):
    """Attach collections to the hierarchy entries that are their parents.

    Args:
        collection_hierarchy_json: List of hierarchy entries (dicts with at
            least an ``"id"`` key) that may act as parents.
        remaining_collections: List of collection dicts with ``"id"``,
            ``"name"`` and ``"parent_collection_ids"`` (a ``List(id1, id2)``
            string, or an iterable of ids).

    Returns:
        A tuple ``(new_json, new_remaining_collections)``. ``new_json`` holds
        one ``{"id", "name", "parents"}`` entry for each collection with at
        least one parent in ``collection_hierarchy_json``, where ``"parents"``
        is the list of matching hierarchy entries.
        ``new_remaining_collections`` holds the collections that matched
        nothing, unchanged and in their original order.
    """
    new_json = []
    new_remaining_collections = []
    for collection in remaining_collections:
        # Compare whole ids. A substring test against the raw "List(...)"
        # string would also match an id that is merely a prefix or a part of
        # another id.
        parent_ids = _parent_id_set(collection["parent_collection_ids"])
        current_collection_parents = [
            entry
            for entry in collection_hierarchy_json
            if entry["id"] in parent_ids
        ]
        if current_collection_parents:
            new_json.append(
                {
                    "id": collection["id"],
                    "name": collection["name"],
                    "parents": current_collection_parents,
                }
            )
        else:
            new_remaining_collections.append(collection)
    return new_json, new_remaining_collections
484+
485+
486+
def build_collection_hierarchy(collection_id, headers):
    """Build the ancestor hierarchy for one v1 collection.

    The ids of the collection and all its ancestors are gathered, the
    corresponding records are fetched, and the records are split into roots
    (empty ``parent_collection_ids``) and the rest. The rest are then
    attached level by level with ``add_children``.

    Args:
        collection_id: Id of the collection whose hierarchy is built.
        headers: HTTP headers used for every v1 API request.

    Returns:
        The entries attached in the last productive ``add_children`` pass,
        each a ``{"id", "name", "parents"}`` dict whose ``"parents"`` are the
        already-built parent entries. Returns ``[]`` when every fetched
        collection is a root.
    """
    # Use the arguments, not module-level test globals, so the function works
    # for any collection and any credentials.
    ancestor_ids = get_clowder_v1_collection_self_and_ancestors(
        collection_id=collection_id, self_and_ancestors=[], headers=headers
    )
    ancestor_collections = get_clowder_v1_collections(
        ancestor_ids, headers=headers
    )

    root_collections = []
    remaining_collections = []
    for col in ancestor_collections:
        parent_collection_ids = col["parent_collection_ids"].strip()
        # Remove the exact "List(" ... ")" wrapper before testing for empty.
        if parent_collection_ids.startswith("List("):
            parent_collection_ids = parent_collection_ids[len("List("):]
        if parent_collection_ids.endswith(")"):
            parent_collection_ids = parent_collection_ids[:-1]
        if parent_collection_ids.strip() == "":
            root_collections.append(
                {"name": col["name"], "id": col["id"], "parents": []}
            )
        else:
            remaining_collections.append(col)

    children = []
    # Entries that may act as parents grow with every pass. Matching against
    # the roots alone never places anything deeper than the second level, and
    # the loop would then never finish.
    known_entries = list(root_collections)
    while remaining_collections:
        attached, remaining_collections = add_children(
            known_entries, remaining_collections
        )
        if not attached:
            # No progress: the leftovers have no known parent. Stop rather
            # than spin forever.
            break
        children = attached
        known_entries.extend(attached)
    return children
513+
424514

425515
if __name__ == "__main__":
426516
# users_v1 = get_clowder_v1_users()
427-
current_hierarch = {}
428-
current_collection = get_clowder_v1_collection('66cf6e4ecc50c8c5f1c067bf', headers=clowder_headers_v1)
429-
collection_entry = {'collection_id': current_collection['id'], 'collection_name': current_collection['name']}
430-
hierarchy = get_clowder_v1_collection_self_and_ancestors(current_collection['id'], [], headers=base_headers_v1)
517+
TEST_COL_ID = "66d0a6c0e4b09db0f11b24e4"
518+
ROOT_COL_ID = "66d0a6aae4b09db0f11b24dd"
519+
result = build_collection_hierarchy(
520+
collection_id=TEST_COL_ID, headers=clowder_headers_v1
521+
)
522+
# parents = get_clowder_v1_parent_collection_ids(current_collection_id=TEST_COL_ID, headers=clowder_headers_v1)
523+
self_and_ancestors = get_clowder_v1_collection_self_and_ancestors(
524+
collection_id=TEST_COL_ID, self_and_ancestors=[], headers=clowder_headers_v1
525+
)
526+
self_and_ancestors_collections = get_clowder_v1_collections(
527+
self_and_ancestors, headers=clowder_headers_v1
528+
)
529+
root_collections = []
530+
remaining_collections = []
531+
for col in self_and_ancestors_collections:
532+
parent_collection_ids = col["parent_collection_ids"]
533+
parent_collection_ids = parent_collection_ids.lstrip("List(")
534+
parent_collection_ids = parent_collection_ids.rstrip(")")
535+
parent_collection_ids = parent_collection_ids.lstrip(" ")
536+
parent_collection_ids = parent_collection_ids.rstrip(" ")
537+
if parent_collection_ids == "":
538+
root_col_entry = {"name": col["name"], "id": col["id"], "parents": []}
539+
root_collections.append(root_col_entry)
540+
else:
541+
remaining_collections.append(col)
542+
543+
print("the parent col")
544+
print("got root collections")
545+
children, remaining_collections = add_children(
546+
root_collections, remaining_collections
547+
)
431548
USER_MAP = {}
432549
DATASET_MAP = {}
433550
users_v1 = [

0 commit comments

Comments
 (0)