@@ -117,6 +117,102 @@ def add_v1_space_members_to_v2_group(space, group_id, headers):
117117 )
118118
119119
def get_clowder_v1_user_collections(headers, user_v1):
    """Return the Clowder v1 collections authored by the given user.

    Requests ``/api/collections`` from the v1 instance and keeps only the
    entries whose ``authorId`` equals ``user_v1["id"]``.
    """
    listing = requests.get(f"{CLOWDER_V1}/api/collections", headers=headers)
    owned = []
    for entry in listing.json():
        if entry["authorId"] == user_v1["id"]:
            owned.append(entry)
    return owned
124+
125+
def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
    """Return every Clowder v1 collection that lists the given dataset.

    All collections are read from ``/api/collections/allCollections`` and the
    datasets of each one are queried in turn. A collection is added to the
    result for each of its datasets whose ``id`` equals ``dataset_id``.

    Any exception raised while inspecting a single collection's datasets is
    printed and the loop moves on to the next collection (best effort).
    ``user_v1`` is accepted but not used by this function.
    """
    all_collections = requests.get(
        f"{CLOWDER_V1}/api/collections/allCollections", headers=headers
    ).json()
    found = []
    for candidate in all_collections:
        # Looked up outside the try block: a collection without an id is an
        # error that propagates to the caller.
        candidate_id = candidate["id"]
        datasets_url = f"{CLOWDER_V1}/api/collections/{candidate_id}/datasets"
        try:
            listed = requests.get(datasets_url, headers=headers).json()
            for ds in listed:
                if ds["id"] != dataset_id:
                    continue
                found.append(candidate)
        except Exception as e:
            print("Exception", e)
    return found
147+
148+
def get_clowder_v1_collection(collection_id, headers):
    """Fetch a single collection from Clowder v1 and return its decoded JSON."""
    return requests.get(
        f"{CLOWDER_V1}/api/collections/{collection_id}", headers=headers
    ).json()
153+
154+
def get_clowder_v1_collections(collection_ids, headers):
    """Fetch several Clowder v1 collections by id.

    Issues one GET per id, in the order given, and returns the decoded JSON
    bodies in that same order.
    """
    return [
        requests.get(
            f"{CLOWDER_V1}/api/collections/{one_id}", headers=headers
        ).json()
        for one_id in collection_ids
    ]
162+
163+
def get_clowder_v1_collection_self_and_ancestors(
    collection_id, self_and_ancestors, headers
):
    """Collect the id of a v1 collection and of all of its ancestors.

    Starting at ``collection_id``, each collection is fetched from Clowder v1
    and its ``parent_collection_ids`` field is followed upwards. The field is
    parsed as a string of the form ``"List(id1, id2)"``, which is what the
    original parsing code expects.

    Args:
        collection_id: id of the collection to start from.
        self_and_ancestors: list that the ids are appended to (mutated in
            place); ids already present are not added again.
        headers: HTTP headers sent with every v1 request.

    Returns:
        The same ``self_and_ancestors`` list, now containing the starting
        collection's id and every ancestor id that was reached.

    Raises:
        KeyError: if a fetched collection lacks ``id`` or
            ``parent_collection_ids``.
    """
    # Ids already walked during this call. This stops repeated fetches of a
    # shared ancestor and, more importantly, unbounded recursion if the
    # parent links ever form a cycle.
    visited = set()

    def _walk(current_id):
        if current_id in visited:
            return
        visited.add(current_id)

        response = requests.get(
            f"{CLOWDER_V1}/api/collections/{current_id}", headers=headers
        )
        collection = response.json()
        if collection["id"] not in self_and_ancestors:
            self_and_ancestors.append(collection["id"])

        # Remove the literal "List(" ... ")" wrapper. str.lstrip("List(")
        # would strip a *set of characters*, not the prefix.
        parents_entry = collection["parent_collection_ids"]
        if parents_entry.startswith("List("):
            parents_entry = parents_entry[len("List("):]
        if parents_entry.endswith(")"):
            parents_entry = parents_entry[:-1]

        # Blank fragments (empty list, trailing comma) are not ids.
        parent_ids = [
            fragment.strip()
            for fragment in parents_entry.split(",")
            if fragment.strip()
        ]

        # Record all direct parents first, then climb each of them, so the
        # order of the resulting list is: self, direct parents, then ancestors.
        for parent_id in parent_ids:
            if parent_id not in self_and_ancestors:
                self_and_ancestors.append(parent_id)
        for parent_id in parent_ids:
            _walk(parent_id)

    _walk(collection_id)
    return self_and_ancestors
196+
197+
def get_clowder_v1_parent_collection_ids(current_collection_id, headers):
    """Return the ids of all v1 collections that list the given one as a child.

    Every collection is read from
    ``/api/collections/allCollections?limit=0&showAll=true`` and its
    ``child_collection_ids`` field is parsed as a string of the form
    ``"List(id1, id2)"``.

    Args:
        current_collection_id: id of the collection whose parents are wanted.
        headers: HTTP headers sent with the v1 request.

    Returns:
        A list of parent collection ids, in the order the collections were
        returned by the API. Each parent appears at most once.
    """
    endpoint = f"{CLOWDER_V1}/api/collections/allCollections?limit=0&showAll=true"
    all_collections = requests.get(endpoint, headers=headers).json()

    parents = []
    for collection in all_collections:
        # Remove the literal "List(" ... ")" wrapper. str.lstrip("List(")
        # would strip a *set of characters*, not the prefix.
        children_entry = collection["child_collection_ids"]
        if children_entry.startswith("List("):
            children_entry = children_entry[len("List("):]
        if children_entry.endswith(")"):
            children_entry = children_entry[:-1]

        # Entries are separated by ", ", so each fragment must be stripped;
        # otherwise only the first child of a collection could ever match.
        child_ids = {
            fragment.strip()
            for fragment in children_entry.split(",")
            if fragment.strip()
        }
        if current_collection_id in child_ids:
            parents.append(collection["id"])
    return parents
214+
215+
120216def create_local_user (user_v1 ):
121217 """Create a local user in Clowder v2 if they don't already exist, and generate an API key."""
122218 # Search for the user by email
@@ -169,10 +265,74 @@ def create_admin_user():
169265 return generate_user_api_key (admin_user , admin_user ["password" ])
170266
171267
def add_dataset_license(v1_license, headers):
    """Create appropriate license (standard/custom) based on v1 license details.

    Args:
        v1_license: license dict of a v1 dataset. ``license_type`` selects the
            branch: ``"license2"`` is mapped to a standard Creative Commons id
            using the ``ccAllowCommercial`` / ``ccAllowDerivative`` /
            ``ccRequireShareAlike`` flags, ``"license3"`` to the public-domain
            id, and anything else is treated as a custom license built from
            ``license_text``, ``license_url`` and ``holders``.
        headers: HTTP headers used when a custom license is posted to v2.

    Returns:
        The license id to use in Clowder v2: a standard license id string, or
        the ``id`` field of the response to the custom-license POST.
    """
    license_type = v1_license["license_type"]

    if license_type == "license2":
        # (allow commercial, allow derivative, require share-alike) -> id
        cc_license_ids = {
            (False, False, False): "CC BY-NC-ND",
            (True, False, False): "CC BY-ND",
            (False, True, False): "CC BY-NC",
            (False, True, True): "CC BY-NC-SA",
            (True, True, True): "CC BY-SA",
            (True, True, False): "CC BY",
        }
        flags = (
            bool(v1_license["ccAllowCommercial"]),
            bool(v1_license["ccAllowDerivative"]),
            bool(v1_license["ccRequireShareAlike"]),
        )
        # Flag combinations without a matching licence (share-alike without
        # derivatives) fall back to plain attribution. The fallback uses the
        # same "CC BY" spelling as the table above rather than "CC-BY".
        return cc_license_ids.get(flags, "CC BY")

    if license_type == "license3":
        # NOTE(review): id is spelled with the letter O ("CCO"), kept verbatim
        # -- confirm it matches the standard license id defined in v2.
        return "CCO Public Domain Dedication"

    # custom license
    license_body = {
        "name": v1_license["license_text"],
        "url": v1_license["license_url"],
        "holders": v1_license["holders"],
    }
    if license_body["url"] == "":
        license_body["url"] = "https://dbpedia.org/page/All_rights_reserved"
    license_v2_endpoint = f"{CLOWDER_V2}/api/v2/licenses?"
    response = requests.post(
        license_v2_endpoint, headers=headers, json=license_body
    )
    created_license = response.json()
    print(created_license)
    return created_license["id"]
327+
328+
172329def create_v2_dataset (dataset , headers ):
173330 """Create a dataset in Clowder v2."""
174331 # TODO: GET correct license
175- dataset_in_v2_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets?license_id=CC BY"
332+ print ("Creating dataset license in Clowder v2." )
333+ v2_license_id = add_dataset_license (dataset ["license" ], headers )
334+
335+ dataset_in_v2_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets?license_id={ v2_license_id } "
176336 dataset_example = {
177337 "name" : dataset ["name" ],
178338 "description" : dataset ["description" ],
@@ -439,6 +599,101 @@ def register_migration_extractor():
439599 )
440600
441601
def add_children(collection_hierarchy_json, remaining_collections):
    """Attach the next level of collections to an existing hierarchy level.

    For each collection in ``remaining_collections``, every entry of
    ``collection_hierarchy_json`` whose ``id`` occurs in the collection's
    ``parent_collection_ids`` value is taken as one of its parents.

    Returns:
        A ``(attached, unattached)`` tuple. ``attached`` holds a new
        ``{"id", "name", "parents"}`` entry for each collection with at least
        one matching parent; ``unattached`` holds the collections, unchanged,
        for which no parent was found.
    """
    attached = []
    unattached = []
    for candidate in remaining_collections:
        parent_ids = candidate["parent_collection_ids"]
        matched_parents = []
        for node in collection_hierarchy_json:
            if node["id"] not in parent_ids:
                continue
            matched_parents.append(node)
            print("We got the parents now")
        if not matched_parents:
            unattached.append(candidate)
            continue
        attached.append(
            {
                "id": candidate["id"],
                "name": candidate["name"],
                "parents": matched_parents,
            }
        )
    return attached, unattached
622+
623+
def build_collection_hierarchy(collection_id, headers):
    """Build the nested ancestor hierarchy for a Clowder v1 collection.

    The collection and all of its ancestors are fetched from v1. Collections
    whose ``parent_collection_ids`` value is empty become root entries
    (``{"name", "id", "parents": []}``); the others are attached level by
    level with ``add_children``, each new entry nesting its parents.

    Args:
        collection_id: id of the collection to start from.
        headers: HTTP headers used for every v1 request made here.

    Returns:
        The entries of the last hierarchy level that could be built.
    """
    ancestor_ids = get_clowder_v1_collection_self_and_ancestors(
        collection_id=collection_id, self_and_ancestors=[], headers=headers
    )
    # Use the caller's headers here too, not the module-level v1 headers, so
    # both fetches in this function are made with the same credentials.
    ancestor_collections = get_clowder_v1_collections(ancestor_ids, headers=headers)

    children = []
    remaining_collections = []
    for col in ancestor_collections:
        # Remove the literal "List(" ... ")" wrapper. str.lstrip("List(")
        # would strip a *set of characters*, not the prefix.
        parents_entry = col["parent_collection_ids"]
        if parents_entry.startswith("List("):
            parents_entry = parents_entry[len("List("):]
        if parents_entry.endswith(")"):
            parents_entry = parents_entry[:-1]
        if parents_entry.strip() == "":
            children.append({"name": col["name"], "id": col["id"], "parents": []})
        else:
            remaining_collections.append(col)

    while remaining_collections:
        next_level, still_remaining = add_children(children, remaining_collections)
        if not next_level:
            # Nothing could be attached, so another pass would see exactly the
            # same input and loop forever. Keep the last level that was built.
            print(
                f"Could not place {len(remaining_collections)} collection(s) "
                f"in the hierarchy of collection {collection_id}."
            )
            break
        children, remaining_collections = next_level, still_remaining
    print("Now we are done")
    return children
649+
650+
def build_collection_metadata_for_v1_dataset(dataset_id, user_v1, headers):
    """Return the Clowder v1 collections that contain the given dataset.

    Thin wrapper around ``get_clowder_v1_dataset_collections``; the result is
    passed through unchanged.
    """
    return get_clowder_v1_dataset_collections(
        headers=headers, user_v1=user_v1, dataset_id=dataset_id
    )
656+
657+
def build_collection_space_metadata_for_v1_dataset(dataset, user_v1, headers):
    """Gather the v1 space and collection context of a dataset as metadata.

    Looks up the collections containing the dataset, fetches every space
    listed in ``dataset["spaces"]``, and builds the collection hierarchy for
    each containing collection.

    Returns:
        ``{"spaces": [...], "collections": [...]}`` where each space entry has
        ``id``, ``name`` and, when it can be read, ``creator``; the collection
        list concatenates the hierarchy entries of every containing collection.
    """
    dataset_id = dataset["id"]
    containing_collections = get_clowder_v1_dataset_collections(
        headers=headers, user_v1=user_v1, dataset_id=dataset_id
    )

    space_entries = []
    for space_id in dataset["spaces"]:
        space = requests.get(
            f"{CLOWDER_V1}/api/spaces/{space_id}", headers=headers
        ).json()
        try:
            full_entry = {
                "id": space["id"],
                "name": space["name"],
                "creator": space["creator"],
            }
            space_entries.append(full_entry)
        except Exception as first_error:
            print("Error in getting space entry.")
            print(first_error)
            # Second attempt: same entry without the creator field.
            try:
                reduced_entry = {"id": space["id"], "name": space["name"]}
                space_entries.append(reduced_entry)
            except Exception as second_error:
                print("Error in getting space entry")
                print(second_error)

    collection_data = []
    for containing in containing_collections:
        collection_data.extend(
            build_collection_hierarchy(
                collection_id=containing["id"], headers=headers
            )
        )

    print(f"Got space and collection metadata from dataset {dataset_id}")
    return {"spaces": space_entries, "collections": collection_data}
695+
696+
442697def process_user_and_resources (user_v1 , USER_MAP , DATASET_MAP ):
443698 """Process user resources from Clowder v1 to Clowder v2."""
444699 user_v1_datasets = get_clowder_v1_user_datasets (user_id = user_v1 ["id" ])
@@ -476,6 +731,35 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
476731 )
477732 if file_v2_id is not None :
478733 add_file_metadata (file , file_v2_id , clowder_headers_v1 , user_headers_v2 )
734+ # posting the collection hierarchy as metadata
735+ collection_space_metadata_dict = build_collection_space_metadata_for_v1_dataset (
736+ dataset = dataset , user_v1 = user_v1 , headers = clowder_headers_v1
737+ )
738+ migration_extractor_collection_metadata = {
739+ "listener" : {
740+ "name" : "migration" ,
741+ "version" : "1" ,
742+ "description" : "Migration of metadata from Clowder v1 to Clowder v2" ,
743+ },
744+ "context_url" : "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld" ,
745+ "content" : collection_space_metadata_dict ,
746+ "contents" : collection_space_metadata_dict ,
747+ }
748+ v2_metadata_endpoint = f"{ CLOWDER_V2 } /api/v2/datasets/{ dataset_v2_id } /metadata"
749+ response = requests .post (
750+ v2_metadata_endpoint ,
751+ json = migration_extractor_collection_metadata ,
752+ headers = clowder_headers_v2 ,
753+ )
754+ if response .status_code == 200 :
755+ print ("Successfully added collection info as metadata in v2." )
756+ else :
757+ print (
758+ f"Failed to add collection info as metadata in Clowder v2. Status code: { response .status_code } "
759+ )
760+
761+ if file_v2_id is not None :
762+ add_file_metadata (file , file_v2_id , clowder_headers_v1 , user_headers_v2 )
479763
480764 return [USER_MAP , DATASET_MAP ]
481765
0 commit comments