1212 METADATA_REGISTRANT , METADATA_SAMPLE_LOCATION , METADATA_ELEVATION , METADATA_SAMPLING_SITE , \
1313 METADATA_RESULT_TIME , METADATA_HAS_FEATURE_OF_INTEREST , METADATA_DESCRIPTION , METADATA_INFORMAL_CLASSIFICATION , \
1414 METADATA_KEYWORDS , METADATA_HAS_SPECIMEN_CATEGORY , METADATA_HAS_MATERIAL_CATEGORY , METADATA_HAS_CONTEXT_CATEGORY , \
15- METADATA_LABEL , METADATA_SAMPLE_IDENTIFIER , METADATA_AT_ID , METADATA_RESPONSIBILITY , METADATA_PRODUCED_BY , \
16- METADATA_NAME
15+ METADATA_LABEL , METADATA_SAMPLE_IDENTIFIER , METADATA_RESPONSIBILITY , METADATA_PRODUCED_BY , \
16+ METADATA_NAME , METADATA_KEYWORD , METADATA_IDENTIFIER , METADATA_ROLE
1717from isamples_metadata .solr_field_constants import SOLR_PRODUCED_BY_SAMPLING_SITE_PLACE_NAME , SOLR_AUTHORIZED_BY , \
1818 SOLR_COMPLIES_WITH , SOLR_PRODUCED_BY_SAMPLING_SITE_LOCATION_LONGITUDE , \
1919 SOLR_PRODUCED_BY_SAMPLING_SITE_LOCATION_LATITUDE , SOLR_RELATED_RESOURCE_ISB_CORE_ID , SOLR_CURATION_RESPONSIBILITY , \
@@ -65,6 +65,19 @@ def transform(table: Table, dest_path_no_extension: str, append: bool) -> str:
6565
6666
6767class JSONExportTransformer (AbstractExportTransformer ):
68+
69+ @staticmethod
70+ def filter_null_values (obj ):
71+ """
72+ Recursively filter out null values from a dictionary.
73+ """
74+ if isinstance (obj , dict ):
75+ return {k : JSONExportTransformer .filter_null_values (v ) for k , v in obj .items () if v is not None }
76+ elif isinstance (obj , list ):
77+ return [JSONExportTransformer .filter_null_values (elem ) for elem in obj if elem is not None ]
78+ else :
79+ return obj
80+
6881 @staticmethod
6982 def transform (table : Table , dest_path_no_extension : str , append : bool ) -> str :
7083 if append :
@@ -73,7 +86,7 @@ def transform(table: Table, dest_path_no_extension: str, append: bool) -> str:
7386 dest_path = f"{ dest_path_no_extension } .{ extension } "
7487 with open (dest_path , "w" ) as file :
7588 for row in petl .util .base .dicts (table ):
76- json .dump (row , file )
89+ json .dump (JSONExportTransformer . filter_null_values ( row ) , file )
7790 file .write ("\n " )
7891 return dest_path
7992
@@ -90,22 +103,39 @@ def _add_to_dict(self, target_dict: dict, target_key: str, source_dict: dict, so
90103 if source_value is not None :
91104 target_dict [target_key ] = source_value
92105
def _add_responsibilities_to_container(self,
                                       rec: dict,
                                       responsibility_key_solr: str,
                                       responsibility_key: str,
                                       container: dict):
    """
    Expand the responsibility strings of ``rec`` into role/name dictionaries.

    Each string stored under ``responsibility_key_solr`` is split on its first
    colon into a role (before the colon) and a name (everything after it), and
    the resulting list of dictionaries is stored in ``container`` under
    ``responsibility_key``.  Nothing is stored when there are no
    responsibilities.

    :param rec: the source record to read from
    :param responsibility_key_solr: key in ``rec`` holding the responsibility strings
    :param responsibility_key: key to write in ``container``
    :param container: dictionary that receives the list of role/name dictionaries
    """
    # The key may be present with a None value, in which case .get()'s default is not used.
    responsibilities = rec.get(responsibility_key_solr) or []
    responsibility_dicts = []
    for responsibility in responsibilities:
        # Split on the first colon only, so a name that itself contains a colon stays intact.
        role, separator, name = responsibility.partition(":")
        if separator:
            responsibility_dicts.append({METADATA_ROLE: role, METADATA_NAME: name})
        else:
            # NOTE(review): a value without a colon carries no role; it is exported as a bare
            # name instead of raising IndexError. Confirm this is the desired reading.
            responsibility_dicts.append({METADATA_NAME: responsibility})
    if responsibility_dicts:
        container[responsibility_key] = responsibility_dicts
118+
def _curation_dict(self, rec: dict) -> dict:
    """
    Build the curation sub-dictionary of the exported record.

    Label, description and curation location are copied through
    ``_add_to_dict``; responsibilities are expanded through
    ``_add_responsibilities_to_container``; access constraints are read as a
    single ``|``-delimited string and exported as a list.

    :param rec: the source record to read the curation fields from
    :return: the curation dictionary, containing only the fields that had a value
    """
    curation_dict: dict = {}
    self._add_to_dict(curation_dict, METADATA_LABEL, rec, SOLR_CURATION_LABEL)
    self._add_to_dict(curation_dict, METADATA_DESCRIPTION, rec, SOLR_CURATION_DESCRIPTION)
    self._add_to_dict(curation_dict, METADATA_CURATION_LOCATION, rec, SOLR_CURATION_LOCATION)
    self._add_responsibilities_to_container(rec, SOLR_CURATION_RESPONSIBILITY, METADATA_RESPONSIBILITY, curation_dict)
    # The key may be present with a None value, so fall back to "" before splitting.
    raw_access_constraints = rec.get(SOLR_CURATION_ACCESS_CONSTRAINTS) or ""
    # "".split("|") yields [""], so empty segments are dropped; otherwise a record with no
    # constraints would export a list holding one empty string.
    access_constraints = [constraint for constraint in raw_access_constraints.split("|") if constraint]
    if access_constraints:
        curation_dict[METADATA_ACCESS_CONSTRAINTS] = access_constraints
    return curation_dict
101129
102130 def _produced_by_dict (self , rec : dict ) -> dict :
103131 produced_by_dict : dict = {}
104- self ._add_to_dict (produced_by_dict , METADATA_AT_ID , rec , SOLR_PRODUCED_BY_ISB_CORE_ID )
132+ self ._add_to_dict (produced_by_dict , METADATA_IDENTIFIER , rec , SOLR_PRODUCED_BY_ISB_CORE_ID )
105133 self ._add_to_dict (produced_by_dict , METADATA_LABEL , rec , SOLR_PRODUCED_BY_LABEL )
106- self ._add_to_dict (produced_by_dict , METADATA_RESPONSIBILITY , rec , SOLR_PRODUCED_BY_RESPONSIBILITY )
107134 self ._add_to_dict (produced_by_dict , METADATA_DESCRIPTION , rec , SOLR_PRODUCED_BY_DESCRIPTION )
108- self ._add_to_dict (produced_by_dict , METADATA_RESULT_TIME , rec , SOLR_PRODUCED_BY_RESULT_TIME )
135+ result_time = rec .get (SOLR_PRODUCED_BY_RESULT_TIME )
136+ if result_time is not None :
137+ result_time = result_time [:10 ]
138+ produced_by_dict [METADATA_RESULT_TIME ] = result_time
109139 self ._add_to_dict (produced_by_dict , METADATA_HAS_FEATURE_OF_INTEREST , rec , SOLR_PRODUCED_BY_FEATURE_OF_INTEREST )
110140 sampling_site_dict : dict = {}
111141 produced_by_dict [METADATA_SAMPLING_SITE ] = sampling_site_dict
@@ -117,10 +147,27 @@ def _produced_by_dict(self, rec: dict) -> dict:
117147 self ._add_to_dict (sample_location_dict , METADATA_ELEVATION , rec , SOLR_PRODUCED_BY_SAMPLING_SITE_ELEVATION_IN_METERS )
118148 self ._add_to_dict (sample_location_dict , METADATA_LATITUDE , rec , SOLR_PRODUCED_BY_SAMPLING_SITE_LOCATION_LATITUDE )
119149 self ._add_to_dict (sample_location_dict , METADATA_LONGITUDE , rec , SOLR_PRODUCED_BY_SAMPLING_SITE_LOCATION_LONGITUDE )
150+ self ._add_responsibilities_to_container (rec , SOLR_PRODUCED_BY_RESPONSIBILITY , METADATA_RESPONSIBILITY , produced_by_dict )
120151 return produced_by_dict
121152
def _formatted_controlled_vocabulary(self, rec: dict, key: str) -> list[dict]:
    """
    Wrap each value stored under ``key`` in a single-entry label dictionary.

    :param rec: the source record to read from
    :param key: key in ``rec`` holding the vocabulary values
    :return: one ``{METADATA_LABEL: value}`` dictionary per value, or an empty
        list when the key is missing or holds no values
    """
    # The key may be present with a None value, in which case .get()'s default is not used.
    values = rec.get(key) or []
    return [{METADATA_LABEL: value} for value in values]
156+
def _has_specimen_categories(self, rec: dict) -> list:
    """Return the specimen categories of ``rec`` formatted as controlled vocabulary entries."""
    specimen_categories = self._formatted_controlled_vocabulary(rec, SOLR_HAS_SPECIMEN_CATEGORY)
    return specimen_categories
159+
def _has_material_categories(self, rec: dict) -> list:
    """Return the material categories of ``rec`` formatted as controlled vocabulary entries."""
    material_categories = self._formatted_controlled_vocabulary(rec, SOLR_HAS_MATERIAL_CATEGORY)
    return material_categories
162+
def _has_context_categories(self, rec: dict) -> list:
    """Return the context categories of ``rec`` formatted as controlled vocabulary entries."""
    context_categories = self._formatted_controlled_vocabulary(rec, SOLR_HAS_CONTEXT_CATEGORY)
    return context_categories
165+
def _keywords(self, rec: dict) -> list:
    """
    Wrap each keyword of ``rec`` in a single-entry keyword dictionary.

    :param rec: the source record to read the keywords from
    :return: one ``{METADATA_KEYWORD: keyword}`` dictionary per keyword, or an
        empty list when the record has no keywords
    """
    # The key may be present with a None value, in which case .get()'s default is not used.
    keywords = rec.get(SOLR_KEYWORDS) or []
    return [{METADATA_KEYWORD: keyword} for keyword in keywords]
168+
def _registrant_dict(self, rec: dict) -> dict:
    """
    Build the registrant sub-dictionary from the first registrant value of ``rec``.

    The lookup is deliberately strict: a record without the registrant key, or
    with an empty registrant sequence, raises instead of producing a partial
    dictionary.
    """
    registrants = rec[SOLR_REGISTRANT]
    first_registrant = registrants[0]
    return {METADATA_NAME: first_registrant}
124171
125172 def _rename_table_columns_csv (self ):
126173 """Renames the solr columns to the public names in the public metadata schema, while maintaining CSV tabular format"""
@@ -168,11 +215,11 @@ def _rename_table_columns_jsonl(self):
168215 mappings [METADATA_LABEL ] = SOLR_LABEL
169216 mappings [METADATA_DESCRIPTION ] = SOLR_DESCRIPTION
170217 mappings ["source_collection" ] = SOLR_SOURCE # this isn't present in the exported metadata
171- mappings [METADATA_HAS_SPECIMEN_CATEGORY ] = SOLR_HAS_SPECIMEN_CATEGORY
172- mappings [METADATA_HAS_MATERIAL_CATEGORY ] = SOLR_HAS_MATERIAL_CATEGORY
173- mappings [METADATA_HAS_CONTEXT_CATEGORY ] = SOLR_HAS_CONTEXT_CATEGORY
218+ mappings [METADATA_HAS_SPECIMEN_CATEGORY ] = self . _has_specimen_categories
219+ mappings [METADATA_HAS_MATERIAL_CATEGORY ] = self . _has_material_categories
220+ mappings [METADATA_HAS_CONTEXT_CATEGORY ] = self . _has_context_categories
174221 mappings [METADATA_INFORMAL_CLASSIFICATION ] = SOLR_INFORMAL_CLASSIFICATION
175- mappings [METADATA_KEYWORDS ] = SOLR_KEYWORDS
222+ mappings [METADATA_KEYWORDS ] = self . _keywords
176223 mappings [METADATA_PRODUCED_BY ] = self ._produced_by_dict
177224 mappings [METADATA_REGISTRANT ] = self ._registrant_dict
178225 mappings [METADATA_SAMPLING_PURPOSE ] = SOLR_SAMPLING_PURPOSE
0 commit comments