1818from dapla_metadata .dapla import user_info
1919from dapla_metadata .datasets ._merge import DatasetConsistencyStatus
2020from dapla_metadata .datasets ._merge import check_dataset_consistency
21- from dapla_metadata .datasets ._merge import check_ready_to_merge
2221from dapla_metadata .datasets ._merge import check_variables_consistency
2322from dapla_metadata .datasets ._merge import merge_metadata
24- from dapla_metadata .datasets .compatibility import is_metadata_in_container_structure
25- from dapla_metadata .datasets .compatibility import upgrade_metadata
23+ from dapla_metadata .datasets ._merge import report_metadata_consistency
24+ from dapla_metadata .datasets .compatibility ._utils import (
25+ is_metadata_in_container_structure ,
26+ )
27+ from dapla_metadata .datasets .compatibility .model_backwards_compatibility import (
28+ upgrade_metadata ,
29+ )
2630from dapla_metadata .datasets .dapla_dataset_path_info import DaplaDatasetPathInfo
2731from dapla_metadata .datasets .dataset_parser import DatasetParser
32+ from dapla_metadata .datasets .dataset_parser import pretty_print_supported_types
2833from dapla_metadata .datasets .model_validation import ValidateDatadocMetadata
2934from dapla_metadata .datasets .statistic_subject_mapping import StatisticSubjectMapping
3035from dapla_metadata .datasets .utility .constants import (
@@ -126,6 +131,7 @@ def __init__(
126131 self .variables_lookup : dict [str , VariableType ] = {}
127132 self .explicitly_defined_metadata_document = False
128133 self .dataset_consistency_status : list [DatasetConsistencyStatus ] = []
134+ self .concrete_data_types_lookup : dict [str , str ] = {}
129135 if metadata_document_path :
130136 self .metadata_document = UPath (metadata_document_path )
131137 self .explicitly_defined_metadata_document = True
@@ -169,42 +175,37 @@ def _extract_metadata_from_files(self) -> None:
169175 self .metadata_document ,
170176 )
171177
172- if (
173- self .dataset_path is not None
174- and self .dataset == all_optional_model .Dataset ()
175- and len (self .variables ) == 0
176- ):
178+ if self .dataset_path :
177179 extracted_metadata = self ._extract_metadata_from_dataset (self .dataset_path )
180+ self .dataset_consistency_status .extend (
181+ self .check_illegal_variable_data_type (
182+ extracted_metadata .variables or [], self .concrete_data_types_lookup
183+ )
184+ )
178185
179186 if (
180187 self .dataset_path
181188 and self .metadata_document
182189 and extracted_metadata
183190 and existing_metadata
184- ):
185- self .dataset_consistency_status = check_dataset_consistency (
186- self .dataset_path ,
187- self .metadata_document ,
191+ ) and self .explicitly_defined_metadata_document :
192+ self .dataset_consistency_status .extend (
193+ check_dataset_consistency (
194+ self .dataset_path ,
195+ self .metadata_document ,
196+ )
188197 )
189198 self .dataset_consistency_status .extend (
190199 check_variables_consistency (
191200 extracted_metadata .variables or [],
192201 existing_metadata .variables or [],
193202 )
194203 )
195-
196- if (
197- self .dataset_path
198- and self .explicitly_defined_metadata_document
199- and self .metadata_document is not None
200- and self .metadata_document .exists ()
201- and extracted_metadata is not None
202- and existing_metadata is not None
203- ):
204- check_ready_to_merge (
204+ report_metadata_consistency (
205205 self .dataset_consistency_status ,
206206 errors_as_warnings = self .errors_as_warnings ,
207207 )
208+ # Merge existing metadata with a new dataset
208209 merged_metadata = merge_metadata (
209210 extracted_metadata ,
210211 existing_metadata ,
@@ -215,8 +216,31 @@ def _extract_metadata_from_files(self) -> None:
215216 self .dataset_path ,
216217 )
217218 self ._set_metadata (merged_metadata )
218- else :
219- self ._set_metadata (existing_metadata or extracted_metadata )
219+ return
220+
221+ report_metadata_consistency (
222+ self .dataset_consistency_status ,
223+ errors_as_warnings = self .errors_as_warnings ,
224+ message = "Problems were detected with the metadata." ,
225+ )
226+ self ._set_metadata (existing_metadata or extracted_metadata )
227+
def check_illegal_variable_data_type(
    self, variables: VariableListType, concrete_data_types_lookup: dict[str, str]
) -> list[DatasetConsistencyStatus]:
    """Check whether any of the variable types are unsupported.

    When we encounter a variable which is unsupported, the `DatasetParser`
    sets the variable `data_type` to `None`. This function detects that
    situation and creates a friendly error message to inform of the situation.

    Args:
        variables: The variables to inspect. Only variables that have a
            `short_name` and a falsy `data_type` are reported.
        concrete_data_types_lookup: Mapping from variable short name to the
            type name shown in the message. Short names missing from the
            mapping are reported with the type 'unknown'.

    Returns:
        One failed `DatasetConsistencyStatus` per unsupported variable, or
        an empty list when every variable has a data type.
    """
    unsupported = [v for v in variables if v.short_name and not v.data_type]
    if not unsupported:
        return []

    # The helper takes no arguments, so its output cannot vary between
    # variables: build it once instead of once per message.
    supported_types = pretty_print_supported_types()

    statuses = []
    for v in unsupported:
        concrete_type = concrete_data_types_lookup.get(v.short_name, "unknown")
        statuses.append(
            DatasetConsistencyStatus(
                message=(
                    f"Unsupported data type for variable '{v.short_name}' "
                    f"type: '{concrete_type}' from dataset {self.dataset_path}\n"
                    "Please change the type of the variable to one of the "
                    f"supported options:\n{supported_types}"
                ),
                success=False,
            )
        )
    return statuses
220244
221245 def _set_metadata (
222246 self ,
@@ -369,6 +393,14 @@ def _extract_metadata_from_dataset(
369393 spatial_coverage_description = DEFAULT_SPATIAL_COVERAGE_DESCRIPTION ,
370394 )
371395 metadata .variables = DatasetParser .for_file (dataset ).get_fields ()
396+ try :
397+ self .concrete_data_types_lookup = DatasetParser .for_file (
398+ dataset
399+ ).get_concrete_data_types ()
400+ except RuntimeError :
401+ logger .exception (
402+ "Failed to get concrete data types for dataset %s" , dataset
403+ )
372404 return metadata
373405
374406 @staticmethod
0 commit comments