diff --git a/src/sssom/constants.py b/src/sssom/constants.py
index b858405c..65cf4afb 100644
--- a/src/sssom/constants.py
+++ b/src/sssom/constants.py
@@ -10,6 +10,7 @@
 import yaml
 from linkml_runtime.utils.schema_as_dict import schema_as_dict
 from linkml_runtime.utils.schemaview import SchemaView
+from sssom_schema.datamodel.sssom_schema import SssomVersionEnum
 
 HERE = pathlib.Path(__file__).parent.resolve()
 
@@ -89,6 +90,7 @@
 MAPPING_SET_SOURCE = "mapping_set_source"
 MAPPING_SOURCE = "mapping_source"
 MAPPING_CARDINALITY = "mapping_cardinality"
+CARDINALITY_SCOPE = "cardinality_scope"
 MAPPING_TOOL = "mapping_tool"
 MAPPING_TOOL_VERSION = "mapping_tool_version"
 MAPPING_DATE = "mapping_date"
@@ -278,6 +280,27 @@ def propagatable_slots(self) -> List[str]:
                     slots.append(slot_name)
         return slots
 
+    def get_minimum_version(self, slot_name: str, class_name: str = "mapping") -> SssomVersionEnum:
+        """Get the minimum version of SSSOM required for a given slot.
+
+        :param slot_name: The queried slot.
+        :param class_name: The class the slot belongs to. This is needed
+                           because a slot may have been added to a class
+                           in a later version than the version in which
+                           it was first introduced in the schema.
+        :return: A SssomVersionEnum value representing the earliest
+                 version of SSSOM that defines the given slot in the
+                 given class. May be None if the requested slot name
+                 is not a valid slot name.
+        """
+        try:
+            slot = self.view.induced_slot(slot_name, class_name)
+            return SssomVersionEnum(slot.annotations.added_in.value)
+        except AttributeError:  # No added_in annotation, defaults to 1.0
+            return SssomVersionEnum("1.0")
+        except ValueError:  # No such slot
+            return None
+
 
 @lru_cache(1)
 def _get_sssom_schema_object() -> SSSOMSchemaView:
diff --git a/src/sssom/util.py b/src/sssom/util.py
index b49d08fd..a60aa1dd 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -26,6 +26,7 @@
     COLUMN_INVERT_DICTIONARY,
     COMMENT,
     CONFIDENCE,
+    MAPPING_CARDINALITY,
     MAPPING_JUSTIFICATION,
     MAPPING_SET_ID,
     MAPPING_SET_SOURCE,
@@ -33,6 +34,7 @@
     OBJECT_ID,
     OBJECT_LABEL,
     OBJECT_SOURCE,
+    OBJECT_TYPE,
     OBO_HAS_DB_XREF,
     OWL_DIFFERENT_FROM,
     OWL_EQUIVALENT_CLASS,
@@ -55,6 +57,7 @@
     SUBJECT_ID,
     SUBJECT_LABEL,
     SUBJECT_SOURCE,
+    SUBJECT_TYPE,
     UNKNOWN_IRI,
     MetadataType,
     PathOrIO,
@@ -393,6 +396,59 @@ def condense(self) -> List[str]:
         self.df.drop(columns=condensed, inplace=True)
         return condensed
 
+    def get_compatible_version(self) -> str:
+        """Get the minimum version of SSSOM this set is compatible with.
+
+        :return: The highest of the minimum versions required by the
+                 slots and enum values used in this set, as a string
+                 (e.g. "1.1"). This is "1.0" when nothing in the set
+                 requires a later version, including when the set
+                 contains no known slot at all.
+        """
+        schema = SSSOMSchemaView()
+
+        # Start from the baseline version, so that a set without any
+        # known slot still yields an answer instead of making the
+        # final max() fail on an empty collection.
+        versions = {"1.0"}
+
+        def _column_contains(column: str, value: str) -> bool:
+            return column in self.df.columns and value in self.df[column].values
+
+        # First get the minimum versions required by the slots present
+        # in the set; this is entirely provided by the SSSOM model.
+        for slot in self.metadata:
+            version = schema.get_minimum_version(slot, "mapping set")
+            if version is not None:
+                versions.add(str(version))
+        for slot in self.df.columns:
+            version = schema.get_minimum_version(slot, "mapping")
+            if version is not None:
+                versions.add(str(version))
+
+        # Then take care of enum values; we cannot use the SSSOM model
+        # for that (enum values are not tagged with an "added_in"
+        # annotation the way slots are), so this has to be handled
+        # "manually", based on the information provided in the SSSOM
+        # specification about model changes across versions.
+        composed = "composed entity expression"
+        if any(
+            self.metadata.get(slot) == composed or _column_contains(slot, composed)
+            for slot in (SUBJECT_TYPE, OBJECT_TYPE)
+        ):
+            versions.add("1.1")
+
+        if _column_contains(MAPPING_CARDINALITY, "0:0"):
+            versions.add("1.1")
+
+        # Get the highest of the accumulated versions. We do a numerical
+        # sort, so that version 1.10 (if we ever get that far in the 1.x
+        # branch) does not get sorted before version 1.9.
+        def _version_to_compare_key(version):
+            return tuple(int(s) for s in version.split("."))
+
+        return max(versions, key=_version_to_compare_key)
+
 
 def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str:
     """Standardize a CURIE or IRI, returning the original if not possible.
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 91e187d0..ce8f7473 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -12,13 +12,18 @@
 from sssom_schema import slots as SSSOM_Slots
 
 from sssom.constants import (
+    CARDINALITY_SCOPE,
     CREATOR_ID,
+    MAPPING_CARDINALITY,
     OBJECT_ID,
     OBJECT_LABEL,
+    OBJECT_TYPE,
     PREDICATE_ID,
+    PREDICATE_TYPE,
     SEMAPV,
     SUBJECT_ID,
     SUBJECT_LABEL,
+    SUBJECT_TYPE,
 )
 from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter
 from sssom.io import extract_iris
@@ -595,3 +600,44 @@ def test_propagation_fill_empty_mode(self) -> None:
         self.assertIn("mapping_tool", propagated_slots)
         self.assertNotIn("mapping_tool", msdf.metadata)
         self.assertEqual(2, len(msdf.df["mapping_tool"].unique()))
+
+    def test_inferring_compatible_version(self) -> None:
+        """Test that we can correctly infer the version a set is compatible with."""
+        msdf10 = parse_sssom_table(f"{data_dir}/basic.tsv")
+
+        # Nothing in that set requires 1.1
+        self.assertEqual("1.0", msdf10.get_compatible_version())
+
+        def _clone(msdf):
+            return MappingSetDataFrame(df=msdf.df.copy(), metadata=msdf.metadata.copy())
+
+        # Inject a 1.1-specific mapping set slot
+        msdf11 = _clone(msdf10)
+        msdf11.metadata[CARDINALITY_SCOPE] = "predicate_id"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Inject a 1.1-specific mapping slot
+        msdf11 = _clone(msdf10)
+        msdf11.df[PREDICATE_TYPE] = "owl object property"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Inject a 1.1-specific entity_type_enum value
+        msdf11 = _clone(msdf10)
+        msdf11.metadata[SUBJECT_TYPE] = "composed entity expression"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Same, but on a single mapping record
+        msdf11 = _clone(msdf10)
+        msdf11.df[OBJECT_TYPE] = "owl class"
+        msdf11.df.loc[2, OBJECT_TYPE] = "composed entity expression"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Inject the 1.1-specific "0:0" cardinality value
+        msdf11 = _clone(msdf10)
+        msdf11.df[MAPPING_CARDINALITY] = "1:1"
+        msdf11.df.loc[9, MAPPING_CARDINALITY] = "0:0"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # A set without any known slot falls back to the baseline version
+        empty = MappingSetDataFrame(df=msdf10.df[[]], metadata={})
+        self.assertEqual("1.0", empty.get_compatible_version())