From b139c26b915cd991013c6267906191a330b10543 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Mon, 1 Sep 2025 18:34:52 +0100 Subject: [PATCH 1/4] Add method to infer minimum compatible SSSOM version. Add a new method to the MappingSetDataFrame class to automatically determine the minimum version of the SSSOM specification the set is compatible with -- that is, the earliest version that defines all the slots and all the enum values present in the set. --- src/sssom/constants.py | 22 +++++++++++++++++++ src/sssom/util.py | 50 ++++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 37 +++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index b858405c..d7cfa808 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -10,6 +10,7 @@ import yaml from linkml_runtime.utils.schema_as_dict import schema_as_dict from linkml_runtime.utils.schemaview import SchemaView +from sssom_schema.datamodel.sssom_schema import SssomVersionEnum HERE = pathlib.Path(__file__).parent.resolve() @@ -278,6 +279,27 @@ def propagatable_slots(self) -> List[str]: slots.append(slot_name) return slots + def get_minimum_version(self, slot_name: str, class_name: str = "mapping"): + """Get the minimum version of SSSOM required for a given slot. + + :param slot_name: The queried slot. + :param class_name: The class the slot belongs to. This is needed + because a slot may have been added to a class + in a later version than the version in which + it was first introduced in the schema. + :return: A SssomVersionEnum value representing the earliest + version of SSSOM that defines the given slot in the + given class. May be None if the requested slot name + is not a valid slot name. + """ + try: + slot = self.view.induced_slot(slot_name, class_name) + return SssomVersionEnum(slot.annotations.added_in.value) + except AttributeError: # No added_in annotation, defaults to 1.0 + return SssomVersionEnum("1.0") + except ValueError: # No such slot + return None + @lru_cache(1) def _get_sssom_schema_object() -> SSSOMSchemaView: diff --git a/src/sssom/util.py b/src/sssom/util.py index b49d08fd..8425f9e7 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -393,6 +393,56 @@ def condense(self) -> List[str]: self.df.drop(columns=condensed, inplace=True) return condensed + def get_compatible_version(self): + """Get the minimum version of SSSOM this set is compatible with.""" + schema = SSSOMSchemaView() + versions = set() + + # First get the minimum versions required by the slots present + # in the set; this is entirely provided by the SSSOM model. + for slot in self.metadata.keys(): + version = schema.get_minimum_version(slot, "mapping set") + if version is not None: + versions.add(str(version)) + for slot in self.df.columns: + version = schema.get_minimum_version(slot, "mapping") + if version is not None: + versions.add(str(version)) + + # Then take care of enum values; we cannot use the SSSOM model + # for that (enum values are not tagged with an "added_in" + # annotation the way slots are), so this has to be handled + # "manually" based on the informations provided in + # . + if ( + self.metadata.get("subject_type") == "composed entity expression" + or self.metadata.get("subject_type") == "composed entity expression" + or ( + "subject_type" in self.df.columns + and "composed entity expression" in self.df["subject_type"].values + ) + or ( + "object_type" in self.df.columns + and "composed entity expression" in self.df["object_type"].values + ) + ): + versions.add("1.1") + + if ( + "mapping_cardinality" in self.df.columns + and "0:0" in self.df["mapping_cardinality"].values + ): + versions.add("1.1") + + # Get the highest of the accumulated versions. We do a numerical + # sort, so that version 1.10 (if we ever get that far in the 1.x + # branch) does not get sorted before version 1.9. + def _version_to_compare_key(version): + major, minor = [int(s) for s in version.split(".")] + return (major * 100) + minor + + return sorted(versions, key=_version_to_compare_key)[-1] + def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str: """Standardize a CURIE or IRI, returning the original if not possible. diff --git a/tests/test_utils.py b/tests/test_utils.py index 91e187d0..206deffc 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -595,3 +595,40 @@ def test_propagation_fill_empty_mode(self) -> None: self.assertIn("mapping_tool", propagated_slots) self.assertNotIn("mapping_tool", msdf.metadata) self.assertEqual(2, len(msdf.df["mapping_tool"].unique())) + + def test_inferring_compatible_version(self) -> None: + """Test that we can correctly infer the version a set is compatible with.""" + msdf10 = parse_sssom_table(f"{data_dir}/basic.tsv") + + # Nothing in that set requires 1.1 + self.assertEqual("1.0", msdf10.get_compatible_version()) + + def _clone(msdf): + return MappingSetDataFrame(df=msdf.df.copy(), metadata=msdf.metadata.copy()) + + # Inject a 1.1-specific mapping set slot + msdf11 = _clone(msdf10) + msdf11.metadata["cardinality_scope"] = "predicate_id" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Inject a 1.1-specific mapping slot + msdf11 = _clone(msdf10) + msdf11.df["predicate_type"] = "owl object property" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Inject a 1.1-specific entity_type_enum value + msdf11 = _clone(msdf10) + msdf11.metadata["subject_type"] = "composed entity expression" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Same, but on a single mapping record + msdf11 = _clone(msdf10) + msdf11.df["object_type"] = "owl class" + msdf11.df.loc[2, "object_type"] = "composed entity expression" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Inject the 1.1-specific "0:0" cardinality value + msdf11 = _clone(msdf10) + msdf11.df["mapping_cardinality"] = "1:1" + msdf11.df.loc[9, "mapping_cardinality"] = "0:0" + self.assertEqual("1.1", msdf11.get_compatible_version()) From 90066071a432c7470ec758f9d36a42b7fdeef5d2 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 2 Sep 2025 20:53:22 +0100 Subject: [PATCH 2/4] Add missing return type hints. --- src/sssom/constants.py | 2 +- src/sssom/util.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index d7cfa808..701200e8 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -279,7 +279,7 @@ def propagatable_slots(self) -> List[str]: slots.append(slot_name) return slots - def get_minimum_version(self, slot_name: str, class_name: str = "mapping"): + def get_minimum_version(self, slot_name: str, class_name: str = "mapping") -> SssomVersionEnum: """Get the minimum version of SSSOM required for a given slot. :param slot_name: The queried slot. diff --git a/src/sssom/util.py b/src/sssom/util.py index 8425f9e7..4d6819c3 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -393,7 +393,7 @@ def condense(self) -> List[str]: self.df.drop(columns=condensed, inplace=True) return condensed - def get_compatible_version(self): + def get_compatible_version(self) -> str: """Get the minimum version of SSSOM this set is compatible with.""" schema = SSSOMSchemaView() versions = set() From 20ea4e8311a7797100290e75c9ac793a5acb11f4 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 2 Sep 2025 21:37:02 +0100 Subject: [PATCH 3/4] Misc fixes. Fix wrong slot name when looking for "composed entity expression". Let Python compare version numbers as tuples of integers. Use `max(list)` instead of `sorted(list)[-1]`. --- src/sssom/util.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index 4d6819c3..8d02d3ab 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -416,7 +416,7 @@ def get_compatible_version(self) -> str: # . if ( self.metadata.get("subject_type") == "composed entity expression" - or self.metadata.get("subject_type") == "composed entity expression" + or self.metadata.get("object_type") == "composed entity expression" or ( "subject_type" in self.df.columns and "composed entity expression" in self.df["subject_type"].values @@ -438,10 +438,9 @@ def get_compatible_version(self) -> str: # sort, so that version 1.10 (if we ever get that far in the 1.x # branch) does not get sorted before version 1.9. def _version_to_compare_key(version): - major, minor = [int(s) for s in version.split(".")] - return (major * 100) + minor + return tuple(int(s) for s in version.split(".")) - return sorted(versions, key=_version_to_compare_key)[-1] + return max(versions, key=_version_to_compare_key) def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str: From 588bf4ff0d568572b6f84cf26e75df6d08ec1f7a Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 2 Sep 2025 22:46:09 +0100 Subject: [PATCH 4/4] Use constants to refer to SSSOM slot names. --- src/sssom/constants.py | 1 + src/sssom/util.py | 20 ++++++++++---------- tests/test_utils.py | 19 ++++++++++++------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index 701200e8..65cf4afb 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -90,6 +90,7 @@ MAPPING_SET_SOURCE = "mapping_set_source" MAPPING_SOURCE = "mapping_source" MAPPING_CARDINALITY = "mapping_cardinality" +CARDINALITY_SCOPE = "cardinality_scope" MAPPING_TOOL = "mapping_tool" MAPPING_TOOL_VERSION = "mapping_tool_version" MAPPING_DATE = "mapping_date" diff --git a/src/sssom/util.py b/src/sssom/util.py index 8d02d3ab..a60aa1dd 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -26,6 +26,7 @@ COLUMN_INVERT_DICTIONARY, COMMENT, CONFIDENCE, + MAPPING_CARDINALITY, MAPPING_JUSTIFICATION, MAPPING_SET_ID, MAPPING_SET_SOURCE, @@ -33,6 +34,7 @@ OBJECT_ID, OBJECT_LABEL, OBJECT_SOURCE, + OBJECT_TYPE, OBO_HAS_DB_XREF, OWL_DIFFERENT_FROM, OWL_EQUIVALENT_CLASS, @@ -55,6 +57,7 @@ SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, + SUBJECT_TYPE, UNKNOWN_IRI, MetadataType, PathOrIO, @@ -415,23 +418,20 @@ def get_compatible_version(self) -> str: # "manually" based on the informations provided in # . if ( - self.metadata.get("subject_type") == "composed entity expression" - or self.metadata.get("object_type") == "composed entity expression" + self.metadata.get(SUBJECT_TYPE) == "composed entity expression" + or self.metadata.get(OBJECT_TYPE) == "composed entity expression" or ( - "subject_type" in self.df.columns - and "composed entity expression" in self.df["subject_type"].values + SUBJECT_TYPE in self.df.columns + and "composed entity expression" in self.df[SUBJECT_TYPE].values ) or ( - "object_type" in self.df.columns - and "composed entity expression" in self.df["object_type"].values + OBJECT_TYPE in self.df.columns + and "composed entity expression" in self.df[OBJECT_TYPE].values ) ): versions.add("1.1") - if ( - "mapping_cardinality" in self.df.columns - and "0:0" in self.df["mapping_cardinality"].values - ): + if MAPPING_CARDINALITY in self.df.columns and "0:0" in self.df[MAPPING_CARDINALITY].values: versions.add("1.1") # Get the highest of the accumulated versions. We do a numerical diff --git a/tests/test_utils.py b/tests/test_utils.py index 206deffc..ce8f7473 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,13 +12,18 @@ from sssom_schema import slots as SSSOM_Slots from sssom.constants import ( + CARDINALITY_SCOPE, CREATOR_ID, + MAPPING_CARDINALITY, OBJECT_ID, OBJECT_LABEL, + OBJECT_TYPE, PREDICATE_ID, + PREDICATE_TYPE, SEMAPV, SUBJECT_ID, SUBJECT_LABEL, + SUBJECT_TYPE, ) from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter from sssom.io import extract_iris @@ -608,27 +613,27 @@ def _clone(msdf): # Inject a 1.1-specific mapping set slot msdf11 = _clone(msdf10) - msdf11.metadata["cardinality_scope"] = "predicate_id" + msdf11.metadata[CARDINALITY_SCOPE] = "predicate_id" self.assertEqual("1.1", msdf11.get_compatible_version()) # Inject a 1.1-specific mapping slot msdf11 = _clone(msdf10) - msdf11.df["predicate_type"] = "owl object property" + msdf11.df[PREDICATE_TYPE] = "owl object property" self.assertEqual("1.1", msdf11.get_compatible_version()) # Inject a 1.1-specific entity_type_enum value msdf11 = _clone(msdf10) - msdf11.metadata["subject_type"] = "composed entity expression" + msdf11.metadata[SUBJECT_TYPE] = "composed entity expression" self.assertEqual("1.1", msdf11.get_compatible_version()) # Same, but on a single mapping record msdf11 = _clone(msdf10) - msdf11.df["object_type"] = "owl class" - msdf11.df.loc[2, "object_type"] = "composed entity expression" + msdf11.df[OBJECT_TYPE] = "owl class" + msdf11.df.loc[2, OBJECT_TYPE] = "composed entity expression" self.assertEqual("1.1", msdf11.get_compatible_version()) # Inject the 1.1-specific "0:0" cardinality value msdf11 = _clone(msdf10) - msdf11.df["mapping_cardinality"] = "1:1" - msdf11.df.loc[9, "mapping_cardinality"] = "0:0" + msdf11.df[MAPPING_CARDINALITY] = "1:1" + msdf11.df.loc[9, MAPPING_CARDINALITY] = "0:0" self.assertEqual("1.1", msdf11.get_compatible_version())