Skip to content

Commit 2a29806

Browse files
committed
Update: SNOMED Complex Map: Now filters out any mappings that aren't of 'ALWAYS' level of confidence.
1 parent b74327f commit 2a29806

File tree

1 file changed

+32
-11
lines changed

1 file changed

+32
-11
lines changed

sssom/parsers.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
from linkml_runtime.loaders.json_loader import JSONLoader
2121
from rdflib import Graph, URIRef
2222

23-
# TODO: PR comment: where matchtypeenum? can't find sssomschema, Mapping, or MappingSet. only MappingSetDataFrame
24-
# from .sssom_datamodel import Mapping, MappingSet, MatchTypeEnum
2523
from sssom_schema import Mapping, MappingSet
2624

2725

@@ -268,17 +266,21 @@ def parse_snomed_complex_map_tsv(
268266
file_path: str,
269267
prefix_map: Dict[str, str] = None,
270268
meta: Dict[str, str] = None,
269+
filter_by_confident_mappings=True
271270
) -> MappingSetDataFrame:
272271
"""Parse a special SNOMED ICD10CM mapping file and translate it into a MappingSetDataFrame.
273272
274-
:param file_path: The path to the obographs file
273+
:param file_path: The path to the source file
275274
:param prefix_map: an optional prefix map
276275
:param meta: an optional dictionary of metadata elements
276+
:param filter_by_confident_mappings: If True, only include mapping rows whose `mapAdvice` field contains an 'ALWAYS
277+
<code>' pattern.
277278
:return: A SSSOM MappingSetDataFrame
278279
"""
279280
raise_for_bad_path(file_path)
280281
df = read_pandas(file_path)
281-
df2 = from_snomed_complex_map_tsv(df, prefix_map=prefix_map, meta=meta)
282+
df2 = from_snomed_complex_map_tsv(
283+
df, prefix_map=prefix_map, meta=meta, filter_by_confident_mappings=filter_by_confident_mappings)
282284
return df2
283285

284286

@@ -691,12 +693,15 @@ def from_snomed_complex_map_tsv(
691693
df: pd.DataFrame,
692694
prefix_map: Optional[PrefixMap] = None,
693695
meta: Optional[MetadataType] = None,
696+
filter_by_confident_mappings=True
694697
) -> MappingSetDataFrame:
695698
"""Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.
696699
697700
:param df: A mappings dataframe
698701
:param prefix_map: A prefix map
699702
:param meta: A metadata dictionary
703+
:param filter_by_confident_mappings: If True, only include mapping rows whose `mapAdvice` field contains an 'ALWAYS
704+
<code>' pattern.
700705
:return: MappingSetDataFrame
701706
702707
# Field descriptions
@@ -730,11 +735,23 @@ def from_snomed_complex_map_tsv(
730735
- mapCategoryId,SctId,Identifies the SNOMED CT concept in the metadata hierarchy which is the MapCategory for the
731736
associated map record. This is a subtype of 447634004 |ICD-10 Map Category value|.,
732737
"""
738+
# Local variables
733739
# https://www.findacode.com/snomed/447561005--snomed-ct-source-code-to-target-map-correlation-not-specified.html
734-
match_type_snomed_unspecified_id = 447561005
740+
mapping_justification_snomed_unspecified_id = 447561005
741+
# - Note: joeflack4: I used this info as a reference for this pattern.
742+
# https://www.medicalbillingandcoding.org/icd-10-cm/#:~:text=ICD%2D10%2DCM%20is%20a,decimal%20point%20and%20the%20subcategory.
743+
always_confidence_pattern = 'ALWAYS [A-Z]{1}[0-9]{1,2}\.[0-9A-Z]{1,4}'
744+
always_confidence_antipattern = always_confidence_pattern + '\?'
735745
prefix_map = _ensure_prefix_map(prefix_map)
736746
ms = _init_mapping_set(meta)
737747

748+
# Filtering
749+
if filter_by_confident_mappings:
750+
df = df[
751+
(df['mapAdvice'].str.contains(always_confidence_pattern, regex=True, na=False)) &
752+
(~df['mapAdvice'].str.contains(always_confidence_antipattern, regex=True, na=False))]
753+
754+
# Map mappings
738755
mlist: List[Mapping] = []
739756
for _, row in df.iterrows():
740757
mdict = {
@@ -766,16 +783,20 @@ def from_snomed_complex_map_tsv(
766783
'object_id': f'ICD10CM:{row["mapTarget"]}',
767784
'object_label': row['mapTargetName'],
768785

769-
# match_type <- mapRule?
786+
# mapping_justification <- mapRule?
770787
# ex: TRUE: when "ALWAYS <code>" is in pipe-delimited list in mapAdvice, this always shows TRUE. Does this
771788
# mean I could use skos:exactMatch in these cases?
772-
# match_type <- correlationId?: This may look redundant, but I want to be explicit. In officially downloaded
789+
# mapping_justification <- correlationId?: This may look redundant, but I want to be explicit. In officially downloaded
773790
# SNOMED mappings, all of them had correlationId of 447561005, which also happens to be 'unspecified'.
774791
# If correlationId is indeed more appropriate for predicate_id, then I don't think there is a representative
775-
# field for 'match_type'.
776-
'match_type': MatchTypeEnum('Unspecified') if row['correlationId'] == match_type_snomed_unspecified_id \
777-
else MatchTypeEnum('Unspecified'),
778-
792+
# field for 'mapping_justification'.
793+
# TODO: How to properly get mapping_justification?
794+
# I think I need to use sssom_schema.slots.mapping_justification, but I'm not sure how to use it.
795+
# slots.mapping_justification = Slot(uri=SSSOM.mapping_justification, name="mapping_justification", curie=SSSOM.curie('mapping_justification'),
796+
# model_uri=SSSOM.mapping_justification, domain=None, range=Union[str, EntityReference],
797+
# pattern=re.compile(r'^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining)$'))
798+
'mapping_justification':
799+
'Unspecified' if row['correlationId'] == mapping_justification_snomed_unspecified_id else 'Unspecified',
779800
'mapping_date': date_parser.parse(str(row['effectiveTime'])).date(),
780801
'other': '|'.join([f'{k}={str(row[k])}' for k in [
781802
'id',

0 commit comments

Comments
 (0)