Skip to content

Commit 7398ac4

Browse files
committed
New Feature: SNOMED::ICD10CM Mapping Support
- Added feature to allow for conversion of these premade mappings provided by SNOMED into SSSOM format. (WIP) General updates - cli.py: Reorganized SSSOM_READ_FORMATS: Top half are plain data formats, and bottom half are special-case formats. Both halves of the list are alphabetically sorted. Temp updates - Changed some relative imports to absolute imports, in order to speed up development and make debugging easier. It is possible that this could be a good permanent change too, though.
1 parent bf9c32b commit 7398ac4

File tree

3 files changed

+148
-10
lines changed

3 files changed

+148
-10
lines changed

sssom/cli.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
from rdflib import Graph
2525
from scipy.stats import chi2_contingency
2626

27-
from .cliques import split_into_cliques, summarize_cliques
28-
from .io import convert_file, parse_file, split_file, validate_file
29-
from .parsers import read_sssom_table
30-
from .rdf_util import rewire_graph
31-
from .sparql_util import EndpointConfig, query_mappings
32-
from .util import (
27+
from sssom.cliques import split_into_cliques, summarize_cliques
28+
from sssom.io import convert_file, parse_file, split_file, validate_file
29+
from sssom.parsers import read_sssom_table
30+
from sssom.rdf_util import rewire_graph
31+
from sssom.sparql_util import EndpointConfig, query_mappings
32+
from sssom.util import (
3333
SSSOM_EXPORT_FORMATS,
3434
SSSOM_READ_FORMATS,
3535
MappingSetDataFrame,
@@ -41,7 +41,7 @@
4141
remove_unmatched,
4242
to_mapping_set_dataframe,
4343
)
44-
from .writers import write_table
44+
from sssom.writers import write_table
4545

4646
# Click input options common across commands
4747
input_argument = click.argument("input", required=True, type=click.Path())

sssom/parsers.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,24 @@ def read_obographs_json(
140140
)
141141

142142

143+
def read_snomed_icd10cm_map_tsv(
    file_path: str,
    prefix_map: Optional[Dict[str, str]] = None,
    meta: Optional[Dict[str, str]] = None,
) -> MappingSetDataFrame:
    """Parse a SNOMED::ICD10CM mapping TSV file and translate it into a MappingSetDataFrame.

    Thin file-level wrapper: validates the path, loads the file into a dataframe and
    delegates the actual conversion to :func:`from_snomed_icd10cm_map_tsv`.

    :param file_path: The path to the SNOMED ICD10CM map TSV file
    :param prefix_map: an optional prefix map
    :param meta: an optional dictionary of metadata elements
    :return: A SSSOM MappingSetDataFrame
    """
    raise_for_bad_path(file_path)
    df = read_pandas(file_path)
    return from_snomed_icd10cm_map_tsv(df, prefix_map=prefix_map, meta=meta)
159+
160+
143161
def _get_prefix_map_and_metadata(
144162
prefix_map: Optional[PrefixMap] = None, meta: Optional[MetadataType] = None
145163
) -> Metadata:
@@ -499,6 +517,122 @@ def from_obographs(
499517
return to_mapping_set_dataframe(mdoc)
500518

501519

520+
def from_snomed_icd10cm_map_tsv(
    df: pd.DataFrame,
    prefix_map: Optional[PrefixMap] = None,
    meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
    """Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.

    Work in progress: only part of the SNOMED columns is translated so far (see the
    TODO lists in the body). The columns read from each row are ``referencedComponentId``,
    ``referencedComponentName``, ``mapCategoryId``, ``mapCategoryName``, ``mapTarget``,
    ``mapTargetName``, ``correlationId`` and ``effectiveTime``.

    Field descriptions (FIELD, DATA_TYPE, PURPOSE), taken from
    doc_Icd10cmMapReleaseNotes_Current-en-US_US1000124_20210901.pdf:

    - id, UUID, A 128 bit unsigned integer, uniquely identifying the map record
    - effectiveTime, Time, Specifies the inclusive date at which this change becomes effective.
    - active, Boolean, Specifies whether the member's state was active (=1) or inactive (=0) from the nominal
      release date specified by the effectiveTime field.
    - moduleId, SctId, Identifies the member version's module. Set to a child of 900000000000443000|Module| within
      the metadata hierarchy.
    - refSetId, SctId, Set to one of the children of the |Complex map type| concept in the metadata hierarchy.
    - referencedComponentId, SctId, The SNOMED CT source concept ID that is the subject of the map record.
    - mapGroup, Integer, An integer identifying a grouping of complex map records which will designate one map
      target at the time of map rule evaluation. Source concepts that require two map targets for classification
      will have two sets of map groups.
    - mapPriority, Integer, Within a map group, the mapPriority specifies the order in which complex map records
      should be evaluated to determine the correct map target.
    - mapRule, String, A machine-readable rule, (evaluating to either 'true' or 'false' at run-time) that indicates
      whether this map record should be selected within its map group
    - mapAdvice, String, Human-readable advice that may be employed by the software vendor to give an end-user
      advice on selection of the appropriate target code. This includes a) a summary statement of the map rule
      logic, b) a statement of any limitations of the map record and c) additional classification guidance for the
      coding professional.
    - mapTarget, String, The target ICD-10 classification code of the map record.
    - correlationId, SctId, A child of |Map correlation value| in the metadata hierarchy, identifying the
      correlation between the SNOMED CT concept and the target code.
    - mapCategoryId, SctId, Identifies the SNOMED CT concept in the metadata hierarchy which is the MapCategory for
      the associated map record. This is a subtype of 447634004 |ICD-10 Map Category value|.

    :param df: A mappings dataframe
    :param prefix_map: A prefix map
    :param meta: A metadata dictionary
    :return: MappingSetDataFrame
    """
    # TODO: If using in the end, import at top of file
    from dateutil import parser as date_parser
    from .sssom_datamodel import MatchTypeEnum

    prefix_map = _ensure_prefix_map(prefix_map)
    ms = _init_mapping_set(meta)

    # https://www.findacode.com/snomed/447561005--snomed-ct-source-code-to-target-map-correlation-not-specified.html
    match_type_snomed_unspecified_id = 447561005
    # Explicit SNOMED correlationId -> SSSOM match type table. In officially downloaded SNOMED mappings, all rows
    # had correlationId 447561005, which also happens to be 'unspecified'; any other id falls back to the same
    # value for now. Extend this table once other correlation values need a distinct match type.
    default_match_type = "Unspecified"
    match_type_by_correlation_id = {match_type_snomed_unspecified_id: "Unspecified"}

    mlist: List[Mapping] = []
    for _, row in df.iterrows():
        match_type = MatchTypeEnum(
            match_type_by_correlation_id.get(row["correlationId"], default_match_type)
        )
        # TODO: SNOMED: parse as many as possible:
        #  - id
        #  - active
        #  - moduleId
        #  - refSetId
        #  - mapGroup
        #  - mapPriority
        #  - mapRule
        #  - mapAdvice

        # TODO: SSSOM: use as many as possible
        #  - subject_category: Optional[str] = None
        #  - predicate_modifier: Optional[Union[str, "PredicateModifierEnum"]] = None
        #  - object_category: Optional[str] = None
        #  - comment: Optional[str] = None

        #  - author_id: can this be "SNOMED"?
        #  - author_label: can this be "SNOMED"?
        #  - reviewer_id: can this be "SNOMED"?
        #  - reviewer_label: can this be "SNOMED"?
        #  - creator_id: can this be "SNOMED"?
        #  - creator_label: can this be "SNOMED"?
        #  - license: Is this something that can be determined?
        #  - subject_source: URL of some official page for SNOMED version used?
        #  - subject_source_version: Is this knowable?
        #  - object_source: URL of some official page for ICD10CM version used?
        #  - object_source_version: would this be "10CM" as in "ICD10CM"? Or something else? Or nothing?
        #  - mapping_provider: can this be "SNOMED"?
        #  - mapping_cardinality: Could I determine 1:1 or 1:many or many:1 based on:
        #    ...mapGroup, mapPriority, mapRule, mapAdvice?
        #  - match_term_type: What is this?
        #  - see_also: Should this be a URL to the SNOMED term?
        #  - other: What would I put here that I wouldn't put in 'comment'?
        mdict = {
            "subject_id": f"SNOMED:{row['referencedComponentId']}",
            "subject_label": row["referencedComponentName"],
            # Does this represent what we want for our mapping predicate? Or is correlationId more suitable?
            # ...or is there a SKOS predicate I can map to in case where predicate is unknown? I think most of these
            # ...mappings are attempts at exact matches, but I can't be sure (at least not without using these fields
            # ...to determine: mapGroup, mapPriority, mapRule, mapAdvice).
            "predicate_id": f"SNOMED:{row['mapCategoryId']}",
            "predicate_label": row["mapCategoryName"],
            # NOTE(review): if mapTarget can be empty for a row, this presumably becomes 'ICD10CM:nan' --
            # confirm against real data and decide whether such rows should be skipped.
            "object_id": f"ICD10CM:{row['mapTarget']}",
            "object_label": row["mapTargetName"],
            # If correlationId is indeed more appropriate for predicate_id, then I don't think there is a
            # ...representative field for 'match_type'.
            "match_type": match_type,
            # effectiveTime is stringified first so that a value read as a number can still be parsed as a date.
            "mapping_date": date_parser.parse(str(row["effectiveTime"])).date(),
        }
        mlist.append(_prepare_mapping(Mapping(**mdict)))

    ms.mappings = mlist  # type:ignore
    _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
    doc = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
    return to_mapping_set_dataframe(doc)
634+
635+
502636
# All from_* take as an input a python object (data frame, json, etc) and return a MappingSetDataFrame
503637
# All read_* take as an input a a file handle and return a MappingSetDataFrame (usually wrapping a from_* method)
504638

@@ -523,6 +657,9 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
523657
return read_alignment_xml
524658
elif input_format == "obographs-json":
525659
return read_obographs_json
660+
elif input_format == "snomed-icd10cm-map-tsv":
661+
return read_snomed_icd10cm_map_tsv
662+
526663
else:
527664
raise Exception(f"Unknown input format: {input_format}")
528665

sssom/util.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,13 @@
4343
PREFIX_MAP_KEY = "curie_map"
4444

4545
SSSOM_READ_FORMATS = [
46-
"tsv",
47-
"rdf",
46+
"json",
4847
"owl",
48+
"rdf",
49+
"tsv",
4950
"alignment-api-xml",
5051
"obographs-json",
51-
"json",
52+
"snomed-icd10cm-map-tsv"
5253
]
5354
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json"]
5455

0 commit comments

Comments
 (0)