5
5
import re
6
6
import typing
7
7
from collections import Counter
8
+ from dateutil import parser as date_parser
8
9
from pathlib import Path
9
10
from typing import Any , Callable , Dict , List , Optional , Set , TextIO , Tuple , Union , cast
10
11
from urllib .request import urlopen
24
25
add_built_in_prefixes_to_prefix_map ,
25
26
get_default_metadata ,
26
27
)
27
- from .sssom_datamodel import Mapping , MappingSet
28
+ from .sssom_datamodel import Mapping , MappingSet , MatchTypeEnum
28
29
from .sssom_document import MappingSetDocument
29
30
from .typehints import Metadata , MetadataType , PrefixMap
30
31
from .util import (
@@ -140,6 +141,24 @@ def read_obographs_json(
140
141
)
141
142
142
143
144
def read_snomed_icd10cm_map_tsv(
    file_path: str,
    prefix_map: Optional[Dict[str, str]] = None,
    meta: Optional[Dict[str, str]] = None,
) -> MappingSetDataFrame:
    """Read a SNOMED CT to ICD-10-CM map TSV file into a MappingSetDataFrame.

    The path is checked with ``raise_for_bad_path``, the file is loaded with
    ``read_pandas``, and the resulting table is handed to
    ``from_snomed_icd10cm_map_tsv`` for conversion.

    :param file_path: The path to the SNOMED ICD10CM map TSV file
    :param prefix_map: an optional prefix map
    :param meta: an optional dictionary of metadata elements
    :return: A SSSOM MappingSetDataFrame
    """
    # Validate the path before attempting to read it.
    raise_for_bad_path(file_path)
    df = read_pandas(file_path)
    return from_snomed_icd10cm_map_tsv(df, prefix_map=prefix_map, meta=meta)
160
+
161
+
143
162
def _get_prefix_map_and_metadata (
144
163
prefix_map : Optional [PrefixMap ] = None , meta : Optional [MetadataType ] = None
145
164
) -> Metadata :
@@ -499,6 +518,144 @@ def from_obographs(
499
518
return to_mapping_set_dataframe (mdoc )
500
519
501
520
521
def from_snomed_icd10cm_map_tsv(
    df: pd.DataFrame,
    prefix_map: Optional[PrefixMap] = None,
    meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
    """Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.

    Each row of ``df`` becomes one ``Mapping`` whose subject is the SNOMED
    concept (``referencedComponentId``), whose object is the ICD10CM code
    (``mapTarget``) and whose predicate is the SNOMED map category
    (``mapCategoryId``). Columns without a dedicated SSSOM slot are packed
    into the ``other`` slot as pipe-delimited ``key=value`` pairs.

    Field descriptions, taken from
    doc_Icd10cmMapReleaseNotes_Current-en-US_US1000124_20210901.pdf
    (FIELD, DATA_TYPE, PURPOSE, Joe's comments):

    - id, UUID: A 128 bit unsigned integer, uniquely identifying the map
      record.
    - effectiveTime, Time: Specifies the inclusive date at which this change
      becomes effective.
    - active, Boolean: Specifies whether the member’s state was active (=1)
      or inactive (=0) from the nominal release date specified by the
      effectiveTime field.
    - moduleId, SctId: Identifies the member version’s module. Set to a child
      of 900000000000443000|Module| within the metadata hierarchy. The only
      value in the entire set is '5991000124107', which has label 'SNOMED CT
      to ICD-10-CM rule-based mapping module' (
      https://www.findacode.com/snomed/5991000124107--snomed-ct-to-icd-10-cm-rule-based-mapping-module.html).
    - refSetId, SctId (read from the ``refsetId`` column by this function):
      Set to one of the children of the |Complex map type| concept in the
      metadata hierarchy. The only value in the entire set is
      '6011000124106', which has label 'ICD-10-CM complex map reference set' (
      https://www.findacode.com/snomed/6011000124106--icd-10-cm-complex-map-reference-set.html).
    - referencedComponentId, SctId: The SNOMED CT source concept ID that is
      the subject of the map record.
    - mapGroup, Integer: An integer identifying a grouping of complex map
      records which will designate one map target at the time of map rule
      evaluation. Source concepts that require two map targets for
      classification will have two sets of map groups.
    - mapPriority, Integer: Within a map group, the mapPriority specifies the
      order in which complex map records should be evaluated to determine the
      correct map target.
    - mapRule, String: A machine-readable rule, (evaluating to either ‘true’
      or ‘false’ at run-time) that indicates whether this map record should be
      selected within its map group.
    - mapAdvice, String: Human-readable advice that may be employed by the
      software vendor to give an end-user advice on selection of the
      appropriate target code. This includes a) a summary statement of the
      map rule logic, b) a statement of any limitations of the map record and
      c) additional classification guidance for the coding professional.
    - mapTarget, String: The target ICD-10 classification code of the map
      record.
    - correlationId, SctId: A child of |Map correlation value| in the
      metadata hierarchy, identifying the correlation between the SNOMED CT
      concept and the target code.
    - mapCategoryId, SctId: Identifies the SNOMED CT concept in the metadata
      hierarchy which is the MapCategory for the associated map record. This
      is a subtype of 447634004 |ICD-10 Map Category value|.

    :param df: A mappings dataframe
    :param prefix_map: A prefix map
    :param meta: A metadata dictionary
    :return: MappingSetDataFrame
    """
    prefix_map = _ensure_prefix_map(prefix_map)
    ms = _init_mapping_set(meta)

    # Source columns with no dedicated SSSOM slot; they are serialised into
    # the 'other' slot. Defined once here rather than rebuilt for every row.
    other_fields = (
        "id",
        "active",
        "moduleId",
        "refsetId",
        "mapGroup",
        "mapPriority",
        "mapRule",
        "mapAdvice",
    )

    mlist: List[Mapping] = []
    for _, row in df.iterrows():
        mdict = {
            "subject_id": f'SNOMED:{row["referencedComponentId"]}',
            "subject_label": row["referencedComponentName"],
            # 'predicate_id': 'skos:exactMatch',
            # - mapCategoryId: can use for mapping predicate? Or is correlationId more suitable?
            #   or is there a SKOS predicate I can map to in case where predicate is unknown? I think most of
            #   these mappings are attempts at exact matches, but I can't be sure (at least not without using
            #   these fields to determine: mapGroup, mapPriority, mapRule, mapAdvice).
            # mapCategoryId,mapCategoryName: Only these in set:
            #   447637006 "MAP SOURCE CONCEPT IS PROPERLY CLASSIFIED",
            #   447638001 "MAP SOURCE CONCEPT CANNOT BE CLASSIFIED WITH AVAILABLE DATA",
            #   447639009 "MAP OF SOURCE CONCEPT IS CONTEXT DEPENDENT"
            # 'predicate_modifier': '???',
            #   Description: Modifier for negating the predicate.
            #     See https://github.com/mapping-commons/sssom/issues/40
            #   Range: PredicateModifierEnum: (joe: only lists 'Not' as an option)
            #   Example: Not Negates the predicate, see documentation of predicate_modifier_enum
            # - predicate_id <- mapAdvice?
            # - predicate_modifier <- mapAdvice?
            #   mapAdvice: Pipe-delimited qualifiers. Ex:
            #   "ALWAYS Q71.30 | CONSIDER LATERALITY SPECIFICATION"
            #   "IF LISSENCEPHALY TYPE 3 FAMILIAL FETAL AKINESIA SEQUENCE SYNDROME CHOOSE Q04.3 | MAP OF SOURCE
            #   CONCEPT IS CONTEXT DEPENDENT"
            #   "MAP SOURCE CONCEPT CANNOT BE CLASSIFIED WITH AVAILABLE DATA"
            "predicate_id": f'SNOMED:{row["mapCategoryId"]}',
            "predicate_label": row["mapCategoryName"],
            # NOTE(review): an empty mapTarget cell would be rendered as 'ICD10CM:nan' here; presumably this
            # can occur for the "CANNOT BE CLASSIFIED" category - confirm and decide how to handle such rows.
            "object_id": f'ICD10CM:{row["mapTarget"]}',
            "object_label": row["mapTargetName"],
            # match_type <- mapRule?
            #   ex: TRUE: when "ALWAYS <code>" is in pipe-delimited list in mapAdvice, this always shows TRUE.
            #   Does this mean I could use skos:exactMatch in these cases?
            # match_type <- correlationId?
            #   In officially downloaded SNOMED mappings, all of them had correlationId of 447561005
            #   (https://www.findacode.com/snomed/447561005--snomed-ct-source-code-to-target-map-correlation-not-specified.html),
            #   which also happens to be 'unspecified'. No other correlation value is mapped yet, so every
            #   row gets 'Unspecified' regardless of correlationId.
            #   If correlationId is indeed more appropriate for predicate_id, then I don't think there is a
            #   representative field for 'match_type'.
            "match_type": MatchTypeEnum("Unspecified"),
            "mapping_date": date_parser.parse(str(row["effectiveTime"])).date(),
            "other": "|".join(f"{k}={row[k]!s}" for k in other_fields),
            # More fields (https://mapping-commons.github.io/sssom/Mapping/):
            # - subject_category: absent
            # - author_id: can this be "SNOMED"?
            # - author_label: can this be "SNOMED"?
            # - reviewer_id: can this be "SNOMED"?
            # - reviewer_label: can this be "SNOMED"?
            # - creator_id: can this be "SNOMED"?
            # - creator_label: can this be "SNOMED"?
            # - license: Is this something that can be determined?
            # - subject_source: URL of some official page for SNOMED version used?
            # - subject_source_version: Is this knowable?
            # - objectCategory <= mapRule?
            #   mapRule: ex: TRUE: when "ALWAYS <code>" is in pipe-delimited list in mapAdvice, this always
            #   shows TRUE. Does this mean I could use skos:exactMatch in these cases?
            # - object_category:
            #   objectCategory:
            #   Description: The conceptual category to which the subject belongs to. This can be a string
            #     denoting the category or a term from a controlled vocabulary.
            #   Example: UBERON:0001062 (The CURIE of the Uberon term for "anatomical entity".)
            # - object_source: URL of some official page for ICD10CM version used?
            # - object_source_version: would this be "10CM" as in "ICD10CM"? Or something else? Or nothing?
            # - mapping_provider: can this be "SNOMED"?
            # - mapping_cardinality: Could I determine 1:1 or 1:many or many:1 based on:
            #   mapGroup, mapPriority, mapRule, mapAdvice?
            # - match_term_type: What is this?
            # - see_also: Should this be a URL to the SNOMED term?
            # - comment: Description: Free text field containing either curator notes or text generated by tool
            #   providing additional informative information.
        }
        mlist.append(_prepare_mapping(Mapping(**mdict)))

    ms.mappings = mlist
    _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
    doc = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
    return to_mapping_set_dataframe(doc)
657
+
658
+
502
659
# All from_* take as an input a python object (data frame, json, etc) and return a MappingSetDataFrame
503
660
# All read_* take as an input a a file handle and return a MappingSetDataFrame (usually wrapping a from_* method)
504
661
@@ -523,6 +680,9 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
523
680
return read_alignment_xml
524
681
elif input_format == "obographs-json" :
525
682
return read_obographs_json
683
+ elif input_format == "snomed-icd10cm-map-tsv" :
684
+ return read_snomed_icd10cm_map_tsv
685
+
526
686
else :
527
687
raise Exception (f"Unknown input format: { input_format } " )
528
688
0 commit comments