@@ -140,6 +140,24 @@ def read_obographs_json(
140
140
)
141
141
142
142
143
def read_snomed_icd10cm_map_tsv(
    file_path: str,
    prefix_map: Optional[Dict[str, str]] = None,
    meta: Optional[Dict[str, str]] = None,
) -> MappingSetDataFrame:
    """Parse a SNOMED ICD10CM mapping file and translate it into a MappingSetDataFrame.

    The file is loaded into a dataframe with ``read_pandas`` and then handed to
    ``from_snomed_icd10cm_map_tsv``, which does the actual conversion.

    :param file_path: The path to the SNOMED ICD10CM map file
    :param prefix_map: an optional prefix map
    :param meta: an optional dictionary of metadata elements
    :return: A SSSOM MappingSetDataFrame
    """
    # Validate the path before attempting to read it (presumably raises on a bad path).
    raise_for_bad_path(file_path)
    df = read_pandas(file_path)
    return from_snomed_icd10cm_map_tsv(df, prefix_map=prefix_map, meta=meta)
159
+
160
+
143
161
def _get_prefix_map_and_metadata (
144
162
prefix_map : Optional [PrefixMap ] = None , meta : Optional [MetadataType ] = None
145
163
) -> Metadata :
@@ -499,6 +517,122 @@ def from_obographs(
499
517
return to_mapping_set_dataframe (mdoc )
500
518
501
519
520
def from_snomed_icd10cm_map_tsv(
    df: pd.DataFrame,
    prefix_map: Optional[PrefixMap] = None,
    meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
    """Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.

    One mapping is created per row of ``df``. Besides the release-file fields listed
    below, the rows are also expected to carry ``referencedComponentName``,
    ``mapTargetName`` and ``mapCategoryName`` columns, which are used as labels.
    NOTE(review): those three columns are not part of the field list below; presumably
    they come from the human-readable variant of the map file -- confirm.

    :param df: A mappings dataframe
    :param prefix_map: A prefix map
    :param meta: A metadata dictionary
    :return: MappingSetDataFrame
    :raises KeyError: if a row lacks one of the columns read below

    # Field descriptions
    # - Taken from: doc_Icd10cmMapReleaseNotes_Current-en-US_US1000124_20210901.pdf
    FIELD,DATA_TYPE,PURPOSE
    - id,UUID,A 128 bit unsigned integer, uniquely identifying the map record
    - effectiveTime,Time,Specifies the inclusive date at which this change becomes effective.
    - active,Boolean,Specifies whether the member’s state was active (=1) or inactive (=0) from the nominal release date
    specified by the effectiveTime field.
    - moduleId,SctId,Identifies the member version’s module. Set to a child of 900000000000443000|Module| within the
    metadata hierarchy.
    - refSetId,SctId,Set to one of the children of the |Complex map type| concept in the metadata hierarchy.
    - referencedComponentId,SctId,The SNOMED CT source concept ID that is the subject of the map record.
    - mapGroup,Integer,An integer identifying a grouping of complex map records which will designate one map target at
    the time of map rule evaluation. Source concepts that require two map targets for classification will have two sets
    of map groups.
    - mapPriority,Integer,Within a map group, the mapPriority specifies the order in which complex map records should be
    evaluated to determine the correct map target.
    - mapRule,String,A machine-readable rule, (evaluating to either ‘true’ or ‘false’ at run-time) that indicates
    whether this map record should be selected within its map group
    - mapAdvice,String,Human-readable advice that may be employed by the software vendor to give an end-user advice on
    selection of the appropriate target code. This includes a) a summary statement of the map rule logic, b) a statement
    of any limitations of the map record and c) additional classification guidance for the coding professional.
    - mapTarget,String,The target ICD-10 classification code of the map record.
    - correlationId,SctId,A child of |Map correlation value| in the metadata hierarchy, identifying the correlation
    between the SNOMED CT concept and the target code.
    - mapCategoryId,SctId,Identifies the SNOMED CT concept in the metadata hierarchy which is the MapCategory for the
    associated map record. This is a subtype of 447634004 |ICD-10 Map Category value|.
    """
    # TODO: If using in the end, import at top of file
    from dateutil import parser as date_parser
    from .sssom_datamodel import MatchTypeEnum

    prefix_map = _ensure_prefix_map(prefix_map)
    ms = _init_mapping_set(meta)

    # SNOMED correlationId -> SSSOM match type.
    # https://www.findacode.com/snomed/447561005--snomed-ct-source-code-to-target-map-correlation-not-specified.html
    # In officially downloaded SNOMED mappings, every record had correlationId 447561005, which
    # happens to mean 'unspecified'. It is listed explicitly on purpose; any other correlationId
    # also falls back to 'Unspecified' until a dedicated translation is added here.
    default_match_type = "Unspecified"
    match_type_by_correlation_id = {447561005: "Unspecified"}

    # TODO: SNOMED: parse as many as possible:
    #  - id
    #  - active
    #  - moduleId
    #  - refsetId
    #  - mapGroup
    #  - mapPriority
    #  - mapRule
    #  - mapAdvice

    # TODO: SSSOM: use as many as possible
    #  - subject_category: Optional[str] = None
    #  - predicate_modifier: Optional[Union[str, "PredicateModifierEnum"]] = None
    #  - object_category: Optional[str] = None
    #  - comment: Optional[str] = None
    #
    #  - author_id: can this be "SNOMED"?
    #  - author_label: can this be "SNOMED"?
    #  - reviewer_id: can this be "SNOMED"?
    #  - reviewer_label: can this be "SNOMED"?
    #  - creator_id: can this be "SNOMED"?
    #  - creator_label: can this be "SNOMED"?
    #  - license: Is this something that can be determined?
    #  - subject_source: URL of some official page for SNOMED version used?
    #  - subject_source_version: Is this knowable?
    #  - object_source: URL of some official page for ICD10CM version used?
    #  - object_source_version: would this be "10CM" as in "ICD10CM"? Or something else? Or nothing?
    #  - mapping_provider: can this be "SNOMED"?
    #  - mapping_cardinality: Could I determine 1:1 or 1:many or many:1 based on:
    #    ...mapGroup, mapPriority, mapRule, mapAdvice?
    #  - match_term_type: What is this?
    #  - see_also: Should this be a URL to the SNOMED term?
    #  - other: What would I put here that I wouldn't put in 'comment'?

    mlist: List[Mapping] = []
    for _, row in df.iterrows():
        match_type = MatchTypeEnum(
            match_type_by_correlation_id.get(row["correlationId"], default_match_type)
        )
        mdict = {
            "subject_id": f"SNOMED:{row['referencedComponentId']}",
            "subject_label": row["referencedComponentName"],
            # Does this represent what we want for our mapping predicate? Or is correlationId more suitable?
            # ...or is there a SKOS predicate I can map to in case where predicate is unknown? I think most of these
            # ...mappings are attempts at exact matches, but I can't be sure (at least not without using these fields
            # ...to determine: mapGroup, mapPriority, mapRule, mapAdvice).
            "predicate_id": f"SNOMED:{row['mapCategoryId']}",
            "predicate_label": row["mapCategoryName"],
            # NOTE(review): a record without a map target would presumably yield an id such as
            # 'ICD10CM:nan' -- confirm whether such rows occur and should be skipped.
            "object_id": f"ICD10CM:{row['mapTarget']}",
            "object_label": row["mapTargetName"],
            # If correlationId is indeed more appropriate for predicate_id, then I don't think there is a
            # ...representative field for 'match_type'.
            "match_type": match_type,
            # effectiveTime is stringified first so that the date parser always receives text.
            "mapping_date": date_parser.parse(str(row["effectiveTime"])).date(),
        }
        mlist.append(_prepare_mapping(Mapping(**mdict)))

    ms.mappings = mlist  # type:ignore
    _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
    doc = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
    return to_mapping_set_dataframe(doc)
634
+
635
+
502
636
# All from_* take as an input a python object (data frame, json, etc) and return a MappingSetDataFrame
503
637
# All read_* take as an input a file handle and return a MappingSetDataFrame (usually wrapping a from_* method)
504
638
@@ -523,6 +657,9 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
523
657
return read_alignment_xml
524
658
elif input_format == "obographs-json" :
525
659
return read_obographs_json
660
+ elif input_format == "snomed-icd10cm-map-tsv" :
661
+ return read_snomed_icd10cm_map_tsv
662
+
526
663
else :
527
664
raise Exception (f"Unknown input format: { input_format } " )
528
665
0 commit comments