|
20 | 20 | from linkml_runtime.loaders.json_loader import JSONLoader
|
21 | 21 | from rdflib import Graph, URIRef
|
22 | 22 |
|
23 |
| -# TODO: PR comment: where matchtypeenum? can't find sssomschema, Mapping, or MappingSet. only MappingSetDataFrame |
24 |
| -# from .sssom_datamodel import Mapping, MappingSet, MatchTypeEnum |
25 | 23 | from sssom_schema import Mapping, MappingSet
|
26 | 24 |
|
27 | 25 |
|
@@ -268,17 +266,21 @@ def parse_snomed_complex_map_tsv(
|
268 | 266 | file_path: str,
|
269 | 267 | prefix_map: Dict[str, str] = None,
|
270 | 268 | meta: Dict[str, str] = None,
|
| 269 | + filter_by_confident_mappings=True |
271 | 270 | ) -> MappingSetDataFrame:
|
272 | 271 | """Parse a special SNOMED ICD10CM mapping file and translate it into a MappingSetDataFrame.
|
273 | 272 |
|
274 |
| - :param file_path: The path to the obographs file |
| 273 | + :param file_path: The path to the source file |
275 | 274 | :param prefix_map: an optional prefix map
|
276 | 275 | :param meta: an optional dictionary of metadata elements
|
| 276 | + :param filter_by_confident_mappings: If True, only include mapping rows whose `mapAdvice` field contains an 'ALWAYS |
| 277 | + <code>' pattern. |
277 | 278 | :return: A SSSOM MappingSetDataFrame
|
278 | 279 | """
|
279 | 280 | raise_for_bad_path(file_path)
|
280 | 281 | df = read_pandas(file_path)
|
281 |
| - df2 = from_snomed_complex_map_tsv(df, prefix_map=prefix_map, meta=meta) |
| 282 | + df2 = from_snomed_complex_map_tsv( |
| 283 | + df, prefix_map=prefix_map, meta=meta, filter_by_confident_mappings=filter_by_confident_mappings) |
282 | 284 | return df2
|
283 | 285 |
|
284 | 286 |
|
@@ -691,12 +693,15 @@ def from_snomed_complex_map_tsv(
|
691 | 693 | df: pd.DataFrame,
|
692 | 694 | prefix_map: Optional[PrefixMap] = None,
|
693 | 695 | meta: Optional[MetadataType] = None,
|
| 696 | + filter_by_confident_mappings=True |
694 | 697 | ) -> MappingSetDataFrame:
|
695 | 698 | """Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.
|
696 | 699 |
|
697 | 700 | :param df: A mappings dataframe
|
698 | 701 | :param prefix_map: A prefix map
|
699 | 702 | :param meta: A metadata dictionary
|
| 703 | + :param filter_by_confident_mappings: If True, only include mapping rows whose `mapAdvice` field contains an 'ALWAYS |
| 704 | + <code>' pattern. |
700 | 705 | :return: MappingSetDataFrame
|
701 | 706 |
|
702 | 707 | # Field descriptions
|
@@ -730,11 +735,23 @@ def from_snomed_complex_map_tsv(
|
730 | 735 | - mapCategoryId,SctId,Identifies the SNOMED CT concept in the metadata hierarchy which is the MapCategory for the
|
731 | 736 | associated map record. This is a subtype of 447634004 |ICD-10 Map Category value|.,
|
732 | 737 | """
|
| 738 | + # Local variables |
733 | 739 | # https://www.findacode.com/snomed/447561005--snomed-ct-source-code-to-target-map-correlation-not-specified.html
|
734 |
| - match_type_snomed_unspecified_id = 447561005 |
| 740 | + mapping_justification_snomed_unspecified_id = 447561005 |
| 741 | + # - Note: joeflack4: I used this info as a reference for this pattern. |
| 742 | + # https://www.medicalbillingandcoding.org/icd-10-cm/#:~:text=ICD%2D10%2DCM%20is%20a,decimal%20point%20and%20the%20subcategory. |
| 743 | + always_confidence_pattern = 'ALWAYS [A-Z]{1}[0-9]{1,2}\.[0-9A-Z]{1,4}' |
| 744 | + always_confidence_antipattern = always_confidence_pattern + '\?' |
735 | 745 | prefix_map = _ensure_prefix_map(prefix_map)
|
736 | 746 | ms = _init_mapping_set(meta)
|
737 | 747 |
|
| 748 | + # Filtering |
| 749 | + if filter_by_confident_mappings: |
| 750 | + df = df[ |
| 751 | + (df['mapAdvice'].str.contains(always_confidence_pattern, regex=True, na=False)) & |
| 752 | + (~df['mapAdvice'].str.contains(always_confidence_antipattern, regex=True, na=False))] |
| 753 | + |
| 754 | + # Map mappings |
738 | 755 | mlist: List[Mapping] = []
|
739 | 756 | for _, row in df.iterrows():
|
740 | 757 | mdict = {
|
@@ -766,16 +783,20 @@ def from_snomed_complex_map_tsv(
|
766 | 783 | 'object_id': f'ICD10CM:{row["mapTarget"]}',
|
767 | 784 | 'object_label': row['mapTargetName'],
|
768 | 785 |
|
769 |
| - # match_type <- mapRule? |
| 786 | + # mapping_justification <- mapRule? |
770 | 787 | # ex: TRUE: when "ALWAYS <code>" is in pipe-delimited list in mapAdvice, this always shows TRUE. Does this
|
771 | 788 | # mean I could use skos:exactMatch in these cases?
|
772 |
| - # match_type <- correlationId?: This may look redundant, but I want to be explicit. In officially downloaded |
| 789 | + # mapping_justification <- correlationId?: This may look redundant, but I want to be explicit. In officially downloaded |
773 | 790 | # SNOMED mappings, all of them had correlationId of 447561005, which also happens to be 'unspecified'.
|
774 | 791 | # If correlationId is indeed more appropriate for predicate_id, then I don't think there is a representative
|
775 |
| - # field for 'match_type'. |
776 |
| - 'match_type': MatchTypeEnum('Unspecified') if row['correlationId'] == match_type_snomed_unspecified_id \ |
777 |
| - else MatchTypeEnum('Unspecified'), |
778 |
| - |
| 792 | + # field for 'mapping_justification'. |
| 793 | + # TODO: How to properly get mapping_justification? |
| 794 | + # I think I need to use sssom_schema.slots.mapping_justification, but I am not sure how to use it. |
| 795 | + # slots.mapping_justification = Slot(uri=SSSOM.mapping_justification, name="mapping_justification", curie=SSSOM.curie('mapping_justification'), |
| 796 | + # model_uri=SSSOM.mapping_justification, domain=None, range=Union[str, EntityReference], |
| 797 | + # pattern=re.compile(r'^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining)$')) |
| 798 | + 'mapping_justification': |
| 799 | + 'Unspecified' if row['correlationId'] == mapping_justification_snomed_unspecified_id else 'Unspecified', |
779 | 800 | 'mapping_date': date_parser.parse(str(row['effectiveTime'])).date(),
|
780 | 801 | 'other': '|'.join([f'{k}={str(row[k])}' for k in [
|
781 | 802 | 'id',
|
|
0 commit comments