Skip to content

Commit 7d90e91

Browse files
committed
New Feature: SNOMED::ICD10CM Mapping Support
- Added feature to allow for conversion of these premade mappings provided by SNOMED into SSSOM format. (WIP) General updates - cli.py: Reorganized SSSOM_READ_FORMATS: Top half are plain data formats, and bottom half are special-case formats. Both halves of the list are alphabetically sorted. Tentative updates - Changed some relative imports to absolute imports, in order to speed up development and make debugging easier. It is possible that this could be a good permanent change too, though. Dev updates - requirements-unlocked.txt: Unversioned variation of requirements.txt - requirements.txt: Included this because I don't know if setup.py is sufficient to install packages in a standalone dev environment. Also, it is shown to be expected in the documentation: https://mapping-commons.github.io/sssom-py/installation.html#installation-for-developers
1 parent 52ad5ae commit 7d90e91

File tree

5 files changed

+224
-10
lines changed

5 files changed

+224
-10
lines changed

requirements-unlocked.txt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
pyparsing
2+
click
3+
linkml-runtime
4+
networkx
5+
numpy
6+
pandas
7+
pandasql
8+
pyyaml
9+
rdflib
10+
recommonmark
11+
scikit_learn
12+
scipy
13+
setuptools
14+
sparqlwrapper
15+
validators
16+
# joeflack4 2022/02/24: I commented these specific versions out for now because of these errors:
17+
# ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
18+
# sparqlslurper 0.4.1 requires rdflib~=5.0, but you have rdflib 6.1.1 which is incompatible.
19+
# pyshexc 0.8.3 requires rdflib~=5.0, but you have rdflib 6.1.1 which is incompatible.
20+
# pyshex 0.7.20 requires rdflib~=5.0, but you have rdflib 6.1.1 which is incompatible.
21+
# linkml 1.0.2 requires rdflib~=5.0, but you have rdflib 6.1.1 which is incompatible.
22+
# linkml-model 1.0.0 requires rdflib~=5.0, but you have rdflib 6.1.1 which is incompatible.
23+
# pyparsing==2.4.7
24+
# click
25+
# linkml-runtime>=1.1.12
26+
# networkx
27+
# numpy
28+
# pandas
29+
# pandasql
30+
# pyyaml
31+
# rdflib>=6
32+
# recommonmark>=0.7
33+
# scikit_learn
34+
# scipy
35+
# setuptools
36+
# sparqlwrapper
37+
# validators
38+
# validators>=0.0

requirements.txt

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
alabaster==0.7.12
2+
antlr4-python3-runtime==4.9.2
3+
appdirs==1.4.4
4+
attrs==21.2.0
5+
Babel==2.9.1
6+
cachetools==4.2.2
7+
certifi==2021.5.30
8+
CFGraph==0.2.1
9+
chardet==4.0.0
10+
charset-normalizer==2.0.3
11+
click==7.1.2
12+
commonmark==0.9.1
13+
decorator==4.4.2
14+
Deprecated==1.2.12
15+
distlib==0.3.2
16+
docutils==0.17.1
17+
filelock==3.0.12
18+
frozendict==2.0.3
19+
graphviz==0.17
20+
greenlet==1.1.0
21+
hbreader==0.9.1
22+
idna==3.2
23+
imagesize==1.2.0
24+
iniconfig==1.1.1
25+
isodate==0.6.0
26+
Jinja2==3.0.1
27+
joblib==1.0.1
28+
json-flattener==0.1.7
29+
jsonasobj==1.2.1
30+
jsonasobj2==1.0.4
31+
jsonschema==3.2.0
32+
linkml==1.0.2
33+
linkml-model==1.0.0
34+
linkml-runtime==1.1.24
35+
lxml==4.6.3
36+
MarkupSafe==2.0.1
37+
networkx==2.5.1
38+
numpy==1.21.1
39+
packaging==21.0
40+
pandas==1.1.5
41+
pandasql==0.7.3
42+
parse==1.19.0
43+
pbr==5.6.0
44+
pluggy==0.13.1
45+
prefixcommons==0.1.9
46+
prologterms==0.0.6
47+
py==1.10.0
48+
Pygments==2.9.0
49+
PyJSG==0.11.6
50+
PyLD==2.0.3
51+
PyLDmod==2.0.5
52+
pyparsing==2.4.7
53+
pyrsistent==0.18.0
54+
PyShEx==0.7.20
55+
PyShExC==0.8.3
56+
pytest==6.2.4
57+
python-dateutil==2.8.2
58+
pytz==2021.1
59+
PyYAML==5.4.1
60+
rdflib==6.1.1
61+
rdflib-jsonld==0.6.1
62+
rdflib-pyld-compat==0.1.0
63+
rdflib-pyldmod-compat==0.1.2
64+
rdflib-shim==1.0.3
65+
recommonmark==0.7.1
66+
requests==2.26.0
67+
scikit-learn==0.24.2
68+
scipy==1.7.0
69+
ShExJSG==0.7.1
70+
six==1.16.0
71+
snowballstemmer==2.1.0
72+
sparqlslurper==0.4.1
73+
SPARQLWrapper==1.8.5
74+
Sphinx==2.4.4
75+
sphinx-click==2.3.1
76+
sphinx-rtd-theme==0.4.3
77+
sphinxcontrib-applehelp==1.0.2
78+
sphinxcontrib-devhelp==1.0.2
79+
sphinxcontrib-htmlhelp==2.0.0
80+
sphinxcontrib-jsmath==1.0.1
81+
sphinxcontrib-qthelp==1.0.3
82+
sphinxcontrib-serializinghtml==1.1.5
83+
SQLAlchemy==1.4.21
84+
stevedore==3.3.0
85+
testfixtures==6.18.0
86+
threadpoolctl==2.2.0
87+
toml==0.10.2
88+
tox==3.23.1
89+
urllib3==1.26.6
90+
validators==0.18.2
91+
virtualenv==20.4.7
92+
virtualenv-clone==0.5.4
93+
virtualenvwrapper==4.8.4
94+
watchdog==2.1.3
95+
wrapt==1.12.1
96+
zipp==3.5.0

sssom/cli.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
from rdflib import Graph
2525
from scipy.stats import chi2_contingency
2626

27-
from .cliques import split_into_cliques, summarize_cliques
28-
from .io import convert_file, parse_file, split_file, validate_file
29-
from .parsers import read_sssom_table
30-
from .rdf_util import rewire_graph
31-
from .sparql_util import EndpointConfig, query_mappings
32-
from .util import (
27+
from sssom.cliques import split_into_cliques, summarize_cliques
28+
from sssom.io import convert_file, parse_file, split_file, validate_file
29+
from sssom.parsers import read_sssom_table
30+
from sssom.rdf_util import rewire_graph
31+
from sssom.sparql_util import EndpointConfig, query_mappings
32+
from sssom.util import (
3333
SSSOM_EXPORT_FORMATS,
3434
SSSOM_READ_FORMATS,
3535
MappingSetDataFrame,
@@ -41,7 +41,7 @@
4141
remove_unmatched,
4242
to_mapping_set_dataframe,
4343
)
44-
from .writers import write_table
44+
from sssom.writers import write_table
4545

4646
# Click input options common across commands
4747
input_argument = click.argument("input", required=True, type=click.Path())

sssom/parsers.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,46 @@ def read_obographs_json(
140140
)
141141

142142

143+
def read_snomed_icd10cm_map_tsv(
    file_path: str,
    prefix_map: Optional[Dict[str, str]] = None,
    meta: Optional[Dict[str, str]] = None,
) -> MappingSetDataFrame:
    """Parse a SNOMED::ICD10CM mapping TSV file and translate it into a MappingSetDataFrame.

    Metadata returned by ``_read_metadata_from_table`` for the file is merged with the
    externally provided ``meta``: keys that only exist in ``meta`` are added, while for
    keys present in both the value read from the file is kept and a warning is logged
    when the two values differ.

    :param file_path: The path to the SNOMED ICD10CM mapping file
    :param prefix_map: an optional prefix map
    :param meta: an optional dictionary of metadata elements
    :return: A SSSOM MappingSetDataFrame
    """
    raise_for_bad_path(file_path)
    df = read_pandas(file_path)

    # TODO: DRYify: Put this in a func as is used multiple times now. - joeflack4 2022/02/24
    # If SSSOM external metadata is provided, merge it with the internal metadata
    sssom_metadata = _read_metadata_from_table(file_path)
    if sssom_metadata:
        # ``meta or {}`` makes the loop a no-op when no external metadata was given.
        for k, v in (meta or {}).items():
            if k not in sssom_metadata:
                logging.info(
                    f"Externally provided metadata {k}:{v} is added to metadata set."
                )
                sssom_metadata[k] = v
            elif sssom_metadata[k] != v:
                # Conflict: the value read from the file wins, the caller is only warned.
                logging.warning(
                    f"SSSOM internal metadata {k} ({sssom_metadata[k]}) "
                    f"conflicts with provided ({meta[k]})."
                )
        meta = sssom_metadata

    prefix_map, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta)

    return from_snomed_icd10cm_map_tsv(df, prefix_map=prefix_map, meta=meta)
181+
182+
143183
def _get_prefix_map_and_metadata(
144184
prefix_map: Optional[PrefixMap] = None, meta: Optional[MetadataType] = None
145185
) -> Metadata:
@@ -499,6 +539,42 @@ def from_obographs(
499539
return to_mapping_set_dataframe(mdoc)
500540

501541

542+
def from_snomed_icd10cm_map_tsv(
    df: pd.DataFrame,
    *,
    prefix_map: Optional[PrefixMap] = None,
    meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
    """Convert a snomed_icd10cm_map dataframe to a MappingSetDataFrame.

    Note that blank ``confidence`` cells of ``df`` are replaced by NaN on the
    dataframe that is passed in, i.e. the caller's dataframe is modified.

    :param df: A mappings dataframe
    :param prefix_map: A prefix map
    :param meta: A metadata dictionary
    :return: MappingSetDataFrame
    """
    prefix_map = _ensure_prefix_map(prefix_map)

    # TODO: repurpose boilerplate below
    if "confidence" in df.columns:
        # Assign the result back instead of calling ``replace(..., inplace=True)`` on the
        # column selection: the chained in-place form is deprecated in pandas 2.x and
        # does nothing under Copy-on-Write. ``np.nan`` is used because the ``np.NaN``
        # alias was removed in NumPy 2.0.
        df["confidence"] = df["confidence"].replace(r"^\s*$", np.nan, regex=True)

    mlist: List[Mapping] = []
    ms = _init_mapping_set(meta)
    bad_attrs: typing.Counter[str] = Counter()
    for _, row in df.iterrows():
        mdict, ms, bad_attrs = _get_mdict_ms_and_bad_attrs(row, ms, bad_attrs)
        mlist.append(_prepare_mapping(Mapping(**mdict)))

    # Report every attribute counted in ``bad_attrs`` once, most frequent first.
    for k, v in bad_attrs.most_common():
        logging.warning(f"No attr for {k} [{v} instances]")
    # the autogenerated code's type annotations are _really_ messy. This is in fact okay,
    # so with a heavy heart we employ type:ignore
    ms.mappings = mlist  # type:ignore
    _set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
    doc = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
    return to_mapping_set_dataframe(doc)
576+
577+
502578
# All from_* take as an input a python object (data frame, json, etc) and return a MappingSetDataFrame
503579
# All read_* take as an input a file handle and return a MappingSetDataFrame (usually wrapping a from_* method)
504580

@@ -523,6 +599,9 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
523599
return read_alignment_xml
524600
elif input_format == "obographs-json":
525601
return read_obographs_json
602+
elif input_format == "snomed-icd10cm-map-tsv":
603+
return read_snomed_icd10cm_map_tsv
604+
526605
else:
527606
raise Exception(f"Unknown input format: {input_format}")
528607

sssom/util.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,13 @@
4343
PREFIX_MAP_KEY = "curie_map"
4444

4545
SSSOM_READ_FORMATS = [
46-
"tsv",
47-
"rdf",
46+
"json",
4847
"owl",
48+
"rdf",
49+
"tsv",
4950
"alignment-api-xml",
5051
"obographs-json",
51-
"json",
52+
"snomed-icd10cm-map-tsv"
5253
]
5354
SSSOM_EXPORT_FORMATS = ["tsv", "rdf", "owl", "json"]
5455

0 commit comments

Comments
 (0)