Skip to content

Commit 4c0495e

Browse files
authored
feat(medcat): CU-869aknekf Add mapping to ontologies (#147)
* CU-869aknekf: Remove simple_hash config option. It's an unused relic from v1 * CU-869aknekf: Add config option for mapping to other ontologies * CU-869aknekf: Implement mapping to other ontologies * CU-869aknekf: Fix typing issue with ontology mappings * CU-869aknekf: Add a few tests to check mappings are appropriately added * CU-869aknekf: Remove extra whitespace * CU-869aknekf: Removed TODO that has been addressed regarding output * CU-869aknekf: Move logged message from debug to warning
1 parent 5b62d27 commit 4c0495e

File tree

3 files changed

+63
-12
lines changed

3 files changed

+63
-12
lines changed

medcat-v2/medcat/cat.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -523,22 +523,34 @@ def _get_entity(self, ent: MutableEntity,
523523
'context_similarity': ent.context_similarity,
524524
'start': ent.base.start_char_index,
525525
'end': ent.base.end_char_index,
526-
# TODO: add additional info (i.e mappings)
527-
# for addl in addl_info:
528-
# tmp = self.cdb.addl_info.get(addl, {}).get(cui, [])
529-
# out_ent[addl.split("2")[-1]] = list(tmp) if type(tmp) is
530-
# set else tmp
531526
'id': ent.id,
532-
# TODO: add met annotations
533-
# if hasattr(ent._, 'meta_anns') and ent._.meta_anns:
534-
# out_ent['meta_anns'] = ent._.meta_anns
535527
'meta_anns': {},
536528
'context_left': left_context,
537529
'context_center': center_context,
538530
'context_right': right_context,
539531
}
540532
# addons:
541533
out_dict.update(self.get_addon_output(ent)) # type: ignore
534+
# other ontologies
535+
if self.config.general.map_to_other_ontologies:
536+
for ont in self.config.general.map_to_other_ontologies:
537+
if ont in out_dict:
538+
logger.warning(
539+
"Trying to map to ontology '%s', but it already "
540+
"exists in the out dict, so unable to add it. "
541+
"If this is for an actual ontology that shares a "
542+
"name with something else, consider renaming the "
543+
"mapping in `cdb.addl_info`", ont)
544+
continue
545+
addl_info_name = f"cui2{ont}"
546+
if addl_info_name not in self.cdb.addl_info:
547+
logger.warning(
548+
"Trying to map to ontology '%s' but it is not set in "
549+
"addl_info so unable to do so", ont)
550+
continue
551+
ont_map = self.cdb.addl_info[addl_info_name]
552+
ont_values = ont_map.get(cui, [])
553+
out_dict[ont] = ont_values # type: ignore
542554
return out_dict
543555

544556
def get_addon_output(self, ent: MutableEntity) -> dict[str, dict]:

medcat-v2/medcat/config/config.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,17 @@ class General(SerialisableBaseModel):
252252
map_cui_to_group: bool = False
253253
"""If the cdb.addl_info['cui2group'] is provided and this option enabled,
254254
each CUI will be mapped to the group"""
255-
simple_hash: bool = False
256-
"""Whether to use a simple hash.
255+
map_to_other_ontologies: list[str] = ["opcs4", "icd10"]
256+
"""Which other ontologies to map to if possible.
257257
258-
NOTE: While using a simple hash is faster at save time, it is less
259-
reliable due to not taking into account all the details of the changes."""
258+
This will force medcat to include mapping for other ontologies in
259+
its outputs. It will use the mappings in `cdb.addl_info["cui2<ont>"]`
260+
if they are present.
261+
262+
NB!
263+
This will only work if the `cdb.addl_info["cui2<ont>"]` exists.
264+
Otherwise, no mapping will be done.
265+
"""
260266

261267
class Config:
262268
extra = 'allow'

medcat-v2/tests/test_cat.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,39 @@ def test_entities_in_correct_order(self):
159159
cur_start = ent.base.start_char_index
160160

161161

162+
class InferenceIntoOntologyTests(TrainedModelTests):
163+
ont_name = "FAKE_ONT"
164+
165+
@classmethod
166+
def setUpClass(cls):
167+
super().setUpClass()
168+
# create mapping
169+
cls.ont_map = {
170+
cui: [f"{cls.ont_name}:{cui}"]
171+
for cui in cls.model.cdb.cui2info
172+
}
173+
# add to addl_info
174+
cls.model.cdb.addl_info[f"cui2{cls.ont_name}"] = cls.ont_map
175+
# ask to be mapped
176+
cls.model.config.general.map_to_other_ontologies.append(cls.ont_name)
177+
178+
def assert_has_mapping(self, ent: dict):
179+
# has value
180+
self.assertIn(self.ont_name, ent)
181+
val = ent[self.ont_name]
182+
# 1 value
183+
self.assertEqual(len(val), 1)
184+
# value in our map
185+
self.assertIn(val, self.ont_map.values())
186+
187+
def test_gets_mappings(self):
188+
ents = self.model.get_entities(
189+
ConvertedFunctionalityTests.TEXT)['entities']
190+
for nr, ent in enumerate(ents.values()):
191+
with self.subTest(f"{nr}"):
192+
self.assert_has_mapping(ent)
193+
194+
162195
class CATIncludingTests(unittest.TestCase):
163196
TOKENIZING_PROVIDER = 'regex'
164197
EXPECT_TRAIN = {}

0 commit comments

Comments
 (0)