Skip to content

Commit ae72662

Browse files
committed
CU-8699np02n: Update CDB legacy conversion so that it works with CDBs with no name_isupper attribute (#28)
1 parent 046ea20 commit ae72662

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

medcat-v2/medcat/utils/legacy/convert_cdb.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ def load_old_raw_data(old_path: str) -> dict:
6262
]
6363
NAME2KEYS = {'name2cuis', 'name2cuis2status', 'name2count_train',
6464
'name_isupper'}
65+
OPTIONAL_NAME2_KEYS = {"name_isupper", }
6566
CUI2KEYS = {'cui2names', 'cui2snames', 'cui2context_vectors',
6667
'cui2count_train', 'cui2info', 'cui2tags', 'cui2type_ids',
6768
'cui2preferred_name', 'cui2average_confidence', }
@@ -167,6 +168,8 @@ def _add_cui_info(cdb: CDB, data: dict) -> CDB:
167168
def _add_name_info(cdb: CDB, data: dict) -> CDB:
168169
all_names = set()
169170
for key in NAME2KEYS:
171+
if key in OPTIONAL_NAME2_KEYS and key not in data:
172+
continue
170173
cnames = data[key].keys()
171174
logger.debug("Adding %d names based on '%s", len(cnames), key)
172175
all_names.update(cnames)
@@ -181,7 +184,7 @@ def _add_name_info(cdb: CDB, data: dict) -> CDB:
181184
# so v2 only uses the latter since it provides extra information
182185
name2cuis2status = data['name2cuis2status']
183186
name2cnt_train = data['name2count_train']
184-
name2is_upper = data['name_isupper']
187+
name2is_upper = data.get('name_isupper', {})
185188
for name in all_names:
186189
cuis2status: dict[str, str] = {}
187190
_cuis2status = name2cuis2status.get(name, {})

0 commit comments

Comments
 (0)