Skip to content

Commit 475af31

Browse files
committed
Set None units to the default value N/A
1 parent 422481c commit 475af31

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from meds.schema import birth_code, death_code
1818
from pandas import Series
1919

20-
from cehrbert.med_extension.schema_extension import Event, Visit
20+
from cehrbert.med_extension.schema_extension import Event
2121
from cehrbert.models.hf_models.tokenization_hf_cehrbert import CehrBertTokenizer
2222
from cehrbert.runners.hf_runner_argument_dataclass import DataTrainingArguments
2323

@@ -578,6 +578,13 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
578578
input_ids = self._concept_tokenizer.encode(record["concept_ids"])
579579
record["input_ids"] = input_ids
580580
concept_value_masks = record["concept_value_masks"]
581+
units = record["units"]
582+
none_values = np.array([x is None for x in units])
583+
if none_values.any():
584+
units = record["units"].copy()
585+
units[none_values] = NA
586+
record["units"] = units
587+
581588
# Backward compatibility
582589
if "concept_values" not in record:
583590
record["concept_values"] = record["number_as_values"]

0 commit comments

Comments
 (0)