Skip to content

Commit c0594c9

Browse files
committed
added a check on the concept_value
1 parent 0b4ef25 commit c0594c9

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

src/cehrbert/data_generators/hf_data_generator/hf_dataset_mapping.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -582,8 +582,10 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
582582
if "concept_values" not in record:
583583
record["concept_values"] = record["number_as_values"]
584584

585-
if np.isnan(record["concept_values"]).any():
586-
record["concept_values"] = [v if not pd.isna(v) else 0.0 for v in record["concept_values"]]
585+
concept_value_is_nan = np.isnan(record["concept_values"])
586+
if concept_value_is_nan.any():
587+
concept_value_masks[concept_value_is_nan] = 0
588+
record["concept_value_masks"] = concept_value_masks
587589

588590
assert len(input_ids) == len(
589591
record["concept_ids"]

src/cehrbert/models/hf_models/tokenization_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def map_statistics(batch: Dict[str, Any], capacity=100, value_outlier_std=2.0) -
4242
for concept_id, concept_value, concept_value_indicator, unit in zip(
4343
concept_ids, concept_values, concept_value_indicators, units
4444
):
45-
if concept_value_indicator == 1:
45+
if concept_value_indicator == 1 and concept_value:
4646
numeric_stats_by_lab[(concept_id, unit)].add(1, concept_value)
4747
return {"numeric_stats_by_lab": numeric_stats_by_lab}
4848

0 commit comments

Comments
 (0)