Skip to content

Commit e62c362

Browse files
committed
Removed the logic that collects the cache files associated with filtering the dataset
1 parent eddf318 commit e62c362

File tree

3 files changed

+1
-6
lines changed

3 files changed

+1
-6
lines changed

src/cehrbert/data_generators/hf_data_generator/hf_dataset.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,6 @@ def create_cehrbert_pretraining_dataset(
4343

4444
# Remove patients without any records
4545
dataset = filter_dataset(dataset, data_args)
46-
if cache_file_collector:
47-
cache_file_collector.add_cache_files(dataset)
4846
# If the data is already in meds, we don't need to sort the sequence anymore
4947
if data_args.is_data_in_meds:
5048
mapping_functions = [HFTokenizationMapping(concept_tokenizer, True)]
@@ -85,8 +83,6 @@ def create_cehrbert_finetuning_dataset(
8583

8684
# Remove patients without any records
8785
dataset = filter_dataset(dataset, data_args)
88-
if cache_file_collector:
89-
cache_file_collector.add_cache_files(dataset)
9086
if data_args.is_data_in_meds:
9187
mapping_functions = [
9288
HFFineTuningMapping(),

src/cehrbert/runners/hf_cehrbert_pretrain_runner.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,6 @@ def main():
230230
f"validation_split_num: {data_args.validation_split_num}\n"
231231
f"streaming: {data_args.streaming}"
232232
)
233-
cache_file_collector.add_cache_files(dataset)
234233
# Create the CEHR-BERT tokenizer if it's not available in the output folder
235234
tokenizer = load_and_create_tokenizer(data_args=data_args, model_args=model_args, dataset=dataset)
236235
# sort the patient features chronologically and tokenize the data

tests/integration_tests/runners/hf_cehrbert_pretrain_runner_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def test_train_model(self):
6767
"10",
6868
"--save_strategy",
6969
"steps",
70-
"--evaluation_strategy",
70+
"--eval_strategy",
7171
"steps",
7272
"--do_train",
7373
"true",

0 commit comments

Comments
 (0)