Skip to content

Commit 56538d0

Browse files
authored
removed all legacy CEHR-BERT code using tensorflow and keras (#125)
* removed all legacy CEHR-BERT code using tensorflow and keras * try freeing up space in the github VM * restrict the package version
1 parent fb5d8ab commit 56538d0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+160
-7758
lines changed

.github/workflows/tests.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@ jobs:
2323
uses: actions/setup-python@v3
2424
with:
2525
python-version: "3.10"
26+
27+
- name: Free up disk space
28+
run: |
29+
sudo rm -rf /usr/share/dotnet
30+
sudo rm -rf /opt/ghc
31+
sudo rm -rf "/usr/local/share/boost"
32+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
33+
sudo docker image prune --all --force
34+
df -h
35+
2636
- name: Install dependencies
2737
run: |
2838
python -m pip install --upgrade pip

constraints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
xformers==0 # blocks installation

pyproject.toml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,15 @@ dependencies = [
3939
"numpy>=1.24.3",
4040
"packaging>=23.2",
4141
"pandas>=2.2.0",
42-
"peft>=0.10.0",
42+
"peft==0.10.0",
4343
"Pillow>=10.3.0",
4444
"pyarrow>=15.0.0",
4545
"python-dateutil==2.8.2",
4646
"PyYAML==6.0.1",
4747
"scikit-learn==1.4.0",
4848
"scipy==1.12.0",
49-
"tensorflow>=2.15.0",
50-
"keras==2.15.0",
51-
"tensorflow-metal>=1.1.0; sys_platform == 'darwin'", # macOS only
52-
"tensorflow-datasets>=4.5.2",
5349
"tqdm>=4.66.1",
54-
"torch==2.4.0",
50+
"torch>=2.4.0, <=2.8.0",
5551
"tokenizers>=0.19.0",
5652
"transformers>=4.41.0, <= 4.45.0",
5753
"accelerate>=0.31.0",
File renamed without changes.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import pandas as pd
2+
3+
from .evaluation_parse_args import create_evaluation_args
4+
from .model_evaluators.frequency_model_evaluators import (
5+
LogisticRegressionModelEvaluator,
6+
XGBClassifierEvaluator,
7+
)
8+
9+
10+
def evaluate_baseline_models(args):
    """Run the logistic-regression and XGBoost baseline evaluators on one dataset.

    The dataset is read from the parquet file at ``args.data_path``. When
    ``args.patient_splits_folder`` is set, the patient splits are read from that
    parquet location and the rows whose ``split`` column equals ``"test"`` are
    handed to both evaluators as ``test_person_ids``.

    Args:
        args: Parsed command-line namespace. Must provide ``data_path``,
            ``patient_splits_folder``, ``evaluation_folder``, ``num_of_folds``,
            ``is_transfer_learning`` and ``training_percentage``.
            ``k_fold_test`` is optional and is treated as ``False`` when absent.
    """
    dataset = pd.read_parquet(args.data_path)

    test_person_ids = None
    if args.patient_splits_folder:
        patient_splits = pd.read_parquet(args.patient_splits_folder)
        # The filtered rows (not a bare id list) are passed on to the evaluators.
        test_person_ids = patient_splits[patient_splits["split"] == "test"]

    # Both evaluators take exactly the same configuration, so build it once.
    evaluator_kwargs = {
        "dataset": dataset,
        "evaluation_folder": args.evaluation_folder,
        "num_of_folds": args.num_of_folds,
        "is_transfer_learning": args.is_transfer_learning,
        "training_percentage": args.training_percentage,
        # The namespace may not carry ``k_fold_test`` (e.g. when the parser does
        # not register that flag); fall back to the evaluators' own default
        # instead of failing with AttributeError.
        "k_fold_test": getattr(args, "k_fold_test", False),
        "test_person_ids": test_person_ids,
    }

    # Order is preserved: logistic regression is evaluated before XGBoost.
    for evaluator_cls in (LogisticRegressionModelEvaluator, XGBClassifierEvaluator):
        evaluator_cls(**evaluator_kwargs).eval_model()
37+
38+
39+
def main(args):
    """Entry point: evaluate the baseline models with the parsed arguments."""
    evaluate_baseline_models(args)


if __name__ == "__main__":
    # Build the CLI parser, parse sys.argv, and run the evaluation.
    cli_parser = create_evaluation_args()
    main(cli_parser.parse_args())
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import argparse
2+
3+
4+
def create_evaluation_args():
    """Build the command-line parser for the baseline model evaluation.

    Options:
        -d / --data_path: required; the training data path.
        --patient_splits_folder: optional; the test person_ids data.
        -ef / --evaluation_folder: required.
        -n / --num_of_folds: optional int, defaults to 4.
        --is_transfer_learning: boolean flag, defaults to False.
        --training_percentage: optional float, defaults to 1.0.
        --k_fold_test: boolean flag, defaults to False.

    Returns:
        argparse.ArgumentParser: the configured (not yet parsed) parser.
    """
    main_parser = argparse.ArgumentParser(description="Arguments for evaluating the models")
    main_parser.add_argument(
        "-d",
        "--data_path",
        dest="data_path",
        action="store",
        help="The training data path",
        required=True,
    )
    main_parser.add_argument(
        "--patient_splits_folder",
        dest="patient_splits_folder",
        action="store",
        help="The test person_ids data",
        required=False,
    )
    main_parser.add_argument(
        "-ef",
        "--evaluation_folder",
        dest="evaluation_folder",
        action="store",
        required=True,
    )
    main_parser.add_argument(
        "-n",
        "--num_of_folds",
        dest="num_of_folds",
        action="store",
        required=False,
        type=int,
        default=4,
    )
    main_parser.add_argument("--is_transfer_learning", dest="is_transfer_learning", action="store_true")
    main_parser.add_argument(
        "--training_percentage",
        dest="training_percentage",
        required=False,
        action="store",
        type=float,
        default=1.0,
    )
    # The evaluation entry point reads ``args.k_fold_test``; without this flag
    # the parsed namespace has no such attribute and the lookup fails.
    main_parser.add_argument(
        "--k_fold_test",
        dest="k_fold_test",
        action="store_true",
        help="Forwarded to the evaluators as k_fold_test",
    )
    return main_parser

src/cehrbert/evaluations/__init__.py renamed to src/cehrbert/baseline_evaluation/model_evaluators/__init__.py

File renamed without changes.

src/cehrbert/evaluations/model_evaluators/frequency_model_evaluators.py renamed to src/cehrbert/baseline_evaluation/model_evaluators/frequency_model_evaluators.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from tensorflow.keras.preprocessing.text import Tokenizer
1111
from xgboost import XGBClassifier
1212

13-
from cehrbert.evaluations.model_evaluators.model_evaluators import AbstractModelEvaluator
13+
from .model_evaluators import AbstractModelEvaluator
1414
from cehrbert.utils.model_utils import compute_binary_metrics
1515

1616

src/cehrbert/evaluations/model_evaluators/model_evaluators.py renamed to src/cehrbert/baseline_evaluation/model_evaluators/model_evaluators.py

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,62 @@
11
import copy
22
import os
33
import pathlib
4-
from abc import abstractmethod
4+
import logging
5+
from abc import ABC, abstractmethod
6+
from cehrbert.utils.model_utils import create_folder_if_not_exist
57

6-
import tensorflow as tf
78

8-
from cehrbert.trainers.model_trainer import AbstractModel
9+
class AbstractModel(ABC):
    """Base class that builds a model at construction time and names its output folders.

    Subclasses must implement ``_create_model``, ``train_model`` and
    ``get_model_folder``. Each folder getter passes the model folder and a fixed
    sub-folder name to ``create_folder_if_not_exist`` and returns that call's result
    (presumably the path of the created folder; confirm against the helper).
    """

    def __init__(self, *args, **kwargs):
        """Store the result of ``_create_model(*args, **kwargs)`` on ``self._model``."""
        super().__init__()
        built_model = self._create_model(*args, **kwargs)
        self._model = built_model

    @abstractmethod
    def _create_model(self, *args, **kwargs):
        """Return the underlying model; its result is kept on ``self._model``."""

    @abstractmethod
    def train_model(self, *args, **kwargs):
        """Train the model; to be implemented by subclasses."""

    @abstractmethod
    def get_model_folder(self):
        """Return the folder under which the sub-folders below are requested."""

    def get_model_metrics_folder(self):
        """Return the ``metrics`` sub-folder of the model folder."""
        model_folder = self.get_model_folder()
        return create_folder_if_not_exist(model_folder, "metrics")

    def get_model_test_metrics_folder(self):
        """Return the ``test_metrics`` sub-folder of the model folder."""
        model_folder = self.get_model_folder()
        return create_folder_if_not_exist(model_folder, "test_metrics")

    def get_model_test_prediction_folder(self):
        """Return the ``test_prediction`` sub-folder of the model folder."""
        model_folder = self.get_model_folder()
        return create_folder_if_not_exist(model_folder, "test_prediction")

    def get_model_history_folder(self):
        """Return the ``history`` sub-folder of the model folder."""
        model_folder = self.get_model_folder()
        return create_folder_if_not_exist(model_folder, "history")

    @classmethod
    def get_logger(cls):
        """Return the logger named after the concrete class."""
        logger_name = cls.__name__
        return logging.getLogger(logger_name)

    def __str__(self):
        """Return the concrete class name."""
        return type(self).__name__
2544

2645

2746
class AbstractModelEvaluator(AbstractModel):
2847
def __init__(
29-
self,
30-
dataset,
31-
evaluation_folder,
32-
num_of_folds,
33-
is_transfer_learning: bool = False,
34-
training_percentage: float = 1.0,
35-
learning_rate: float = 1e-4,
36-
is_chronological_test: bool = False,
37-
k_fold_test: bool = False,
38-
test_person_ids=None,
39-
*args,
40-
**kwargs,
48+
self,
49+
dataset,
50+
evaluation_folder,
51+
num_of_folds,
52+
is_transfer_learning: bool = False,
53+
training_percentage: float = 1.0,
54+
learning_rate: float = 1e-4,
55+
is_chronological_test: bool = False,
56+
k_fold_test: bool = False,
57+
test_person_ids=None,
58+
*args,
59+
**kwargs,
4160
):
4261
self._dataset = copy.copy(dataset)
4362
self._evaluation_folder = evaluation_folder

src/cehrbert/evaluations/transfer_learning_evaluation.py renamed to src/cehrbert/baseline_evaluation/transfer_learning_evaluation.py

File renamed without changes.

0 commit comments

Comments
 (0)