-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTrainer.py
More file actions
35 lines (26 loc) · 860 Bytes
/
Trainer.py
File metadata and controls
35 lines (26 loc) · 860 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
from sklearn import ensemble
from sklearn.model_selection import train_test_split
import mlrun
from mlrun.frameworks.sklearn import apply_mlrun
@mlrun.handler()
def train(
    dataset: pd.DataFrame,
    label_column: str = "label",
    n_estimators: int = 100,
    learning_rate: float = 0.1,
    max_depth: int = 3,
    model_name: str = "cancer_classifier",
):
    """Fit a random-forest classifier on ``dataset`` and register it with MLRun.

    The frame is separated into features and target, 10% of the rows are held
    out with a fixed seed, and a ``RandomForestClassifier`` is fitted on the
    remaining rows. Nothing is returned.

    Args:
        dataset: Table holding the feature columns plus the target column.
        label_column: Name of the target column inside ``dataset``.
        n_estimators: Forwarded to ``RandomForestClassifier(n_estimators=...)``.
        learning_rate: Accepted but not used anywhere in this function; the
            classifier built below is never given this value.
            NOTE(review): likely a leftover from a boosting-based version --
            confirm whether it should be wired in or deprecated.
        max_depth: Forwarded to ``RandomForestClassifier(max_depth=...)``.
        model_name: Name handed to ``apply_mlrun`` for the model.
    """
    # Separate the feature columns from the target column
    features = dataset.drop(columns=label_column)
    target = dataset[label_column]

    # Hold out 10% of the rows; the fixed seed keeps the split reproducible
    split = train_test_split(features, target, test_size=0.1, random_state=42)
    train_features, test_features, train_target, test_target = split

    # Only these two hyperparameters reach the estimator
    forest_params = {"n_estimators": n_estimators, "max_depth": max_depth}
    classifier = ensemble.RandomForestClassifier(**forest_params)

    # The model and the held-out split are handed to MLRun *before* fitting
    # (presumably so the fit call is tracked/evaluated -- confirm in MLRun docs)
    apply_mlrun(
        model=classifier,
        model_name=model_name,
        x_test=test_features,
        y_test=test_target,
    )
    classifier.fit(train_features, train_target)