-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathML_build.py
More file actions
45 lines (36 loc) · 1.19 KB
/
ML_build.py
File metadata and controls
45 lines (36 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Import dependencies
import pandas as pd
import numpy as np
# Load the dataset in a dataframe object and include only four features as mentioned
url = "http://s3.amazonaws.com/assets.datacamp.com/course/Kaggle/train.csv"
df = pd.read_csv(url)
include = ['Age', 'Sex', 'Embarked', 'Survived'] # Only four features
df_ = df[include]
# Data Preprocessing
categoricals = []
for col, col_type in df_.dtypes.iteritems():
if col_type == 'O':
categoricals.append(col)
else:
df_[col].fillna(0, inplace=True)
df_ohe = pd.get_dummies(df_, columns=categoricals, dummy_na=True)
print('-------------')
print(df_ohe)
# Logistic Regression classifier
from sklearn.linear_model import LogisticRegression
dependent_variable = 'Survived'
x = df_ohe[df_ohe.columns.difference([dependent_variable])]
y = df_ohe[dependent_variable]
lr = LogisticRegression()
lr.fit(x, y)
# Save your model
from sklearn.externals import joblib
joblib.dump(lr, 'model.pkl')
print("Model dumped!")
# Load the model that you just saved
lr = joblib.load('model.pkl')
# Saving the data columns from training
model_columns = list(x.columns)
joblib.dump(model_columns, 'model_columns.pkl')
print("Models columns dumped!")
print(x)