-
Notifications
You must be signed in to change notification settings - Fork 93
Expand file tree
/
Copy pathbuild.py
More file actions
30 lines (21 loc) · 1.07 KB
/
build.py
File metadata and controls
30 lines (21 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# %load q02_data_cleaning_all_2/build.py
# Default Imports
import pandas as pd
import numpy as np
from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning
from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
# Module-level setup: read the raw CSV (path is relative to the current
# working directory) and run it through the two earlier pipeline steps so
# that the train/test splits are available as module globals.
loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
# Drop the Loan_ID column. `axis` is passed by keyword because the positional
# form `drop(label, 1)` was deprecated in pandas 1.3 and removed in pandas 2.0.
loan_data = loan_data.drop('Loan_ID', axis=1)
loan_data = outlier_removal(loan_data)
X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
# Write your solution here :
def data_cleaning_2(X_train, X_test, y_train, y_test):
    """One-hot encode the categorical features and square-root the numeric ones.

    The categorical columns ('Gender', 'Married', 'Dependents', 'Education',
    'Self_Employed', 'Property_Area') are replaced by dummy columns, and
    'ApplicantIncome', 'CoapplicantIncome' and 'LoanAmount' are replaced by
    their square roots. The input frames are not modified; new frames are
    returned.

    The train split defines the feature layout: it is encoded with
    ``drop_first=True`` and the test split is then projected onto exactly the
    same columns, in the same order. Levels that occur only in the test split
    are discarded, and levels that are missing from the test split become
    all-zero columns.

    Args:
        X_train: Training features as a pandas DataFrame containing all of the
            columns named above.
        X_test: Test features with the same columns as ``X_train``.
        y_train: Training target, returned unchanged.
        y_test: Test target, returned unchanged.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` with the transformed
        feature frames and the untouched targets.

    Raises:
        KeyError: If one of the expected columns is missing from a frame.
    """
    categorical_columns = ['Gender', 'Married', 'Dependents', 'Education',
                           'Self_Employed', 'Property_Area']
    numeric_columns = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']

    X_train = pd.get_dummies(X_train, columns=categorical_columns,
                             drop_first=True)

    # Encoding the test split independently with drop_first=True is unsafe:
    # the "first" level is chosen per frame, so a level absent from the test
    # split shifts which dummy gets dropped and the two frames end up with
    # different (or differently-meaning) columns. Keep every level here and
    # let the reindex below pick exactly the columns the train split produced.
    test_encoded = pd.get_dummies(X_test, columns=categorical_columns)
    absent_in_test = [column for column in X_train.columns
                      if column not in test_encoded.columns]
    X_test = test_encoded.reindex(columns=X_train.columns, fill_value=0)

    # Columns created by the reindex are filled with the integer 0; cast them
    # so their dtype matches the corresponding train column.
    for column in absent_in_test:
        X_test[column] = X_test[column].astype(X_train[column].dtype)

    for column in numeric_columns:
        X_train[column] = np.sqrt(X_train[column])
        X_test[column] = np.sqrt(X_test[column])

    return X_train, X_test, y_train, y_test
# data_cleaning_2(X_train, X_test, y_train, y_test)