-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoperations.py
More file actions
92 lines (74 loc) · 3.08 KB
/
operations.py
File metadata and controls
92 lines (74 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#importing required classes
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import cross_val_score
#loading dataset
import pandas as pd
import numpy as np
# Load the raw housing data (relative path, resolved against the working
# directory).
housing = pd.read_csv("housing.csv")

# Stratified test set: bucket median_income into five categories so the
# split can preserve the proportion of each income category.
# NOTE(review): incomes <= 0 or missing fall outside the bins and become NaN,
# which the stratified splitter will reject - confirm the data has none.
housing["income_cat"] = pd.cut(
    housing["median_income"],
    bins=[0.0, 1.5, 3.0, 4.5, 6.0, np.inf],
    labels=[1, 2, 3, 4, 5],
)

split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_index, test_index = next(split.split(housing, housing["income_cat"]))

# split() yields positional row indices, so select with .iloc; .loc would
# only be correct while the frame keeps a default 0..n-1 index.
# income_cat was needed only for stratification and is dropped again.
strat_train_set = housing.iloc[train_index].drop("income_cat", axis=1)
strat_test_set = housing.iloc[test_index].drop("income_cat", axis=1)
# Training set: separate the target column from the predictor columns.
housing_labels = strat_train_set["median_house_value"].copy()
housing = strat_train_set.drop(columns="median_house_value")

# Column groups for preprocessing: the single categorical column, and every
# remaining predictor column, which goes to the numerical pipeline.
cat_attribs = ["ocean_proximity"]
num_attribs = housing.drop(columns=cat_attribs).columns.tolist()
# Preprocessing pipelines.
# Numerical columns: fill missing values with the column median, then
# standardize each column.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical columns: one-hot encode; categories not seen during fit are
# ignored at transform time instead of raising.
cat_pipeline = Pipeline(
    steps=[
        ("encoder", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group through its own pipeline and concatenate the results.
full_pipeline = ColumnTransformer(
    transformers=[
        ("nums", num_pipeline, num_attribs),
        ("cats", cat_pipeline, cat_attribs),
    ]
)

# Fit the preprocessing on the training predictors and transform them.
housing_prep = full_pipeline.fit_transform(housing)
# Training models.
# Linear regression.
lin_reg = LinearRegression()

# Cross-validate preprocessing and model as one estimator on the raw
# predictors: the imputer/scaler/encoder are then re-fitted on each training
# fold only, so no statistics from the validation fold leak into the score.
# cross_val_score fits clones of the estimator, so no prior fit is needed.
lin_model = Pipeline([
    ("preprocessing", full_pipeline),
    ("model", lin_reg),
])

# The scorer returns negated RMSE (higher is better); flip the sign back.
lin_rmse = -cross_val_score(
    lin_model,
    housing,
    housing_labels,
    scoring="neg_root_mean_squared_error",
    cv=10,
)
print("for linear regression ")
print(pd.Series(lin_rmse).describe())
# Decision tree.
# Fixed seed so the cross-validation scores are reproducible between runs,
# matching the seeded train/test split.
dec_reg = DecisionTreeRegressor(random_state=42)

# Cross-validate preprocessing and model as one estimator on the raw
# predictors: the imputer/scaler/encoder are then re-fitted on each training
# fold only, so no statistics from the validation fold leak into the score.
# cross_val_score fits clones of the estimator, so no prior fit is needed.
dec_model = Pipeline([
    ("preprocessing", full_pipeline),
    ("model", dec_reg),
])

# The scorer returns negated RMSE (higher is better); flip the sign back.
dec_rmse = -cross_val_score(
    dec_model,
    housing,
    housing_labels,
    scoring="neg_root_mean_squared_error",
    cv=10,
)
print("for descision tree regression ")
print(pd.Series(dec_rmse).describe())
# Random forest.
# Fixed seed so the cross-validation scores are reproducible between runs,
# matching the seeded train/test split.
ran_reg = RandomForestRegressor(random_state=42)

# Cross-validate preprocessing and model as one estimator on the raw
# predictors: the imputer/scaler/encoder are then re-fitted on each training
# fold only, so no statistics from the validation fold leak into the score.
# cross_val_score fits clones of the estimator, so no prior fit is needed
# (which also avoids training an extra forest whose predictions went unused).
ran_model = Pipeline([
    ("preprocessing", full_pipeline),
    ("model", ran_reg),
])

# The scorer returns negated RMSE (higher is better); flip the sign back.
ran_rmse = -cross_val_score(
    ran_model,
    housing,
    housing_labels,
    scoring="neg_root_mean_squared_error",
    cv=10,
)
print("for random forest ")
print(pd.Series(ran_rmse).describe())