Skip to content

Commit c60b768

Browse files
authored
Merge pull request #84 from DDSSS07/PSO_FS
Feature Selection using PSO ( Particle Swarm Optimization )
2 parents d6fb189 + 340d9c7 commit c60b768

File tree

2 files changed

+860
-0
lines changed

2 files changed

+860
-0
lines changed
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
import numpy as np
2+
import pandas as pd
3+
import seaborn as sns
4+
from random import random
5+
from sklearn import metrics
6+
from sklearn.preprocessing import LabelEncoder
7+
from sklearn.model_selection import train_test_split
8+
from sklearn.model_selection import cross_validate
9+
from sklearn.linear_model import LogisticRegression
10+
from sklearn.metrics import confusion_matrix, make_scorer
11+
from sklearn.metrics import roc_auc_score, accuracy_score
12+
from sklearn.metrics import precision_score, recall_score
13+
14+
import warnings
15+
warnings.filterwarnings('ignore')
16+
17+
def classification_accuracy(y_actual, y_hat):
    """Compute accuracy, recall and precision for binary 0/1 labels.

    The two sequences are compared pairwise in iteration order, so any
    iterable works (lists, NumPy arrays, pandas Series regardless of their
    index). If the sequences differ in length, the surplus elements of the
    longer one are ignored.

    Only predictions equal to 0 or 1 are counted; label 1 is treated as the
    positive class.

    Args:
        y_actual: Ground-truth labels.
        y_hat: Predicted labels.

    Returns:
        A ``(accuracy, recall, precision)`` tuple. Accuracy is ``0.0`` when
        nothing was counted; recall / precision are ``0`` when their
        denominators are zero.
    """
    TP = FP = TN = FN = 0

    for actual, predicted in zip(y_actual, y_hat):
        if predicted == 1:
            if actual == 1:
                TP += 1
            else:
                FP += 1
        elif predicted == 0:
            if actual == 0:
                TN += 1
            else:
                FN += 1

    total = TP + FP + TN + FN
    # Guard every ratio against an empty denominator instead of raising.
    class_acc = (TP + TN) / total if total else 0.0
    recall = TP / (TP + FN) if (TP + FN) else 0
    precision = TP / (TP + FP) if (TP + FP) else 0

    return (class_acc, recall, precision)
46+
47+
def fitness_without_optimization(df1):
    """Score a baseline logistic regression trained on every feature column.

    The ``diagnosis`` column of ``df1`` is label-encoded and used as the
    target; all remaining columns are used as features. The rows are split
    70/30 into train and test sets (the split is not seeded, so results
    vary from run to run). A confusion-matrix heatmap of the test
    predictions is drawn with seaborn as a side effect.

    Args:
        df1: DataFrame containing a ``diagnosis`` column plus feature columns.

    Returns:
        The ``(accuracy, recall, precision)`` tuple computed by
        ``classification_accuracy`` on the test split.
    """
    # Separate labels and features
    X = df1.drop(columns=['diagnosis'])
    # LabelEncoder assigns integer codes in sorted label order
    # (per the original note: B -> 0, M -> 1).
    y = LabelEncoder().fit_transform(df1['diagnosis'])

    # Hold out 30% of the rows as test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Logistic Regression
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d")

    return classification_accuracy(y_test, y_pred)
82+
83+
df = pd.read_csv('breast_cancer_data.csv')
accuracy = fitness_without_optimization(df.copy())
# fitness_without_optimization returns (accuracy, recall, precision):
# index 1 is recall and index 2 is precision.
print('Accuracy :' + "{:.2f}".format(accuracy[0]))
print('Precision :' + "{:.2f}".format(accuracy[2]))
print('Recall :' + "{:.2f}".format(accuracy[1]))
88+
89+
class PSO:
    """One particle of a binary particle swarm used for feature selection.

    A particle keeps a continuous position per feature (``pos_act``), which
    is thresholded at 0.5 into a binary mask (``position``). A mask value of
    1 keeps the feature column at that index, 0 drops it. The particle's
    fitness is the ``(accuracy, recall, precision)`` of a logistic
    regression trained on the kept columns.
    """

    def __init__(self, f_count, df):
        """Create a particle with random position and velocity.

        Args:
            f_count: Number of feature columns the mask covers.
            df: DataFrame with a ``diagnosis`` column plus feature columns;
                a private copy is stored.
        """
        self.df = df.copy()            # data
        self.f_count = f_count         # feature count
        self.pos_act = []              # continuous positions
        self.position = []             # binary mask: 1 if pos_act > 0.5 else 0
        self.velocity = []             # per-feature velocity
        self.pos_best = []             # best mask seen by this particle
        self.y_actual = []             # test labels of the last evaluation
        self.y_predict = []            # test predictions of the last evaluation
        self.fit_best = (-1, -1, -1)   # best (accuracy, recall, precision)
        self.fitness = (-1, -1, -1)    # latest (accuracy, recall, precision)

        self.initialize(f_count)

    def initialize(self, f_count):
        """Reset the feature count and draw a fresh position and velocity."""
        self.f_count = f_count
        self.initalize_position(f_count)
        self.initialize_velocity(f_count)

    def set_data(self, data):
        """Replace the stored data with a copy of ``data`` and print its head."""
        self.df = data.copy()
        print(self.df.head())

    # Initialize the positions; values > 0.5 are set as 1
    def initalize_position(self, f_count):
        """Draw ``f_count`` uniform positions in [0, 1) and derive the mask."""
        self.pos_act = np.random.uniform(low=0, high=1, size=f_count).tolist()
        self.position = [1 if po > 0.5 else 0 for po in self.pos_act]

    def initialize_velocity(self, f_count):
        """Draw ``f_count`` uniform velocities in [-1, 1)."""
        self.velocity = np.random.uniform(low=-1, high=1, size=f_count).tolist()

    def drop_columns(self, X):
        """Return ``X`` without the columns whose mask bit is 0.

        Column ``i`` of ``X`` is dropped when ``self.position[i] == 0``.
        Columns beyond the length of the mask are kept. When no bit is 0
        the result contains every column of ``X``.
        """
        dropped = [
            X.columns[index]
            for index, bit in enumerate(self.position)
            if bit == 0
        ]
        # Drop all deselected columns in one call so the removals accumulate.
        return X.drop(columns=dropped)

    def classification_accuracy(self, y_actual, y_hat):
        """Compute accuracy, recall and precision for binary 0/1 labels.

        The sequences are compared pairwise in iteration order; only
        predictions equal to 0 or 1 are counted and label 1 is the positive
        class.

        Returns:
            A ``(accuracy, recall, precision)`` tuple. Accuracy is ``0.0``
            when nothing was counted; recall / precision are ``0`` when
            their denominators are zero.
        """
        TP = FP = TN = FN = 0

        for actual, predicted in zip(y_actual, y_hat):
            if predicted == 1:
                if actual == 1:
                    TP += 1
                else:
                    FP += 1
            elif predicted == 0:
                if actual == 0:
                    TN += 1
                else:
                    FN += 1

        total = TP + FP + TN + FN
        class_acc = (TP + TN) / total if total else 0.0
        recall = TP / (TP + FN) if (TP + FN) else 0
        precision = TP / (TP + FP) if (TP + FP) else 0

        return (class_acc, recall, precision)

    def process_data(self):
        """Train and score a logistic regression on the selected features.

        The ``diagnosis`` column is label-encoded as the target, the masked
        feature columns are split 70/30 (unseeded) into train and test
        sets, and the test labels and predictions are stored on
        ``self.y_actual`` / ``self.y_predict``.

        Returns:
            The ``(accuracy, recall, precision)`` tuple for the test split,
            or ``(0.0, 0, 0)`` when the mask deselects every column.
        """
        # Separate labels and features, then apply the mask
        X = self.drop_columns(self.df.drop(columns=['diagnosis']))

        if X.shape[1] == 0:
            # Nothing left to train on: report the lowest possible scores
            # rather than letting the model fit fail.
            self.y_actual = []
            self.y_predict = []
            return (0.0, 0, 0)

        # LabelEncoder assigns integer codes in sorted label order
        # (per the original note: B -> 0, M -> 1).
        y = LabelEncoder().fit_transform(self.df['diagnosis'])

        # Hold out 30% of the rows as test data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

        # Logistic Regression
        model = LogisticRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        self.y_actual = y_test
        self.y_predict = y_pred

        return self.classification_accuracy(y_test, y_pred)

    # fitness check: compares accuracy, recall and precision
    def fitness_check(self, fitness, fit_best):
        """Return True when ``fitness`` should replace ``fit_best``.

        Both arguments are ``(accuracy, recall, precision)`` tuples. The
        candidate wins when its accuracy is strictly higher (or the best
        accuracy is still the ``-1`` sentinel) and neither its recall nor
        its precision is lower than the current best.
        """
        better_accuracy = fitness[0] > fit_best[0] or fit_best[0] == -1
        return bool(
            better_accuracy
            and fitness[1] >= fit_best[1]
            and fitness[2] >= fit_best[2]
        )

    def evaluate_fitness(self):
        """Evaluate the current mask and update the personal best if it wins."""
        self.fitness = self.process_data()

        if self.fitness_check(self.fitness, self.fit_best):
            self.pos_best = self.position.copy()
            self.fit_best = self.fitness

    def update_velocity(self, pos_best_global):
        """Update each velocity from the personal and global best masks.

        Args:
            pos_best_global: Best mask found by the whole swarm, indexable
                for ``0 <= i < f_count``.
        """
        c1 = 1     # cognitive (personal best) weight
        c2 = 2     # social (global best) weight
        w = 0.5    # inertia weight

        for i in range(self.f_count):
            # r1, r2 lie in [0, 1) so both terms always pull toward the
            # respective best position and never push away from it.
            r1 = np.random.uniform(low=0, high=1)
            r2 = np.random.uniform(low=0, high=1)
            velocity_cog = c1 * r1 * (self.pos_best[i] - self.position[i])
            velocity_soc = c2 * r2 * (pos_best_global[i] - self.position[i])

            self.velocity[i] = w * self.velocity[i] + velocity_cog + velocity_soc

    def update_position(self):
        """Move each continuous position by its velocity and refresh the mask."""
        for i in range(self.f_count):
            self.pos_act[i] = self.pos_act[i] + self.velocity[i]

            # Clamp into range.
            # NOTE(review): the upper bound snaps to 0.9 while the lower
            # bound snaps to 0.0 -- confirm the asymmetry is intended.
            if self.pos_act[i] > 1:
                self.pos_act[i] = 0.9

            if self.pos_act[i] < 0:
                self.pos_act[i] = 0.0

            self.position[i] = 1 if self.pos_act[i] > 0.5 else 0

    def print_position(self):
        """Print the binary feature mask."""
        print(self.position)

    def print_velocity(self):
        """Print the velocity vector."""
        print(self.velocity)
247+
248+
def pso_calculate(f_count, df, no_population=400, max_iterations=10):
    """Run the particle swarm feature-selection search and report the best mask.

    A swarm of ``PSO`` particles is created. On every iteration each
    particle is evaluated, the global best is updated via the particle's
    ``fitness_check``, and then every particle's velocity and position are
    advanced toward the current global best. Progress is printed each
    iteration; at the end the best mask and fitness are printed and a
    confusion-matrix heatmap of the best evaluation is drawn with seaborn.

    Args:
        f_count: Number of feature columns each particle's mask covers.
        df: DataFrame handed to every particle.
        no_population: Number of particles in the swarm.
        max_iterations: Number of evaluate/update rounds to run.

    Returns:
        A ``(best_mask, best_fitness)`` tuple, where ``best_fitness`` is
        ``(accuracy, recall, precision)``. Both keep their initial values
        (``[]`` and ``(-1, -1, -1)``) if nothing was evaluated.
    """
    y_actual = []
    y_predict = []
    fitness_best_g = (-1, -1, -1)
    pos_fitness_g = []

    swarm = [PSO(f_count, df) for _ in range(no_population)]

    for iteration in range(1, max_iterations + 1):

        print('\nIteration : ', iteration)

        for particle in swarm:
            particle.evaluate_fitness()

            # check whether the current particle is the new global best
            if particle.fitness_check(particle.fitness, fitness_best_g):
                pos_fitness_g = list(particle.position)
                fitness_best_g = particle.fitness
                y_actual = particle.y_actual
                y_predict = particle.y_predict

        for particle in swarm:
            particle.update_velocity(pos_fitness_g)
            particle.update_position()

        print(pos_fitness_g)
        print(fitness_best_g)

    print('\n Final Solution:')
    print(pos_fitness_g)
    print(fitness_best_g)

    # Only plot when a best evaluation with test labels was recorded.
    if len(y_actual) > 0:
        cm_2 = confusion_matrix(y_actual, y_predict)
        sns.heatmap(cm_2, annot=True, fmt="d")

    return pos_fitness_g, fitness_best_g
289+
290+
# Every column except the 'diagnosis' label is a candidate feature, so
# derive the feature count from the data instead of hard-coding it.
pso_calculate(df.shape[1] - 1, df)

0 commit comments

Comments
 (0)