Commit 9b72645

adding backpropagation weight decay

1 parent b22fab0 commit 9b72645

1 file changed: +180 -0 lines changed
@@ -0,0 +1,180 @@
import warnings

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

warnings.filterwarnings("ignore", category=DeprecationWarning)


def train_network(
    neurons: int, x_train: np.ndarray, y_train: np.ndarray, epochs: int
) -> tuple:
    """
    Backpropagation algorithm with weight-decay regularization.
    The chosen network architecture consists of 3 layers
    (the input layer, the hidden layer and the output layer).

    Explanation here (available only in Spanish):
    https://drive.google.com/file/d/1QTEbRVgevfK8QJ30tWcEbaNbBaKnvGWv/view?usp=sharing

    >>> import numpy as np
    >>> x_train = np.array([[0.1, 0.2], [0.4, 0.6]])
    >>> y_train = np.array([[1], [0]])
    >>> neurons = 2
    >>> epochs = 10
    >>> result = train_network(neurons, x_train, y_train, epochs)
    >>> all(part is not None for part in result)
    True
    """
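    # Sketch of the update rule implemented below, with mu as the learning rate
    # and lambda_ as the weight-decay coefficient:
    #   grad = dE/dw + lambda_ * w
    #   w    = (1 - mu * lambda_) * w - mu * grad
    # so the weights are both shrunk by the decay factor and pushed along the
    # regularized gradient.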
    mu = 0.2  # learning rate
    lambda_ = 1e-4  # weight-decay coefficient
    factor_scale = 0.001
    inputs = np.shape(x_train)[1]
    outputs = np.shape(y_train)[1]
    # initialization of weights and biases randomly with very small values
    rng = np.random.default_rng(seed=42)
    w_co = rng.random((int(inputs), int(neurons))) * factor_scale
    bias_co = rng.random((1, int(neurons))) * factor_scale
    w_cs = rng.random((int(neurons), int(outputs))) * factor_scale
    bias_cs = rng.random((1, int(outputs))) * factor_scale
    error = np.zeros(epochs)
    # iterative process
    k = 0
    while k < epochs:
        y = np.zeros(np.shape(y_train))
        for j in np.arange(0, len(x_train), 1):
            x = x_train[j]
            t = y_train[j]
            # forward step: compute aj, zj, ak and zk
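            #   aj = x @ w_co + bias_co    (hidden-layer pre-activation)
            #   zj = relu(aj)              (hidden-layer activation)
            #   ak = zj @ w_cs + bias_cs   (output-layer pre-activation)
            #   zk = sigmoid(ak)           (network output)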
            aj = np.dot(x, w_co) + bias_co
            zj = relu(aj)
            ak = np.dot(zj, w_cs) + bias_cs
            zk = sigmoid(ak)
            y[j] = np.round(zk)

            # backward step: error gradient estimation
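            # With squared error E = 0.5 * (zk - t) ** 2, the chain rule gives,
            # for the output-layer parameters,
            #   dE/dw_cs    = (zk - t) * d_sigmoid(ak) * zj.T
            #   dE/dbias_cs = (zk - t) * d_sigmoid(ak)
            # to which the weight-decay terms lambda_ * w_cs and lambda_ * bias_cs
            # are added below.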
            g2p = d_sigmoid(ak)  # for the weights and bias of the output-layer neuron
            d_w_cs = g2p * zj.T
            d_bias_cs = g2p * 1
            grad_w_cs = (zk - t) * d_w_cs + lambda_ * w_cs
            grad_bias_cs = (zk - t) * d_bias_cs + lambda_ * bias_cs

            g1p = d_relu(aj)  # for the weights and biases of the hidden-layer neurons
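            # Chain rule through the single output neuron, for each hidden neuron i:
            #   dE/dw_co[:, i]    = (zk - t) * d_sigmoid(ak) * w_cs[i] * g1p_i * x
            #   dE/dbias_co[0, i] = (zk - t) * d_sigmoid(ak) * w_cs[i] * g1p_i
            # where g1p_i is the ReLU derivative of hidden neuron i; the weight-decay
            # terms lambda_ * w_co and lambda_ * bias_co are added below.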
            d_w_co = np.zeros(np.shape(w_co))
            d_bias_co = np.zeros(np.shape(bias_co))
            for i in np.arange(0, np.shape(d_w_co)[1], 1):
                d_w_co[:, i] = g2p * w_cs[i] * g1p.T[i] * x.T
                d_bias_co[0, i] = g2p * w_cs[i] * g1p.T[i] * 1
            grad_w_co = (zk - t) * d_w_co + lambda_ * w_co
            grad_bias_co = (zk - t) * d_bias_co + lambda_ * bias_co

            # weight and bias update with weight-decay regularization
            w_cs = (1 - mu * lambda_) * w_cs - mu * grad_w_cs
            bias_cs = (1 - mu * lambda_) * bias_cs - mu * grad_bias_cs
            w_co = (1 - mu * lambda_) * w_co - mu * grad_w_co
            bias_co = (1 - mu * lambda_) * bias_co - mu * grad_bias_co
        error[k] = 0.5 * np.sum((y - y_train) ** 2)
        k += 1
    return w_co, bias_co, w_cs, bias_cs, error


def relu(input_: np.ndarray) -> np.ndarray:
    """
    ReLU activation function.
    Used in the hidden layer because it is less susceptible to the vanishing
    gradient problem.

    >>> relu(np.array([[0, -1, 2, 3, 0], [0, -1, -2, -3, 5]]))
    array([[0, 0, 2, 3, 0],
           [0, 0, 0, 0, 5]])
    """
    return np.maximum(input_, 0)


def d_relu(input_: np.ndarray) -> np.ndarray:
    """
    Derivative of the ReLU activation function (the input array is modified
    in place).

    >>> d_relu(np.array([[0, -1, 2, 3, 0], [0, -1, -2, -3, 5]]))
    array([[1, 0, 1, 1, 1],
           [1, 0, 0, 0, 1]])
    """
    for i in np.arange(0, len(input_)):
        for j in np.arange(0, len(input_[i])):
            if input_[i, j] >= 0:
                input_[i, j] = 1
            else:
                input_[i, j] = 0
    return input_

def sigmoid(input_: float | np.ndarray) -> float | np.ndarray:
    """
    Sigmoid activation function.
    Used in the output layer.

    >>> import numpy as np
    >>> sigmoid(0) is not None
    True
    """
    return 1 / (1 + np.exp(-input_))


def d_sigmoid(input_: float | np.ndarray) -> float | np.ndarray:
    """
    Derivative of the sigmoid activation function,
    sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)) = sigmoid(x) ** 2 * exp(-x).

    >>> import numpy as np
    >>> d_sigmoid(0) is not None
    True
    """
    return sigmoid(input_) ** 2 * np.exp(-input_)

def main() -> None:
    """
    Import the load_breast_cancer dataset.
    It is a binary classification problem with 569 samples and 30 attributes.
    Categorical output values: [0, 1].

    The data is split 70% / 30% into train and test sets.

    Before training the neural network, the data is normalized to the [0, 1]
    interval.

    The function train_network() returns the weight and bias matrices used to
    apply the transfer function and predict the output.
    """

    inputs = load_breast_cancer()["data"]
    target = load_breast_cancer()["target"]
    target = target.reshape(np.shape(target)[0], 1)

    scaler = MinMaxScaler()
    normalized_data = scaler.fit_transform(inputs)

    train = int(np.round(np.shape(normalized_data)[0] * 0.7))
    x_train = normalized_data[0:train, :]
    x_test = normalized_data[train:, :]

    y_train = target[0:train]
    y_test = target[train:]

    # play with the number of epochs and neurons
    epochs = 5
    neurons = 5
    w_co, bias_co, w_cs, bias_cs, error = train_network(
        neurons, x_train, y_train, epochs
    )

    # find the labels with the obtained weights (apply the network transfer function)
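    # Equivalently, as a formula (names taken from the code above):
    #   yp_test = round(sigmoid(relu(x_test @ w_co + bias_co) @ w_cs + bias_cs))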
    yp_test = np.round(
        sigmoid(np.dot(relu(np.dot(x_test, w_co) + bias_co), w_cs) + bias_cs)
    )

    print(f"accuracy: {accuracy_score(y_test, yp_test)}")


if __name__ == "__main__":
    import doctest

    doctest.testmod()
    main()
