
Commit ad745ee

Update multilayer_perceptron_classifier_from_scratch.py
1 parent 9371e46 commit ad745ee

File tree: 1 file changed (+82 -96 lines)

machine_learning/multilayer_perceptron_classifier_from_scratch.py

Lines changed: 82 additions & 96 deletions
```diff
@@ -1,20 +1,19 @@
 import numpy as np
 from tqdm import tqdm
 from numpy.random import default_rng
-from numpy.random import seed
-
-seed(42)
-
 
 class Dataloader:
     """
-    DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting.
+    DataLoader class for handling dataset operations. Supports:
+    - data shuffling
+    - one-hot encoding
+    - train/test splitting
 
     Example usage:
     >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
     >>> y = [0, 1, 0, 0]
     >>> loader = Dataloader(X, y)
-    >>> train_X, train_y, test_X, test_y = loader.get_Train_test_data()
+    >>> train_X, train_y, test_X, test_y = loader.get_train_test_data()
     >>> train_X.shape
     (3, 2)
     >>> len(train_y)
```
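This hunk deletes the module-level `seed(42)` while keeping the `default_rng` import, which is the modern replacement. A minimal sketch of the seeded-generator pattern, assuming a generator named `rng` (the permutation call is illustrative, not in the commit):

```python
import numpy as np

# A local Generator is reproducible without mutating NumPy's global
# random state, unlike the removed numpy.random.seed(42).
rng = np.random.default_rng(42)
indices = rng.permutation(4)  # same output on every run for a fixed seed
print(indices)
```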
```diff
@@ -36,7 +35,8 @@ class Dataloader:
 
     def __init__(self, features: list[list[float]], labels: list[int]) -> None:
         """
-        Initializes the Dataloader instance with feature matrix features and labels labels.
+        Initializes the Dataloader instance with a feature matrix (`features`)
+        and corresponding labels (`labels`).
 
         Args:
             features: Feature matrix of shape (n_samples, n_features).
```
```diff
@@ -48,11 +48,10 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None:
         self.y = np.array(labels)
         self.class_weights = {0: 1.0, 1: 1.0}  # Example class weights, adjust as needed
 
-    def get_Train_test_data(
-        self,
-    ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
+    def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
         """
-        Splits the data into training and testing sets. Here, we manually split the data.
+        Splits the data into training and testing sets.
+        Here, we manually split the data.
 
         Returns:
             A tuple containing:
```
```diff
@@ -61,21 +60,13 @@ def get_Train_test_data(
             - Test data
             - Test labels
         """
-        train_data = np.array(
-            [self.X[0], self.X[1], self.X[2]]
-        )  # First 3 samples for training
-        train_labels = [
-            np.array([self.y[0]]),
-            np.array([self.y[1]]),
-            np.array([self.y[2]]),
-        ]  # Labels as np.ndarray
+        train_data = np.array([self.X[0], self.X[1], self.X[2]])  # First 3 samples for training
+        train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])]  # Labels as np.ndarray
         test_data = np.array([self.X[3]])  # Last sample for testing
         test_labels = [np.array([self.y[3]])]  # Labels as np.ndarray
         return train_data, train_labels, test_data, test_labels
 
-    def shuffle_data(
-        self, paired_data: list[tuple[np.ndarray, int]]
-    ) -> list[tuple[np.ndarray, int]]:
+    def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]:
         """
         Shuffles the data randomly.
 
```
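Condensed this way, the fixed split fits on two lines. A self-contained sketch that mirrors the behavior, with the hypothetical helper name `manual_split` standing in for the method:

```python
import numpy as np

def manual_split(features: list[list[float]], labels: list[int]):
    # First three samples train, the fourth tests, matching the
    # hard-coded split above; labels become 1-element arrays.
    x, y = np.array(features), np.array(labels)
    train_data = np.array([x[0], x[1], x[2]])
    train_labels = [np.array([y[0]]), np.array([y[1]]), np.array([y[2]])]
    return train_data, train_labels, np.array([x[3]]), [np.array([y[3]])]

train_X, train_y, test_X, test_y = manual_split(
    [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]], [0, 1, 0, 0]
)
print(train_X.shape, test_X.shape)  # (3, 2) (1, 2)
```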
```diff
@@ -89,7 +80,7 @@ def shuffle_data(
         return paired_data
 
     def get_inout_dim(self) -> tuple[int, int]:
-        train_data, train_labels, test_data, test_labels = self.get_Train_test_data()
+        train_data, train_labels, test_data, test_labels = self.get_train_test_data()
         in_dim = train_data[0].shape[0]
         out_dim = len(train_labels)
         return in_dim, out_dim
```
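`get_inout_dim` reads its dimensions straight off the fixed split. Plugging in the doctest's four-sample dataset shows what it returns (note that `out_dim` counts training labels, not classes):

```python
import numpy as np

train_data = np.array([[0.0, 0.0], [1.0, 1.0], [1.0, 0.0]])
train_labels = [np.array([0]), np.array([1]), np.array([0])]
in_dim = train_data[0].shape[0]  # features per sample -> 2
out_dim = len(train_labels)      # number of training labels -> 3
print(in_dim, out_dim)           # 2 3
```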
```diff
@@ -112,44 +103,43 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray:
     return one_hot
 
 
-class MLP:
-    """
-    A custom MLP class for implementing a simple multi-layer perceptron with
-    forward propagation, backpropagation.
-
-    Attributes:
-        learning_rate (float): Learning rate for gradient descent.
-        gamma (float): Parameter to control learning rate adjustment.
-        epoch (int): Number of epochs for training.
-        hidden_dim (int): Dimension of the hidden layer.
-        batch_size (int): Number of samples per mini-batch.
-        train_loss (List[float]): List to store training loss for each fold.
-        train_accuracy (List[float]): List to store training accuracy for each fold.
-        test_loss (List[float]): List to store test loss for each fold.
-        test_accuracy (List[float]): List to store test accuracy for each fold.
-        dataloader (Dataloader): DataLoader object for handling training data.
-        inter_variable (dict): Dictionary to store intermediate variables for backpropagation.
-        weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold.
-
-    Methods:
-        get_inout_dim:obtain input dimension and output dimension.
-        relu: Apply the ReLU activation function.
-        relu_derivative: Compute the derivative of the ReLU function.
-        forward: Perform a forward pass through the network.
-        back_prop: Perform backpropagation to compute gradients.
-        update_weights: Update the weights using gradients.
-        update_learning_rate: Adjust the learning rate based on test accuracy.
-        accuracy: Compute accuracy of the model.
-        loss: Compute weighted MSE loss.
-        train: Train the MLP over multiple folds with early stopping.
-
-
+class MLP():
     """
-
-    def __init__(
-        self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2
-    ):
-        self.learning_rate = learning_rate  #
+    A custom MLP class for implementing a simple multi-layer perceptron with
+    forward propagation, backpropagation.
+
+    Attributes:
+        learning_rate (float): Learning rate for gradient descent.
+        gamma (float): Parameter to control learning rate adjustment.
+        epoch (int): Number of epochs for training.
+        hidden_dim (int): Dimension of the hidden layer.
+        batch_size (int): Number of samples per mini-batch.
+        train_loss (List[float]): List to store training loss for each fold.
+        train_accuracy (List[float]): List to store training accuracy for each fold.
+        test_loss (List[float]): List to store test loss for each fold.
+        test_accuracy (List[float]): List to store test accuracy for each fold.
+        dataloader (Dataloader): DataLoader object for handling training data.
+        inter_variable (dict):
+            Dictionary to store intermediate variables for backpropagation.
+        weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
+            List of weights for each fold.
+
+    Methods:
+        get_inout_dim:obtain input dimension and output dimension.
+        relu: Apply the ReLU activation function.
+        relu_derivative: Compute the derivative of the ReLU function.
+        forward: Perform a forward pass through the network.
+        back_prop: Perform backpropagation to compute gradients.
+        update_weights: Update the weights using gradients.
+        update_learning_rate: Adjust the learning rate based on test accuracy.
+        accuracy: Compute accuracy of the model.
+        loss: Compute weighted MSE loss.
+        train: Train the MLP over multiple folds with early stopping.
+
+
+    """
+    def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2):
+        self.learning_rate = learning_rate
         self.gamma = gamma  # learning_rate decay hyperparameter gamma
         self.epoch = epoch
         self.hidden_dim = hidden_dim
```
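This hunk's context starts at the tail of `one_hot_encode`, whose body never appears in the diff. A common implementation matching the signature in the hunk header, offered purely as an assumption:

```python
import numpy as np

def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray:
    # Row i carries a single 1 in the column given by label i.
    one_hot = np.zeros((len(labels), num_classes))
    one_hot[np.arange(len(labels)), np.array(labels).ravel()] = 1.0
    return one_hot

print(one_hot_encode([0, 1, 0], 2))
# [[1. 0.]
#  [0. 1.]
#  [1. 0.]]
```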
```diff
@@ -198,10 +188,10 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]:
         """
 
         in_dim, out_dim = self.dataloader.get_inout_dim()  # in_dim here is image dim
-        W1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01  # (in_dim, hidden)
+        w1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01  # (in_dim, hidden)
 
-        W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01  # (hidden, output)
-        return W1, W2
+        w2 = np.random.randn(self.hidden_dim, out_dim) * 0.01  # (hidden, output)
+        return w1, w2
 
     def relu(self, input_array: np.ndarray) -> np.ndarray:
         """
```
```diff
@@ -231,12 +221,13 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray:
         """
         return (input_array > 0).astype(float)
 
+
     def forward(
-        self,
-        input_data: np.ndarray,
-        W1: np.ndarray,
-        W2: np.ndarray,
-        no_gradient: bool = False,
+            self,
+            input_data: np.ndarray,
+            W1: np.ndarray,
+            W2: np.ndarray,
+            no_gradient: bool = False
     ) -> np.ndarray:
         """
         Performs a forward pass through the neural network with one hidden layer.
```
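The context shows `relu_derivative`'s one-liner. Pairing it with the `relu` it differentiates (the `np.maximum` body is assumed, since the hunk shows only the derivative):

```python
import numpy as np

def relu(x: np.ndarray) -> np.ndarray:
    return np.maximum(0.0, x)     # element-wise max(0, x)

def relu_derivative(x: np.ndarray) -> np.ndarray:
    return (x > 0).astype(float)  # 1 where x was positive, else 0

z = np.array([-1.0, 0.0, 2.5])
print(relu(z))             # [0.  0.  2.5]
print(relu_derivative(z))  # [0. 0. 1.]
```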
```diff
@@ -276,11 +267,11 @@ def forward(
         return a2
 
     def back_prop(
-        self,
-        input_data: np.ndarray,
-        true_labels: np.ndarray,
-        W1: np.ndarray,
-        W2: np.ndarray,
+            self,
+            input_data: np.ndarray,
+            true_labels: np.ndarray,
+            W1: np.ndarray,
+            W2: np.ndarray
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         Performs backpropagation to compute gradients for the weights.
```
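For orientation, a shape walk-through of the one-hidden-layer pass these signatures describe; the ReLU hidden activation and the linear output are assumptions, as the hunks show only parameter lists:

```python
import numpy as np

batch, in_dim, hidden, out = 4, 3, 2, 2  # in_dim includes the bias column
x = np.ones((batch, in_dim))
w1 = np.full((in_dim, hidden), 0.01)
w2 = np.full((hidden, out), 0.01)

z1 = x @ w1                # (batch, hidden) pre-activation
a1 = np.maximum(0.0, z1)   # hidden activation
a2 = a1 @ w2               # (batch, out) network output
print(z1.shape, a2.shape)  # (4, 2) (4, 2)
```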
```diff
@@ -322,22 +313,20 @@ def back_prop(
         grad_w2 = (
             np.dot(a1.T, delta_k) / batch_size
         )  # (hidden, batch).dot(batch, output) = (hidden, output)
-        input_data_flat = input_data.reshape(
-            input_data.shape[0], -1
-        )  # (batch_size, input_dim)
+        input_data_flat = input_data.reshape(input_data.shape[0], -1)  # (batch_size, input_dim)
         grad_w1 = (
             np.dot(input_data_flat.T, delta_j) / batch_size
         )  # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)
 
         return grad_w1, grad_w2
 
     def update_weights(
-        self,
-        w1: np.ndarray,
-        w2: np.ndarray,
-        grad_w1: np.ndarray,
-        grad_w2: np.ndarray,
-        learning_rate: float,
+            self,
+            w1: np.ndarray,
+            w2: np.ndarray,
+            grad_w1: np.ndarray,
+            grad_w2: np.ndarray,
+            learning_rate: float
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         Updates the weight matrices using the computed gradients and learning rate.
```
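The shape comments in this hunk pin down both gradients; restated numerically, with the `update_weights` step from the surrounding lines applied at the end (the dummy deltas are illustrative):

```python
import numpy as np

batch_size, input_dim, hidden, output = 3, 3, 2, 2
a1 = np.ones((batch_size, hidden))                  # hidden activations
delta_k = np.ones((batch_size, output))             # output-layer error
input_data_flat = np.ones((batch_size, input_dim))
delta_j = np.ones((batch_size, hidden))             # hidden-layer error

grad_w2 = np.dot(a1.T, delta_k) / batch_size               # (hidden, output)
grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size  # (input, hidden)

learning_rate = 0.1
w1 = np.zeros((input_dim, hidden))
w2 = np.zeros((hidden, output))
w1 -= learning_rate * grad_w1  # the update rule from update_weights
w2 -= learning_rate * grad_w2
print(grad_w1.shape, grad_w2.shape)  # (3, 2) (2, 2)
```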
```diff
@@ -372,6 +361,7 @@ def update_weights(
         w2 -= learning_rate * grad_w2
         return w1, w2
 
+
     def update_learning_rate(self, learning_rate: float) -> float:
         """
         Updates the learning rate by applying the decay factor gamma.
```
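The docstring promises decay by `gamma`, but the rule's body sits outside the hunk; multiplicative decay is the natural reading, sketched here as an assumption:

```python
def update_learning_rate(learning_rate: float, gamma: float = 0.9) -> float:
    # Multiplicative decay: each call shrinks the step size by gamma.
    return learning_rate * gamma

lr = 0.1
for _ in range(3):
    lr = update_learning_rate(lr)
print(round(lr, 4))  # 0.0729
```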
```diff
@@ -462,18 +452,17 @@ def train(self) -> None:
         >>> y = [0, 1, 0, 0]
         >>> loader = Dataloader(X, y)
         >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2)
-        >>> mlp.train()
-        Test accuracy: 1.0
+        >>> mlp.train() #doctest:+ELLIPSIS
+        Test accuracy: ...
         """
 
         learning_rate = self.learning_rate
-        train_data, train_labels, test_data, test_labels = (
-            self.dataloader.get_Train_test_data()
-        )
+        train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data()
 
         train_data = np.c_[train_data, np.ones(train_data.shape[0])]
         test_data = np.c_[test_data, np.ones(test_data.shape[0])]
 
+
         _, total_label_num = self.dataloader.get_inout_dim()
 
         train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
```
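Two things are visible here. First, with the global seed gone (first hunk), the doctest's accuracy is no longer deterministic, hence the switch to `#doctest:+ELLIPSIS`. Second, the `np.c_` lines explain the `in_dim + 1` row in `w1`: a constant-1 bias column is appended to the data. Concretely:

```python
import numpy as np

train_data = np.array([[0.0, 0.0], [1.0, 1.0], [1.0, 0.0]])
# np.c_ stacks a column of ones next to the features, so the bias
# lives inside the first weight matrix rather than a separate vector.
with_bias = np.c_[train_data, np.ones(train_data.shape[0])]
print(with_bias)
# [[0. 0. 1.]
#  [1. 1. 1.]
#  [1. 0. 1.]]
```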
```diff
@@ -488,16 +477,13 @@ def train(self) -> None:
 
         for j in tqdm(range(self.epoch)):
             for k in range(0, train_data.shape[0], batch_size):  # retrieve every image
-                batch_imgs = train_data[k : k + batch_size]
-                batch_labels = train_labels[k : k + batch_size]
 
-                output = self.forward(
-                    input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False
-                )
+                batch_imgs = train_data[k: k + batch_size]
+                batch_labels = train_labels[k: k + batch_size]
+
+                output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False)
 
-                grad_W1, grad_W2 = self.back_prop(
-                    input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2
-                )
+                grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2)
 
                 W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate)
 
```
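The compacted batch loop leans on Python's slice clipping: `train_data[k: k + batch_size]` never over-runs the array, the final batch is simply shorter. A toy run:

```python
import numpy as np

data = np.arange(10)
batch_size = 4
# Slices past the end are clipped, so no bounds check is needed.
for k in range(0, data.shape[0], batch_size):
    print(data[k: k + batch_size])
# [0 1 2 3]
# [4 5 6 7]
# [8 9]
```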
```diff
@@ -512,7 +498,7 @@ def train(self) -> None:
 
         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
-        print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
+        print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))
 
 
 if __name__ == "__main__":
```
