
Commit e0ae20f

Update multilayer_perceptron_classifier_from_scratch.py
1 parent 0cb6734 commit e0ae20f

File tree

1 file changed (+75, -105 lines)

machine_learning/multilayer_perceptron_classifier_from_scratch.py

Lines changed: 75 additions & 105 deletions
@@ -1,30 +1,17 @@
 import numpy as np
 from numpy.random import default_rng
-from tqdm import tqdm
-
 rng = default_rng(42)
-
-
 class Dataloader:
     """
-    DataLoader class for handling dataset operations. Supports:
-    - data shuffling
-    - one-hot encoding
-    - train/test splitting
+    DataLoader class for handling dataset, including data shuffling,
+    one-hot encoding, and train-test splitting.
 
     Example usage:
     >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
     >>> y = [0, 1, 0, 0]
     >>> loader = Dataloader(X, y)
-    >>> train_X, train_y, test_X, test_y = loader.get_train_test_data()
-    >>> train_X.shape
-    (3, 2)
-    >>> len(train_y)
-    3
-    >>> test_X.shape
-    (1, 2)
-    >>> len(test_y)
-    1
+    >>> len(loader.get_train_test_data())  # Returns train and test data
+    4
     >>> loader.one_hot_encode([0, 1, 0], 2)  # Returns one-hot encoded labels
     array([[0.99, 0.  ],
            [0.  , 0.99],
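
For reference, the retained `one_hot_encode` doctest encodes the hot entry as 0.99 rather than 1.0. A minimal standalone sketch that reproduces that output; the helper and scaling constant shown here are illustrative assumptions, not the file's actual implementation:

```python
import numpy as np


def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray:
    """Encode integer labels as rows with 0.99 at the label index (sketch)."""
    encoded = np.zeros((len(labels), num_classes))
    encoded[np.arange(len(labels)), labels] = 0.99  # soft target instead of 1.0
    return encoded


print(one_hot_encode([0, 1, 0], 2))
# [[0.99 0.  ]
#  [0.   0.99]
#  [0.99 0.  ]]
```
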
@@ -38,21 +25,17 @@ class Dataloader:
 
     def __init__(self, features: list[list[float]], labels: list[int]) -> None:
         """
-        Initializes the Dataloader instance
-        with a feature matrix (`features`)
-        and corresponding labels (`labels`).
+        Initializes the Dataloader instance with feature matrix
+        features and labels labels.
 
         Args:
-            features: Feature matrix of shape
-                (n_samples, n_features).
-            labels: List of labels of shape
-                (n_samples,).
+            features: Feature matrix of shape (n_samples, n_features).
+            labels: List of labels of shape (n_samples,).
         """
         # random seed
-        self.rng = default_rng(42)
         self.X = np.array(features)
         self.y = np.array(labels)
-        self.class_weights = {0: 1.0, 1: 1.0}
+        self.class_weights = {0: 1.0, 1: 1.0}  # Example class weights, adjust as needed
 
     def get_train_test_data(
         self,
@@ -74,8 +57,8 @@ def get_train_test_data(
             np.array([self.y[1]]),
             np.array([self.y[2]]),
         ]
-        test_data = np.array([self.X[3]])  # Last sample for testing
-        test_labels = [np.array([self.y[3]])]  # Labels as np.ndarray
+        test_data = np.array([self.X[3]])
+        test_labels = [np.array([self.y[3]])]
         return train_data, train_labels, test_data, test_labels
 
     def shuffle_data(
@@ -85,13 +68,11 @@ def shuffle_data(
         Shuffles the data randomly.
 
         Args:
-            paired_data: List of tuples containing data
-                and corresponding labels.
+            paired_data: List of tuples containing data and corresponding labels.
 
         Returns:
             A shuffled list of data-label pairs.
         """
-        default_rng.shuffle(paired_data)  # Using the new random number generator
         return paired_data
 
     def get_inout_dim(self) -> tuple[int, int]:
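
The removed line called `shuffle` on `default_rng` itself, which is the Generator factory rather than a Generator instance, so it would raise at runtime. A hedged sketch of shuffling paired data with the module-level `rng` Generator via an index permutation; this is illustrative only, not the committed code:

```python
import numpy as np
from numpy.random import default_rng

rng = default_rng(42)  # Generator instance; the bare default_rng has no shuffle

features = np.array([[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]])
labels = np.array([0, 1, 0, 0])

# Shuffle features and labels together with one random permutation of indices.
perm = rng.permutation(len(features))
shuffled_features, shuffled_labels = features[perm], labels[perm]
print(shuffled_labels)
```
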
@@ -129,19 +110,15 @@ class MLP:
         epoch (int): Number of epochs for training.
         hidden_dim (int): Dimension of the hidden layer.
         batch_size (int): Number of samples per mini-batch.
-        train_loss (List[float]):
-            List to store training loss for each fold.
-        train_accuracy (List[float]):
-            List to store training accuracy for each fold.
+        train_loss (List[float]): List to store training loss for each fold.
+        train_accuracy (List[float]): List to store training accuracy for each fold.
         test_loss (List[float]): List to store test loss for each fold.
-        test_accuracy (List[float]):
-            List to store test accuracy for each fold.
-        dataloader (Dataloader):
-            DataLoader object for handling training data.
-        inter_variable (dict):
-            Dictionary to store intermediate variables for backpropagation.
+        test_accuracy (List[float]): List to store test accuracy for each fold.
+        dataloader (Dataloader): DataLoader object for handling training data.
+        inter_variable (dict): Dictionary to store intermediate variables
+            for backpropagation.
         weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
-        List of weights for each fold.
+            List of weights for each fold.
 
     Methods:
         get_inout_dim:obtain input dimension and output dimension.
@@ -159,26 +136,26 @@ class MLP:
     """
 
     def __init__(
-            self,
-            dataloader: Dataloader,
-            epoch: int,
-            learning_rate: float,
-            gamma: float = 1.0,
-            hidden_dim: int = 2,
+        self,
+        dataloader: Dataloader,
+        epoch: int,
+        learning_rate: float,
+        gamma: float = 1.0,
+        hidden_dim: int = 2,
     ) -> None:
         self.learning_rate = learning_rate
         self.gamma = gamma  # learning_rate decay hyperparameter gamma
         self.epoch = epoch
         self.hidden_dim = hidden_dim
 
-        self.train_loss = []
-        self.train_accuracy = []
-        self.test_loss = []
-        self.test_accuracy = []
+        self.train_loss: list[float] = []
+        self.train_accuracy: list[float] = []
+        self.test_loss: list[float] = []
+        self.test_accuracy: list[float] = []
 
         self.dataloader = dataloader
-        self.inter_variable = {}
-        self.weights1_list = []
+        self.inter_variable: dict[str, np.ndarray] = {}
+        self.weights1_list: list[np.ndarray] = []
 
     def get_inout_dim(self) -> tuple[int, int]:
         """
@@ -215,7 +192,8 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]:
         """
 
         in_dim, out_dim = self.dataloader.get_inout_dim()
-        w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim)
+        w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) *
+              np.sqrt(2.0 / in_dim))
         w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(
             2.0 / self.hidden_dim
         )
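
The rewrapped line keeps the same He-style initialization: standard-normal weights scaled by sqrt(2 / fan_in), the usual choice for ReLU hidden units. A self-contained sketch under that assumption, with the extra input row for a bias term:

```python
import numpy as np
from numpy.random import default_rng

rng = default_rng(42)

in_dim, hidden_dim, out_dim = 2, 2, 2  # toy sizes matching the doctests

# He initialization: variance 2 / fan_in keeps ReLU pre-activations well scaled.
w1 = rng.standard_normal((in_dim + 1, hidden_dim)) * np.sqrt(2.0 / in_dim)
w2 = rng.standard_normal((hidden_dim, out_dim)) * np.sqrt(2.0 / hidden_dim)
print(w1.shape, w2.shape)  # (3, 2) (2, 2)
```
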
@@ -262,23 +240,19 @@ def forward(
         Args:
             input_data: Input data, shape (batch_size, input_dim).
             w1: Weight matrix for input to hidden layer,
-            shape (input_dim + 1, hidden_dim).
+                shape (input_dim + 1, hidden_dim).
             w2: Weight matrix for hidden to output layer,
-            shape (hidden_dim, output_dim).
+                shape (hidden_dim, output_dim).
             no_gradient: If True, returns output without storing intermediates.
 
         Returns:
-            Output of the network after forward pass,
-            shape (batch_size, output_dim).
+            Output of the network after forward pass, shape (batch_size, output_dim).
 
         Examples:
             >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
-            >>> x = np.array([[1.0, 2.0, 1.0]])
-
+            >>> x = np.array([[1.0, 2.0, 1.0]])  # batch_size=1, input_dim=2 + bias
             >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
-
             >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
-
             >>> output = mlp.forward(x, w1, w2)
             >>> output.shape
             (1, 2)
@@ -306,33 +280,29 @@ def back_prop(
         Performs backpropagation to compute gradients for the weights.
 
         Args:
-            input_data: Input data, shape
-                (batch_size, input_dim).
-            true_labels: True labels, shape
-                (batch_size, output_dim).
-            w1: Weight matrix for input to
-                hidden layer, shape (input_dim + 1, hidden_dim).
-            w2: Weight matrix for hidden
-                to output layer, shape (hidden_dim, output_dim).
+            input_data: Input data, shape (batch_size, input_dim).
+            true_labels: True labels, shape (batch_size, output_dim).
+            w2: Weight matrix for hidden to output layer,
+                shape (hidden_dim, output_dim).
 
         Returns:
             Tuple of gradients (grad_w1, grad_w2) for the weight matrices.
         Examples:
             >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
-            >>> x = np.array([[1.0, 2.0, 1.0]])
-            >>> y = np.array([[0.0, 1.0]])
+            >>> x = np.array([[1.0, 2.0, 1.0]])  # batch_size=1, input_dim=2 + bias
+            >>> y = np.array([[0.0, 1.0]])  # batch_size=1, output_dim=2
             >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
-            >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
-            >>> _ = mlp.forward(x, w1, w2)
+            >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])  # (hidden_dim=2, output_dim=2)
+            >>> _ = mlp.forward(x, w1, w2)  # Run forward to set inter_variable
             >>> grad_w1, grad_w2 = mlp.back_prop(x, y, w2)
             >>> grad_w1.shape
             (3, 2)
             >>> grad_w2.shape
             (2, 2)
         """
-        a1 = self.inter_variable["a1"]
+        a1 = self.inter_variable["a1"]  # (batch_size, hidden_dim)
         z1 = self.inter_variable["z1"]
-        a2 = self.inter_variable["a2"]
+        a2 = self.inter_variable["a2"]  # (batch_size, output_dim)
 
         batch_size = input_data.shape[0]
 
@@ -342,9 +312,13 @@ def back_prop(
             z1
         )  # (batch, hidden_dim) when using ReLU
 
-        grad_w2 = np.dot(a1.T, delta_k) / batch_size
+        grad_w2 = (
+            np.dot(a1.T, delta_k) / batch_size
+        )  # (hidden, batch).dot(batch, output) = (hidden, output)
         input_data_flat = input_data.reshape(input_data.shape[0], -1)
-        grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size
+        grad_w1 = (
+            np.dot(input_data_flat.T, delta_j) / batch_size
+        )  # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)
 
         return grad_w1, grad_w2
 
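The rewrapped gradient lines follow the standard batched form: each weight gradient is the layer input transposed times the corresponding delta, averaged over the batch. A self-contained sketch of that computation under assumed activations (ReLU hidden layer, identity output with squared-error loss); the file's own delta definitions may differ:

```python
import numpy as np

x = np.array([[1.0, 2.0, 1.0]])   # (batch, input_dim + 1), bias included
y = np.array([[0.0, 1.0]])        # (batch, output_dim)
w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
w2 = np.array([[0.7, 0.8], [0.9, 1.0]])

z1 = x @ w1                       # hidden pre-activation, (batch, hidden)
a1 = np.maximum(z1, 0.0)          # ReLU hidden activation (assumed)
a2 = a1 @ w2                      # identity output (assumed)

batch_size = x.shape[0]
delta_k = a2 - y                  # output delta for squared-error loss
delta_j = (delta_k @ w2.T) * (z1 > 0)          # backprop through ReLU
grad_w2 = np.dot(a1.T, delta_k) / batch_size   # (hidden, output)
grad_w1 = np.dot(x.T, delta_j) / batch_size    # (input_dim + 1, hidden)
print(grad_w1.shape, grad_w2.shape)            # (3, 2) (2, 2)
```
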
@@ -357,16 +331,17 @@ def update_weights(
         learning_rate: float,
     ) -> tuple[np.ndarray, np.ndarray]:
         """
-        Updates the weight matrices using
-        the computed gradients and learning rate.
+        Updates the weight matrices using the computed gradients and learning rate.
 
         Args:
-            w1: Weight matrix for input to hidden layer, shape
-            (input_dim + 1, hidden_dim).
-            w2: Weight matrix for hidden to output layer, shape
-            (hidden_dim, output_dim).
-            grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim).
-            grad_w2: Gradient for w2, shape (hidden_dim, output_dim).
+            w1: Weight matrix for input to hidden layer,
+                shape (input_dim + 1, hidden_dim).
+            w2: Weight matrix for hidden to output layer,
+                shape (hidden_dim, output_dim).
+            grad_w1: Gradient for w1,
+                shape (input_dim + 1, hidden_dim).
+            grad_w2: Gradient for w2,
+                shape (hidden_dim, output_dim).
             learning_rate: Learning rate for weight updates.
 
         Returns:
@@ -378,8 +353,8 @@ def update_weights(
             >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
             >>> grad_w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
             >>> grad_w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
-            >>> learning_rate = 0.1
-            >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, learning_rate)
+            >>> lr = 0.1
+            >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, lr)
             >>> new_w1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]])
             array([[ True,  True],
                    [ True,  True],
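
The expected values in this doctest follow from a plain gradient-descent step, new_w = w - lr * grad (for example 0.1 - 0.1 * 0.1 = 0.09). A minimal check of that arithmetic:

```python
import numpy as np

w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
grad_w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
lr = 0.1

new_w1 = w1 - lr * grad_w1  # plain SGD step
print(new_w1)  # approximately [[0.09 0.18] [0.27 0.36] [0.45 0.54]]
```
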
@@ -413,8 +388,7 @@ def update_learning_rate(self, learning_rate: float) -> float:
     @staticmethod
     def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
         """
-        Computes the accuracy of predictions
-        by comparing predicted and true labels.
+        Computes the accuracy of predictions by comparing predicted and true labels.
 
         Args:
             label: True labels, shape (batch_size, num_classes).
@@ -435,8 +409,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
     @staticmethod
     def loss(output: np.ndarray, label: np.ndarray) -> float:
         """
-        Computes the mean squared error loss
-        between predictions and true labels.
+        Computes the mean squared error loss between predictions and true labels.
 
         Args:
             output: Predicted outputs, shape (batch_size, num_classes).
@@ -476,17 +449,16 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]:
     def train(self) -> None:
         """
         Trains the MLP model using the provided dataloader
-            for multiple folds and epochs.
+        for multiple folds and epochs.
 
-        Saves the best model parameters
-        for each fold and records accuracy/loss.
+        Saves the best model parameters for each fold and records accuracy/loss.
 
         Examples:
             >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
             >>> y = [0, 1, 0, 0]
             >>> loader = Dataloader(X, y)
             >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2)
-            >>> mlp.train() #doctest:+ELLIPSIS
+            >>> mlp.train()  # doctest: +ELLIPSIS
             Test accuracy: ...
         """
 
@@ -505,19 +477,17 @@ def train(self) -> None:
 
         w1, w2 = self.initialize()
 
-        train_accuracy_list, train_loss_list = [], []
-        test_accuracy_list, test_loss_list = [], []
+        test_accuracy_list: list[float] = []
+        test_loss_list: list[float] = []
 
         batch_size = 1
 
-        for j in tqdm(range(self.epoch)):
-            for k in range(0, train_data.shape[0], batch_size):
+        for _j in range(self.epoch):
+            for k in range(0, train_data.shape[0], batch_size):  # retrieve every image
                 batch_imgs = train_data[k : k + batch_size]
                 batch_labels = train_labels[k : k + batch_size]
 
-                output = self.forward(
-                    input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False
-                )
+                self.forward(input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False)
 
                 grad_w1, grad_w2 = self.back_prop(
                     input_data=batch_imgs, true_labels=batch_labels, w2=w2
@@ -536,7 +506,7 @@ def train(self) -> None:
 
         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
-        print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
+        print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
 
 
 if __name__ == "__main__":
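
A short end-to-end usage sketch mirroring the module's doctests; the import path is an assumption based on the file location shown above:

```python
from machine_learning.multilayer_perceptron_classifier_from_scratch import (
    MLP,
    Dataloader,
)

X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
y = [0, 1, 0, 0]
loader = Dataloader(X, y)
mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2)
mlp.train()  # prints "Test accuracy: ..."
```
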
