
Commit 0cb6734

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 38ee6e2 commit 0cb6734

File tree

1 file changed: +90 −100 lines changed


machine_learning/multilayer_perceptron_classifier_from_scratch.py

Lines changed: 90 additions & 100 deletions
@@ -1,7 +1,10 @@
 import numpy as np
 from numpy.random import default_rng
 from tqdm import tqdm
+
 rng = default_rng(42)
+
+
 class Dataloader:
     """
     DataLoader class for handling dataset operations. Supports:
@@ -52,7 +55,7 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None:
         self.class_weights = {0: 1.0, 1: 1.0}

     def get_train_test_data(
-        self
+        self,
     ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
         """
         Splits the data into training and testing sets.
@@ -65,16 +68,18 @@ def get_train_test_data(
             - Test data
             - Test labels
         """
-        train_data = np.array([self.X[0], self.X[1], self.X[2]])
-        train_labels = \
-            [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])]
+        train_data = np.array([self.X[0], self.X[1], self.X[2]])
+        train_labels = [
+            np.array([self.y[0]]),
+            np.array([self.y[1]]),
+            np.array([self.y[2]]),
+        ]
         test_data = np.array([self.X[3]])  # Last sample for testing
         test_labels = [np.array([self.y[3]])]  # Labels as np.ndarray
         return train_data, train_labels, test_data, test_labels

     def shuffle_data(
-        self,
-        paired_data: list[tuple[np.ndarray, int]]
+        self, paired_data: list[tuple[np.ndarray, int]]
     ) -> list[tuple[np.ndarray, int]]:
         """
         Shuffles the data randomly.
@@ -90,8 +95,7 @@ def shuffle_data(
         return paired_data

     def get_inout_dim(self) -> tuple[int, int]:
-        train_data, train_labels, test_data, test_labels = (
-            self.get_train_test_data())
+        train_data, train_labels, test_data, test_labels = self.get_train_test_data()
         in_dim = train_data[0].shape[0]
         out_dim = len(train_labels)
         return in_dim, out_dim
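For orientation, the split this hunk touches is hard-wired to a four-sample dataset: samples 0–2 train, sample 3 tests, and get_inout_dim derives its dimensions from that split. A minimal standalone sketch (the XOR-style feature/label values are illustrative, not from this file):

import numpy as np

features = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]  # 4 samples, 2 features each
labels = [0, 1, 1, 0]

# First three samples train, the fourth tests (mirrors get_train_test_data).
train_data = np.array(features[:3])
train_labels = [np.array([y]) for y in labels[:3]]
test_data = np.array(features[3:])
test_labels = [np.array([labels[3]])]

in_dim = train_data[0].shape[0]  # 2 (features per sample)
out_dim = len(train_labels)      # 3 (get_inout_dim counts training labels)
print(in_dim, out_dim)           # 2 3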
@@ -114,53 +118,53 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray:
         return one_hot


-class MLP():
+class MLP:
     """
-    A custom MLP class for implementing a simple multi-layer perceptron with
-    forward propagation, backpropagation.
-
-    Attributes:
-        learning_rate (float): Learning rate for gradient descent.
-        gamma (float): Parameter to control learning rate adjustment.
-        epoch (int): Number of epochs for training.
-        hidden_dim (int): Dimension of the hidden layer.
-        batch_size (int): Number of samples per mini-batch.
-        train_loss (List[float]):
-            List to store training loss for each fold.
-        train_accuracy (List[float]):
-            List to store training accuracy for each fold.
-        test_loss (List[float]): List to store test loss for each fold.
-        test_accuracy (List[float]):
-            List to store test accuracy for each fold.
-        dataloader (Dataloader):
-            DataLoader object for handling training data.
-        inter_variable (dict):
-            Dictionary to store intermediate variables for backpropagation.
-        weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
-            List of weights for each fold.
-
-    Methods:
-        get_inout_dim: obtain input dimension and output dimension.
-        relu: Apply the ReLU activation function.
-        relu_derivative: Compute the derivative of the ReLU function.
-        forward: Perform a forward pass through the network.
-        back_prop: Perform backpropagation to compute gradients.
-        update_weights: Update the weights using gradients.
-        update_learning_rate: Adjust the learning rate based on test accuracy.
-        accuracy: Compute accuracy of the model.
-        loss: Compute weighted MSE loss.
-        train: Train the MLP over multiple folds with early stopping.
+    A custom MLP class for implementing a simple multi-layer perceptron with
+    forward propagation, backpropagation.
+
+    Attributes:
+        learning_rate (float): Learning rate for gradient descent.
+        gamma (float): Parameter to control learning rate adjustment.
+        epoch (int): Number of epochs for training.
+        hidden_dim (int): Dimension of the hidden layer.
+        batch_size (int): Number of samples per mini-batch.
+        train_loss (List[float]):
+            List to store training loss for each fold.
+        train_accuracy (List[float]):
+            List to store training accuracy for each fold.
+        test_loss (List[float]): List to store test loss for each fold.
+        test_accuracy (List[float]):
+            List to store test accuracy for each fold.
+        dataloader (Dataloader):
+            DataLoader object for handling training data.
+        inter_variable (dict):
+            Dictionary to store intermediate variables for backpropagation.
+        weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
+            List of weights for each fold.
+
+    Methods:
+        get_inout_dim: obtain input dimension and output dimension.
+        relu: Apply the ReLU activation function.
+        relu_derivative: Compute the derivative of the ReLU function.
+        forward: Perform a forward pass through the network.
+        back_prop: Perform backpropagation to compute gradients.
+        update_weights: Update the weights using gradients.
+        update_learning_rate: Adjust the learning rate based on test accuracy.
+        accuracy: Compute accuracy of the model.
+        loss: Compute weighted MSE loss.
+        train: Train the MLP over multiple folds with early stopping.


-    """
+    """

     def __init__(
-            self,
-            dataloader: Dataloader,
-            epoch: int,
-            learning_rate: float,
-            gamma: float = 1.0,
-            hidden_dim: int = 2,
+        self,
+        dataloader: Dataloader,
+        epoch: int,
+        learning_rate: float,
+        gamma: float = 1.0,
+        hidden_dim: int = 2,
     ) -> None:
         self.learning_rate = learning_rate
         self.gamma = gamma  # learning_rate decay hyperparameter gamma
@@ -211,10 +215,10 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]:
         """

         in_dim, out_dim = self.dataloader.get_inout_dim()
-        w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim))
-              * np.sqrt(2.0 / in_dim))
-        w2 = (rng.standard_normal((self.hidden_dim, out_dim))
-              * np.sqrt(2.0 / self.hidden_dim))
+        w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim)
+        w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(
+            2.0 / self.hidden_dim
+        )
         return w1, w2

     def relu(self, input_array: np.ndarray) -> np.ndarray:
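The reflowed initializer is He initialization: a standard normal scaled by sqrt(2 / fan_in), the usual choice for ReLU networks because it keeps activation variance roughly constant across layers. A self-contained sketch with arbitrary illustrative sizes:

import numpy as np
from numpy.random import default_rng

rng = default_rng(42)
in_dim, hidden_dim, out_dim = 2, 2, 3  # illustrative sizes only

# He initialization: scale a standard normal by sqrt(2 / fan_in).
# The +1 row in w1 holds the bias, folded in as an extra input.
w1 = rng.standard_normal((in_dim + 1, hidden_dim)) * np.sqrt(2.0 / in_dim)
w2 = rng.standard_normal((hidden_dim, out_dim)) * np.sqrt(2.0 / hidden_dim)
print(w1.shape, w2.shape)  # (3, 2) (2, 3)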
@@ -245,13 +249,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray:
         """
         return (input_array > 0).astype(float)

-
     def forward(
-        self,
-        input_data: np.ndarray,
-        w1: np.ndarray,
-        w2: np.ndarray,
-        no_gradient: bool = False
+        self,
+        input_data: np.ndarray,
+        w1: np.ndarray,
+        w2: np.ndarray,
+        no_gradient: bool = False,
     ) -> np.ndarray:
         """
         Performs a forward pass through the neural network with one hidden layer.
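For reference, the relu/relu_derivative pair this hunk touches reduces to two NumPy one-liners; a free-standing sketch of the same behavior:

import numpy as np

def relu(x: np.ndarray) -> np.ndarray:
    return np.maximum(0, x)  # elementwise max(0, x)

def relu_derivative(x: np.ndarray) -> np.ndarray:
    return (x > 0).astype(float)  # 1.0 where x > 0, else 0.0

x = np.array([-1.0, 0.0, 2.0])
print(relu(x))             # [0. 0. 2.]
print(relu_derivative(x))  # [0. 0. 1.]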
@@ -265,7 +268,7 @@ def forward(
             no_gradient: If True, returns output without storing intermediates.

         Returns:
-            Output of the network after forward pass,
+            Output of the network after forward pass,
             shape (batch_size, output_dim).

         Examples:
@@ -297,10 +300,7 @@ def forward(
         return a2

     def back_prop(
-        self,
-        input_data: np.ndarray,
-        true_labels: np.ndarray,
-        w2: np.ndarray
+        self, input_data: np.ndarray, true_labels: np.ndarray, w2: np.ndarray
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         Performs backpropagation to compute gradients for the weights.
@@ -342,32 +342,28 @@ def back_prop(
             z1
         )  # (batch, hidden_dim) when using ReLU

-        grad_w2 = (
-            np.dot(a1.T, delta_k) / batch_size
-        )
-        input_data_flat = input_data.reshape(input_data.shape[0], -1)
-        grad_w1 = (
-            np.dot(input_data_flat.T, delta_j) / batch_size
-        )
+        grad_w2 = np.dot(a1.T, delta_k) / batch_size
+        input_data_flat = input_data.reshape(input_data.shape[0], -1)
+        grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size

         return grad_w1, grad_w2

     def update_weights(
-        self,
-        w1: np.ndarray,
-        w2: np.ndarray,
-        grad_w1: np.ndarray,
-        grad_w2: np.ndarray,
-        learning_rate: float
+        self,
+        w1: np.ndarray,
+        w2: np.ndarray,
+        grad_w1: np.ndarray,
+        grad_w2: np.ndarray,
+        learning_rate: float,
     ) -> tuple[np.ndarray, np.ndarray]:
         """
-        Updates the weight matrices using
+        Updates the weight matrices using
         the computed gradients and learning rate.

         Args:
-            w1: Weight matrix for input to hidden layer, shape
+            w1: Weight matrix for input to hidden layer, shape
                 (input_dim + 1, hidden_dim).
-            w2: Weight matrix for hidden to output layer, shape
+            w2: Weight matrix for hidden to output layer, shape
                 (hidden_dim, output_dim).
             grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim).
             grad_w2: Gradient for w2, shape (hidden_dim, output_dim).
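The tidied gradient lines are the standard MSE-plus-ReLU backprop algebra: each weight gradient is the batch-averaged product of a layer's input with its error term. A minimal sketch with dummy cached values; the output-error definition delta_k = a2 − labels is an assumption, since that line sits outside this hunk:

import numpy as np

batch_size, in_dim, hidden_dim, out_dim = 2, 3, 2, 3  # illustrative shapes

input_data = np.ones((batch_size, in_dim))  # stand-in for the cached batch
z1 = np.ones((batch_size, hidden_dim))      # stand-in for the cached pre-activation
a1 = np.maximum(0, z1)                      # hidden activation
a2 = np.full((batch_size, out_dim), 0.5)    # stand-in for the network output
true_labels = np.zeros((batch_size, out_dim))
w2 = np.ones((hidden_dim, out_dim))

delta_k = a2 - true_labels                                 # assumed MSE output error
delta_j = np.dot(delta_k, w2.T) * (z1 > 0).astype(float)   # backprop through ReLU
grad_w2 = np.dot(a1.T, delta_k) / batch_size               # (hidden_dim, out_dim)
grad_w1 = np.dot(input_data.T, delta_j) / batch_size       # (in_dim, hidden_dim)
print(grad_w1.shape, grad_w2.shape)  # (3, 2) (2, 3)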
@@ -396,7 +392,6 @@ def update_weights(
         w2 -= learning_rate * grad_w2
         return w1, w2

-
     def update_learning_rate(self, learning_rate: float) -> float:
         """
         Updates the learning rate by applying the decay factor gamma.
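The weight update itself is plain gradient descent. The body of update_learning_rate is not shown in this diff, so the decay step below (learning_rate times gamma) is an assumption based on its docstring:

import numpy as np

learning_rate, gamma = 0.1, 0.9  # illustrative values

w = np.ones((2, 2))
grad_w = np.full((2, 2), 0.5)

w -= learning_rate * grad_w  # vanilla gradient-descent step, as in update_weights
learning_rate *= gamma       # assumed multiplicative decay per the gamma docstring
print(w[0, 0], learning_rate)  # 0.95 0.09 (up to float rounding)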
@@ -418,7 +413,7 @@ def update_learning_rate(self, learning_rate: float) -> float:
     @staticmethod
     def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
         """
-        Computes the accuracy of predictions
+        Computes the accuracy of predictions
         by comparing predicted and true labels.

         Args:
@@ -440,7 +435,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
     @staticmethod
     def loss(output: np.ndarray, label: np.ndarray) -> float:
         """
-        Computes the mean squared error loss
+        Computes the mean squared error loss
         between predictions and true labels.

         Args:
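The class docstring calls this a weighted MSE while the method docstring says plain mean squared error; a plain-MSE sketch for intuition, with made-up numbers:

import numpy as np

output = np.array([[0.8, 0.2], [0.3, 0.7]])  # hypothetical predictions
label = np.array([[1.0, 0.0], [0.0, 1.0]])   # one-hot targets

# Plain (unweighted) MSE: mean of squared differences over every entry.
mse = float(np.mean((output - label) ** 2))
print(mse)  # ~0.065 = (0.04 + 0.04 + 0.09 + 0.09) / 4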
@@ -480,10 +475,10 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]:

     def train(self) -> None:
         """
-        Trains the MLP model using the provided dataloader
+        Trains the MLP model using the provided dataloader
         for multiple folds and epochs.

-        Saves the best model parameters
+        Saves the best model parameters
         for each fold and records accuracy/loss.

         Examples:
@@ -497,12 +492,12 @@ def train(self) -> None:

         learning_rate = self.learning_rate
         train_data, train_labels, test_data, test_labels = (
-            self.dataloader.get_train_test_data())
+            self.dataloader.get_train_test_data()
+        )

         train_data = np.c_[train_data, np.ones(train_data.shape[0])]
         test_data = np.c_[test_data, np.ones(test_data.shape[0])]

-
         _, total_label_num = self.dataloader.get_inout_dim()

         train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
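Two preprocessing steps in this hunk deserve a note: np.c_ glues a constant 1 column onto the data so the bias is folded into w1 (hence its in_dim + 1 rows), and one_hot_encode, defined earlier in the file, turns integer labels into indicator rows. A standalone sketch of both, with the one-hot step written inline since the method body is outside this diff:

import numpy as np

train_data = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]])  # illustrative samples
train_labels = [0, 1, 1]
num_classes = 2

# Append a bias column of ones: (3, 2) -> (3, 3).
train_data = np.c_[train_data, np.ones(train_data.shape[0])]

# One-hot encoding: label i becomes the i-th standard-basis row.
one_hot = np.zeros((len(train_labels), num_classes))
one_hot[np.arange(len(train_labels)), train_labels] = 1.0
print(train_data.shape)  # (3, 3)
print(one_hot)           # rows: [1, 0], [0, 1], [0, 1]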
@@ -516,21 +511,16 @@ def train(self) -> None:
         batch_size = 1

         for j in tqdm(range(self.epoch)):
-            for k in range(0, train_data.shape[0], batch_size):
-
-                batch_imgs = train_data[k: k + batch_size]
-                batch_labels = train_labels[k: k + batch_size]
+            for k in range(0, train_data.shape[0], batch_size):
+                batch_imgs = train_data[k : k + batch_size]
+                batch_labels = train_labels[k : k + batch_size]

                 output = self.forward(
-                    input_data=batch_imgs,
-                    w1=w1,
-                    w2=w2,
-                    no_gradient=False)
+                    input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False
+                )

                 grad_w1, grad_w2 = self.back_prop(
-                    input_data=batch_imgs,
-                    true_labels=batch_labels,
-                    w2=w2
+                    input_data=batch_imgs, true_labels=batch_labels, w2=w2
                 )

                 w1, w2 = self.update_weights(w1, w2, grad_w1, grad_w2, learning_rate)
@@ -546,7 +536,7 @@ def train(self) -> None:

         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
-        print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))
+        print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))


 if __name__ == "__main__":
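For orientation, a sketch of how these classes fit together end to end; the actual __main__ body is outside this diff, so the dataset and hyperparameter values below are illustrative guesses, not the file's own:

features = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]  # hypothetical XOR-style data
labels = [0, 1, 1, 0]

dataloader = Dataloader(features, labels)
mlp = MLP(dataloader=dataloader, epoch=100, learning_rate=0.1, gamma=0.99, hidden_dim=2)
mlp.train()  # prints "Test accuracy: ..." when training finishes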
