 import numpy as np
 from numpy.random import default_rng
 from tqdm import tqdm
+
 rng = default_rng(42)
+
+
 class Dataloader:
     """
     DataLoader class for handling dataset operations. Supports:
@@ -52,7 +55,7 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None:
         self.class_weights = {0: 1.0, 1: 1.0}

     def get_train_test_data(
-        self
+        self,
     ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
         """
         Splits the data into training and testing sets.
@@ -65,16 +68,18 @@ def get_train_test_data(
             - Test data
             - Test labels
         """
-        train_data = np.array([self.X[0], self.X[1], self.X[2]])
-        train_labels = \
-            [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])]
+        train_data = np.array([self.X[0], self.X[1], self.X[2]])
+        train_labels = [
+            np.array([self.y[0]]),
+            np.array([self.y[1]]),
+            np.array([self.y[2]]),
+        ]
         test_data = np.array([self.X[3]])  # Last sample for testing
         test_labels = [np.array([self.y[3]])]  # Labels as np.ndarray
         return train_data, train_labels, test_data, test_labels

     def shuffle_data(
-        self,
-        paired_data: list[tuple[np.ndarray, int]]
+        self, paired_data: list[tuple[np.ndarray, int]]
     ) -> list[tuple[np.ndarray, int]]:
         """
         Shuffles the data randomly.
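For context, a minimal usage sketch (not part of this commit): get_train_test_data hard-codes the first three samples for training and the fourth for testing, so it assumes a four-sample dataset, and the indexing suggests the constructor stores features as self.X and labels as self.y.

loader = Dataloader(
    features=[[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]],  # hypothetical 4-sample dataset
    labels=[0, 1, 1, 0],
)
train_x, train_y, test_x, test_y = loader.get_train_test_data()
# train_x stacks samples 0-2 into shape (3, 2); test_x holds only sample 3, shape (1, 2).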
@@ -90,8 +95,7 @@ def shuffle_data(
         return paired_data

     def get_inout_dim(self) -> tuple[int, int]:
-        train_data, train_labels, test_data, test_labels = (
-            self.get_train_test_data())
+        train_data, train_labels, test_data, test_labels = self.get_train_test_data()
         in_dim = train_data[0].shape[0]
         out_dim = len(train_labels)
         return in_dim, out_dim
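Continuing that sketch: get_inout_dim derives out_dim from len(train_labels), which is the number of training samples (three here) rather than the number of distinct classes, so the two only coincide for particular datasets.

in_dim, out_dim = loader.get_inout_dim()
# in_dim == 2 (per-sample feature dimension), out_dim == 3 (number of training samples)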
@@ -114,53 +118,53 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray:
         return one_hot


-class MLP():
+class MLP:
     """
-    A custom MLP class implementing a simple multi-layer perceptron with
-    forward propagation and backpropagation.
-
-    Attributes:
-        learning_rate (float): Learning rate for gradient descent.
-        gamma (float): Parameter to control learning rate adjustment.
-        epoch (int): Number of epochs for training.
-        hidden_dim (int): Dimension of the hidden layer.
-        batch_size (int): Number of samples per mini-batch.
-        train_loss (List[float]):
-            List to store training loss for each fold.
-        train_accuracy (List[float]):
-            List to store training accuracy for each fold.
-        test_loss (List[float]): List to store test loss for each fold.
-        test_accuracy (List[float]):
-            List to store test accuracy for each fold.
-        dataloader (Dataloader):
-            DataLoader object for handling training data.
-        inter_variable (dict):
-            Dictionary to store intermediate variables for backpropagation.
-        weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
-            List of weights for each fold.
-
-    Methods:
-        get_inout_dim: Obtain input dimension and output dimension.
-        relu: Apply the ReLU activation function.
-        relu_derivative: Compute the derivative of the ReLU function.
-        forward: Perform a forward pass through the network.
-        back_prop: Perform backpropagation to compute gradients.
-        update_weights: Update the weights using gradients.
-        update_learning_rate: Adjust the learning rate using the decay factor gamma.
-        accuracy: Compute accuracy of the model.
-        loss: Compute weighted MSE loss.
-        train: Train the MLP over multiple folds with early stopping.
+    A custom MLP class implementing a simple multi-layer perceptron with
+    forward propagation and backpropagation.
+
+    Attributes:
+        learning_rate (float): Learning rate for gradient descent.
+        gamma (float): Parameter to control learning rate adjustment.
+        epoch (int): Number of epochs for training.
+        hidden_dim (int): Dimension of the hidden layer.
+        batch_size (int): Number of samples per mini-batch.
+        train_loss (List[float]):
+            List to store training loss for each fold.
+        train_accuracy (List[float]):
+            List to store training accuracy for each fold.
+        test_loss (List[float]): List to store test loss for each fold.
+        test_accuracy (List[float]):
+            List to store test accuracy for each fold.
+        dataloader (Dataloader):
+            DataLoader object for handling training data.
+        inter_variable (dict):
+            Dictionary to store intermediate variables for backpropagation.
+        weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
+            List of weights for each fold.
+
+    Methods:
+        get_inout_dim: Obtain input dimension and output dimension.
+        relu: Apply the ReLU activation function.
+        relu_derivative: Compute the derivative of the ReLU function.
+        forward: Perform a forward pass through the network.
+        back_prop: Perform backpropagation to compute gradients.
+        update_weights: Update the weights using gradients.
+        update_learning_rate: Adjust the learning rate using the decay factor gamma.
+        accuracy: Compute accuracy of the model.
+        loss: Compute weighted MSE loss.
+        train: Train the MLP over multiple folds with early stopping.


-    """
+    """

     def __init__(
-        self,
-        dataloader: Dataloader,
-        epoch: int,
-        learning_rate: float,
-        gamma: float = 1.0,
-        hidden_dim: int = 2,
+        self,
+        dataloader: Dataloader,
+        epoch: int,
+        learning_rate: float,
+        gamma: float = 1.0,
+        hidden_dim: int = 2,
     ) -> None:
         self.learning_rate = learning_rate
         self.gamma = gamma  # learning_rate decay hyperparameter gamma
@@ -211,10 +215,10 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]:
         """

         in_dim, out_dim = self.dataloader.get_inout_dim()
-        w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim))
-              * np.sqrt(2.0 / in_dim))
-        w2 = (rng.standard_normal((self.hidden_dim, out_dim))
-              * np.sqrt(2.0 / self.hidden_dim))
+        w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim)
+        w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(
+            2.0 / self.hidden_dim
+        )
         return w1, w2

     def relu(self, input_array: np.ndarray) -> np.ndarray:
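The scaling applied in initialize is He initialization for ReLU networks: weights are drawn from a normal distribution with variance 2 / fan_in, and the extra in_dim + 1 row of w1 leaves room for the bias column that train() appends to the inputs. A standalone sketch of the intended scale:

import numpy as np
from numpy.random import default_rng

gen = default_rng(0)
fan_in = 1000
w = gen.standard_normal((fan_in, 64)) * np.sqrt(2.0 / fan_in)
print(round(float(w.std()), 3))  # ~0.045, i.e. sqrt(2 / fan_in)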
@@ -245,13 +249,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray:
         """
         return (input_array > 0).astype(float)

-
     def forward(
-        self,
-        input_data: np.ndarray,
-        w1: np.ndarray,
-        w2: np.ndarray,
-        no_gradient: bool = False
+        self,
+        input_data: np.ndarray,
+        w1: np.ndarray,
+        w2: np.ndarray,
+        no_gradient: bool = False,
     ) -> np.ndarray:
         """
         Performs a forward pass through the neural network with one hidden layer.
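relu_derivative above maps strictly positive entries to 1.0 and everything else (including exactly 0) to 0.0; during backpropagation it acts as an elementwise gate on the hidden-layer error. For example:

import numpy as np

x = np.array([-2.0, 0.0, 3.0])
print((x > 0).astype(float))  # [0. 0. 1.]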
@@ -265,7 +268,7 @@ def forward(
             no_gradient: If True, returns output without storing intermediates.

         Returns:
-            Output of the network after forward pass,
+            Output of the network after forward pass,
             shape (batch_size, output_dim).

         Examples:
@@ -297,10 +300,7 @@ def forward(
         return a2

     def back_prop(
-        self,
-        input_data: np.ndarray,
-        true_labels: np.ndarray,
-        w2: np.ndarray
+        self, input_data: np.ndarray, true_labels: np.ndarray, w2: np.ndarray
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         Performs backpropagation to compute gradients for the weights.
@@ -342,32 +342,28 @@ def back_prop(
             z1
         )  # (batch, hidden_dim) when using ReLU

-        grad_w2 = (
-            np.dot(a1.T, delta_k) / batch_size
-        )
-        input_data_flat = input_data.reshape(input_data.shape[0], -1)
-        grad_w1 = (
-            np.dot(input_data_flat.T, delta_j) / batch_size
-        )
+        grad_w2 = np.dot(a1.T, delta_k) / batch_size
+        input_data_flat = input_data.reshape(input_data.shape[0], -1)
+        grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size

         return grad_w1, grad_w2

     def update_weights(
-        self,
-        w1: np.ndarray,
-        w2: np.ndarray,
-        grad_w1: np.ndarray,
-        grad_w2: np.ndarray,
-        learning_rate: float
+        self,
+        w1: np.ndarray,
+        w2: np.ndarray,
+        grad_w1: np.ndarray,
+        grad_w2: np.ndarray,
+        learning_rate: float,
     ) -> tuple[np.ndarray, np.ndarray]:
         """
-        Updates the weight matrices using
+        Updates the weight matrices using
         the computed gradients and learning rate.

         Args:
-            w1: Weight matrix for input to hidden layer, shape
+            w1: Weight matrix for input to hidden layer, shape
                 (input_dim + 1, hidden_dim).
-            w2: Weight matrix for hidden to output layer, shape
+            w2: Weight matrix for hidden to output layer, shape
                 (hidden_dim, output_dim).
             grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim).
             grad_w2: Gradient for w2, shape (hidden_dim, output_dim).
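The gradient lines in back_prop follow the usual two-layer pattern: an output-layer error delta_k, a hidden-layer error delta_j obtained by propagating delta_k back through w2 and gating it with relu_derivative(z1), and weight gradients formed from activation-transposed products averaged over the batch. A plain-NumPy recap under the assumption of an unweighted MSE loss and a linear output layer (the delta_k definition itself sits outside the hunks shown):

import numpy as np

x = np.array([[1.0, 2.0, 1.0]])   # (1, input_dim + 1), bias column included
y = np.array([[0.0, 1.0, 0.0]])   # (1, output_dim), one-hot target
w1 = np.full((3, 2), 0.1)
w2 = np.full((2, 3), 0.1)

z1 = x @ w1                        # hidden pre-activation, (1, 2)
a1 = np.maximum(z1, 0.0)           # ReLU activation
a2 = a1 @ w2                       # network output, (1, 3)

delta_k = a2 - y                               # output error (up to a constant) for MSE
delta_j = (delta_k @ w2.T) * (z1 > 0).astype(float)
grad_w2 = a1.T @ delta_k / x.shape[0]          # (hidden_dim, output_dim)
grad_w1 = x.T @ delta_j / x.shape[0]           # (input_dim + 1, hidden_dim)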
@@ -396,7 +392,6 @@ def update_weights(
         w2 -= learning_rate * grad_w2
         return w1, w2

-
     def update_learning_rate(self, learning_rate: float) -> float:
         """
         Updates the learning rate by applying the decay factor gamma.
@@ -418,7 +413,7 @@ def update_learning_rate(self, learning_rate: float) -> float:
     @staticmethod
     def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
         """
-        Computes the accuracy of predictions
+        Computes the accuracy of predictions
         by comparing predicted and true labels.

         Args:
@@ -440,7 +435,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
     @staticmethod
     def loss(output: np.ndarray, label: np.ndarray) -> float:
         """
-        Computes the mean squared error loss
+        Computes the mean squared error loss
         between predictions and true labels.

         Args:
@@ -480,10 +475,10 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]:

     def train(self) -> None:
         """
-        Trains the MLP model using the provided dataloader
+        Trains the MLP model using the provided dataloader
         for multiple folds and epochs.

-        Saves the best model parameters
+        Saves the best model parameters
         for each fold and records accuracy/loss.

         Examples:
@@ -497,12 +492,12 @@ def train(self) -> None:

         learning_rate = self.learning_rate
         train_data, train_labels, test_data, test_labels = (
-            self.dataloader.get_train_test_data())
+            self.dataloader.get_train_test_data()
+        )

         train_data = np.c_[train_data, np.ones(train_data.shape[0])]
         test_data = np.c_[test_data, np.ones(test_data.shape[0])]

-
         _, total_label_num = self.dataloader.get_inout_dim()

         train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
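np.c_ appends a column of ones to every sample, which is why initialize allocates in_dim + 1 rows for w1: the bias is learned as an ordinary weight row. For example:

import numpy as np

x = np.array([[0.0, 1.0], [1.0, 0.0]])
print(np.c_[x, np.ones(x.shape[0])])
# [[0. 1. 1.]
#  [1. 0. 1.]]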
@@ -516,21 +511,16 @@ def train(self) -> None:
             batch_size = 1

         for j in tqdm(range(self.epoch)):
-            for k in range(0, train_data.shape[0], batch_size):
-
-                batch_imgs = train_data[k : k + batch_size]
-                batch_labels = train_labels[k : k + batch_size]
+            for k in range(0, train_data.shape[0], batch_size):
+                batch_imgs = train_data[k : k + batch_size]
+                batch_labels = train_labels[k : k + batch_size]

                 output = self.forward(
-                    input_data=batch_imgs,
-                    w1=w1,
-                    w2=w2,
-                    no_gradient=False)
+                    input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False
+                )

                 grad_w1, grad_w2 = self.back_prop(
-                    input_data=batch_imgs,
-                    true_labels=batch_labels,
-                    w2=w2
+                    input_data=batch_imgs, true_labels=batch_labels, w2=w2
                 )

                 w1, w2 = self.update_weights(w1, w2, grad_w1, grad_w2, learning_rate)
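With batch_size forced to 1 above, every iteration of the inner loop sees a single sample; the slice notation keeps the leading batch axis, so forward and back_prop always receive 2-D arrays. A small illustration:

import numpy as np

data = np.arange(8).reshape(4, 2)
batches = [data[k : k + 1] for k in range(0, data.shape[0], 1)]
# four batches, each of shape (1, 2); a batch_size that does not divide the
# sample count would simply yield a shorter final slice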
@@ -546,7 +536,7 @@ def train(self) -> None:

         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
-        print("Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))
+        print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))


 if __name__ == "__main__":
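The body under __main__ is outside the hunks shown. Based only on the constructor and method signatures visible above, a minimal way to wire the pieces together might look like this (illustrative, not the actual script):

loader = Dataloader(
    features=[[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]],
    labels=[0, 1, 1, 0],
)
model = MLP(dataloader=loader, epoch=100, learning_rate=0.1, gamma=0.99, hidden_dim=2)
model.train()
accuracy_list, loss_list = model.get_acc_loss()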