 import numpy as np
 from numpy.random import default_rng
-from tqdm import tqdm
-
 rng = default_rng(42)  # module-level generator with a fixed seed for reproducibility
-
-
 class Dataloader:
     """
-    DataLoader class for handling dataset operations. Supports:
-    - data shuffling
-    - one-hot encoding
-    - train/test splitting
+    DataLoader class for handling a dataset, including data shuffling,
+    one-hot encoding, and train-test splitting.

     Example usage:
     >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
     >>> y = [0, 1, 0, 0]
     >>> loader = Dataloader(X, y)
-    >>> train_X, train_y, test_X, test_y = loader.get_train_test_data()
-    >>> train_X.shape
-    (3, 2)
-    >>> len(train_y)
-    3
-    >>> test_X.shape
-    (1, 2)
-    >>> len(test_y)
-    1
+    >>> len(loader.get_train_test_data())  # Returns train and test data
+    4
     >>> loader.one_hot_encode([0, 1, 0], 2)  # Returns one-hot encoded labels
     array([[0.99, 0.  ],
            [0.  , 0.99],
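# Editor's note: the one_hot_encode body falls outside this hunk. A minimal
# sketch consistent with the doctest above (hypothetical, not necessarily the
# author's code) would scale targets to 0.99, a common trick so saturating
# output activations can actually reach them:
#
#     def one_hot_encode(self, labels: list[int], num_classes: int) -> np.ndarray:
#         return np.eye(num_classes)[np.array(labels)] * 0.99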
@@ -38,21 +25,17 @@ class Dataloader:

     def __init__(self, features: list[list[float]], labels: list[int]) -> None:
         """
-        Initializes the Dataloader instance
-        with a feature matrix (`features`)
-        and corresponding labels (`labels`).
+        Initializes the Dataloader instance with a feature matrix
+        (`features`) and corresponding labels (`labels`).

         Args:
-            features: Feature matrix of shape
-                (n_samples, n_features).
-            labels: List of labels of shape
-                (n_samples,).
+            features: Feature matrix of shape (n_samples, n_features).
+            labels: List of labels of shape (n_samples,).
         """
-        # random seed
-        self.rng = default_rng(42)
         self.X = np.array(features)
         self.y = np.array(labels)
-        self.class_weights = {0: 1.0, 1: 1.0}
+        self.class_weights = {0: 1.0, 1: 1.0}  # Example class weights, adjust as needed

     def get_train_test_data(
         self,
@@ -74,8 +57,8 @@ def get_train_test_data(
             np.array([self.y[1]]),
             np.array([self.y[2]]),
         ]
-        test_data = np.array([self.X[3]])  # Last sample for testing
-        test_labels = [np.array([self.y[3]])]  # Labels as np.ndarray
+        test_data = np.array([self.X[3]])
+        test_labels = [np.array([self.y[3]])]
         return train_data, train_labels, test_data, test_labels
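# Note: this is a hard-coded 3/1 split for the four-sample dataset in the class
# docstring: samples 0-2 train, sample 3 tests. A ratio-based split (a sketch,
# not part of this commit) could instead slice by proportion:
#
#     n_train = int(0.75 * len(self.X))
#     train_data, test_data = self.X[:n_train], self.X[n_train:]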

     def shuffle_data(
@@ -85,13 +68,11 @@ def shuffle_data(
         Shuffles the data randomly.

         Args:
-            paired_data: List of tuples containing data
-                and corresponding labels.
+            paired_data: List of tuples containing data and corresponding labels.

         Returns:
             A shuffled list of data-label pairs.
         """
-        default_rng.shuffle(paired_data)  # Using the new random number generator
         return paired_data
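# With the buggy `default_rng.shuffle(paired_data)` call removed (shuffle was
# being called on the factory function, not on a Generator instance), this
# method now returns the pairs unshuffled, which keeps the doctests
# deterministic. Real in-place shuffling could reuse the module-level generator:
#
#     rng.shuffle(paired_data)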

     def get_inout_dim(self) -> tuple[int, int]:
@@ -129,19 +110,15 @@ class MLP:
         epoch (int): Number of epochs for training.
         hidden_dim (int): Dimension of the hidden layer.
         batch_size (int): Number of samples per mini-batch.
-        train_loss (List[float]):
-            List to store training loss for each fold.
-        train_accuracy (List[float]):
-            List to store training accuracy for each fold.
+        train_loss (List[float]): List to store training loss for each fold.
+        train_accuracy (List[float]): List to store training accuracy for each fold.
         test_loss (List[float]): List to store test loss for each fold.
-        test_accuracy (List[float]):
-            List to store test accuracy for each fold.
-        dataloader (Dataloader):
-            DataLoader object for handling training data.
-        inter_variable (dict):
-            Dictionary to store intermediate variables for backpropagation.
+        test_accuracy (List[float]): List to store test accuracy for each fold.
+        dataloader (Dataloader): DataLoader object for handling training data.
+        inter_variable (dict): Dictionary to store intermediate variables
+            for backpropagation.
         weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
-        List of weights for each fold.
+            List of weights for each fold.

     Methods:
         get_inout_dim: Obtain the input and output dimensions.
@@ -159,26 +136,26 @@ class MLP:
     """

     def __init__(
-        self,
-        dataloader: Dataloader,
-        epoch: int,
-        learning_rate: float,
-        gamma: float = 1.0,
-        hidden_dim: int = 2,
+        self,
+        dataloader: Dataloader,
+        epoch: int,
+        learning_rate: float,
+        gamma: float = 1.0,
+        hidden_dim: int = 2,
     ) -> None:
         self.learning_rate = learning_rate
         self.gamma = gamma  # learning-rate decay hyperparameter
         self.epoch = epoch
         self.hidden_dim = hidden_dim

-        self.train_loss = []
-        self.train_accuracy = []
-        self.test_loss = []
-        self.test_accuracy = []
+        self.train_loss: list[float] = []
+        self.train_accuracy: list[float] = []
+        self.test_loss: list[float] = []
+        self.test_accuracy: list[float] = []

         self.dataloader = dataloader
-        self.inter_variable = {}
-        self.weights1_list = []
+        self.inter_variable: dict[str, np.ndarray] = {}
+        self.weights1_list: list[np.ndarray] = []

     def get_inout_dim(self) -> tuple[int, int]:
         """
@@ -215,7 +192,8 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]:
         """

         in_dim, out_dim = self.dataloader.get_inout_dim()
-        w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim)
+        w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) *
+              np.sqrt(2.0 / in_dim))
         w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(
             2.0 / self.hidden_dim
         )
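# Both layers use He initialization: standard-normal draws scaled by
# sqrt(2 / fan_in), which keeps activation variance roughly stable through
# ReLU layers. The `in_dim + 1` accounts for the bias column appended to the
# input (see the forward doctest below).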
@@ -262,23 +240,19 @@ def forward(
         Args:
             input_data: Input data, shape (batch_size, input_dim).
             w1: Weight matrix for input to hidden layer,
-            shape (input_dim + 1, hidden_dim).
+                shape (input_dim + 1, hidden_dim).
             w2: Weight matrix for hidden to output layer,
-            shape (hidden_dim, output_dim).
+                shape (hidden_dim, output_dim).
             no_gradient: If True, returns output without storing intermediates.

         Returns:
-            Output of the network after forward pass,
-            shape (batch_size, output_dim).
+            Output of the network after forward pass, shape (batch_size, output_dim).

         Examples:
             >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
-            >>> x = np.array([[1.0, 2.0, 1.0]])
-
+            >>> x = np.array([[1.0, 2.0, 1.0]])  # batch_size=1, input_dim=2 + bias
             >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
-
             >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
-
             >>> output = mlp.forward(x, w1, w2)
             >>> output.shape
             (1, 2)
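# The forward body itself is outside this hunk. Judging from the
# intermediates that back_prop reads (z1, a1, a2) and the ReLU derivative used
# there, the computation is presumably shaped like (an assumption, not shown
# in this diff):
#
#     z1 = input_data @ w1       # (batch, hidden_dim); input carries a bias column
#     a1 = relu(z1)              # hidden activation
#     a2 = activation(a1 @ w2)   # output activation; exact choice not visible here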
@@ -306,33 +280,29 @@ def back_prop(
         Performs backpropagation to compute gradients for the weights.

         Args:
-            input_data: Input data, shape
-                (batch_size, input_dim).
-            true_labels: True labels, shape
-                (batch_size, output_dim).
-            w1: Weight matrix for input to
-                hidden layer, shape (input_dim + 1, hidden_dim).
-            w2: Weight matrix for hidden
-                to output layer, shape (hidden_dim, output_dim).
+            input_data: Input data, shape (batch_size, input_dim).
+            true_labels: True labels, shape (batch_size, output_dim).
+            w2: Weight matrix for hidden to output layer,
+                shape (hidden_dim, output_dim).

         Returns:
             Tuple of gradients (grad_w1, grad_w2) for the weight matrices.
         Examples:
             >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
-            >>> x = np.array([[1.0, 2.0, 1.0]])
-            >>> y = np.array([[0.0, 1.0]])
+            >>> x = np.array([[1.0, 2.0, 1.0]])  # batch_size=1, input_dim=2 + bias
+            >>> y = np.array([[0.0, 1.0]])  # batch_size=1, output_dim=2
             >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
-            >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
-            >>> _ = mlp.forward(x, w1, w2)
+            >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])  # (hidden_dim=2, output_dim=2)
+            >>> _ = mlp.forward(x, w1, w2)  # Run forward to set inter_variable
             >>> grad_w1, grad_w2 = mlp.back_prop(x, y, w2)
             >>> grad_w1.shape
             (3, 2)
             >>> grad_w2.shape
             (2, 2)
         """
-        a1 = self.inter_variable["a1"]
+        a1 = self.inter_variable["a1"]  # (batch_size, hidden_dim)
         z1 = self.inter_variable["z1"]
-        a2 = self.inter_variable["a2"]
+        a2 = self.inter_variable["a2"]  # (batch_size, output_dim)

         batch_size = input_data.shape[0]

@@ -342,9 +312,13 @@ def back_prop(
             z1
         )  # (batch, hidden_dim) when using ReLU

-        grad_w2 = np.dot(a1.T, delta_k) / batch_size
+        grad_w2 = (
+            np.dot(a1.T, delta_k) / batch_size
+        )  # (hidden, batch).dot(batch, output) = (hidden, output)
         input_data_flat = input_data.reshape(input_data.shape[0], -1)
-        grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size
+        grad_w1 = (
+            np.dot(input_data_flat.T, delta_j) / batch_size
+        )  # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)

         return grad_w1, grad_w2

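# For reference: delta_k is the output-layer error term and
# delta_j = (delta_k @ w2.T) * relu'(z1) the hidden-layer one; the gradients
# above are the batch-averaged products of each layer's input with its error
# term. delta_k's exact form sits in the lines elided just above this hunk.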
@@ -357,16 +331,17 @@ def update_weights(
         learning_rate: float,
     ) -> tuple[np.ndarray, np.ndarray]:
         """
-        Updates the weight matrices using
-        the computed gradients and learning rate.
+        Updates the weight matrices using the computed gradients and learning rate.

         Args:
-            w1: Weight matrix for input to hidden layer, shape
-                (input_dim + 1, hidden_dim).
-            w2: Weight matrix for hidden to output layer, shape
-                (hidden_dim, output_dim).
-            grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim).
-            grad_w2: Gradient for w2, shape (hidden_dim, output_dim).
+            w1: Weight matrix for input to hidden layer,
+                shape (input_dim + 1, hidden_dim).
+            w2: Weight matrix for hidden to output layer,
+                shape (hidden_dim, output_dim).
+            grad_w1: Gradient for w1,
+                shape (input_dim + 1, hidden_dim).
+            grad_w2: Gradient for w2,
+                shape (hidden_dim, output_dim).
             learning_rate: Learning rate for weight updates.

         Returns:
@@ -378,8 +353,8 @@ def update_weights(
         >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
         >>> grad_w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
         >>> grad_w2 = np.array([[0.7, 0.8], [0.9, 1.0]])
-        >>> learning_rate = 0.1
-        >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, learning_rate)
+        >>> lr = 0.1
+        >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, lr)
         >>> new_w1 == np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]])
         array([[ True,  True],
                [ True,  True],
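# The doctest checks one plain gradient-descent step, w_new = w - lr * grad:
# e.g. 0.1 - 0.1 * 0.1 = 0.09 for the first entry of w1.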
@@ -413,8 +388,7 @@ def update_learning_rate(self, learning_rate: float) -> float:
     @staticmethod
     def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
         """
-        Computes the accuracy of predictions
-        by comparing predicted and true labels.
+        Computes the accuracy of predictions by comparing predicted and true labels.

         Args:
             label: True labels, shape (batch_size, num_classes).
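# The accuracy body is outside this hunk; a standard implementation matching
# this docstring (hypothetical, not confirmed by the diff) would compare
# class argmaxes:
#
#     return float(np.mean(np.argmax(y_hat, axis=1) == np.argmax(label, axis=1)))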
@@ -435,8 +409,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
     @staticmethod
     def loss(output: np.ndarray, label: np.ndarray) -> float:
         """
-        Computes the mean squared error loss
-        between predictions and true labels.
+        Computes the mean squared error loss between predictions and true labels.

         Args:
             output: Predicted outputs, shape (batch_size, num_classes).
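# The loss body is also elided here; for mean squared error it is presumably
# something like (hypothetical):
#
#     return float(np.mean((output - label) ** 2))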
@@ -476,17 +449,16 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]:
     def train(self) -> None:
         """
         Trains the MLP model using the provided dataloader
-        for multiple folds and epochs.
+            for multiple folds and epochs.

-        Saves the best model parameters
-        for each fold and records accuracy/loss.
+        Saves the best model parameters for each fold and records accuracy/loss.

         Examples:
         >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
         >>> y = [0, 1, 0, 0]
         >>> loader = Dataloader(X, y)
         >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2)
-        >>> mlp.train() #doctest:+ELLIPSIS
+        >>> mlp.train()  # doctest: +ELLIPSIS
         Test accuracy: ...
         """

@@ -505,19 +477,17 @@ def train(self) -> None:

         w1, w2 = self.initialize()

-        train_accuracy_list, train_loss_list = [], []
-        test_accuracy_list, test_loss_list = [], []
+        test_accuracy_list: list[float] = []
+        test_loss_list: list[float] = []

         batch_size = 1

-        for j in tqdm(range(self.epoch)):
-            for k in range(0, train_data.shape[0], batch_size):
+        for _j in range(self.epoch):
+            for k in range(0, train_data.shape[0], batch_size):  # one sample per batch
                 batch_imgs = train_data[k : k + batch_size]
                 batch_labels = train_labels[k : k + batch_size]

-                output = self.forward(
-                    input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False
-                )
+                self.forward(input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False)

                 grad_w1, grad_w2 = self.back_prop(
                     input_data=batch_imgs, true_labels=batch_labels, w2=w2
@@ -536,7 +506,7 @@ def train(self) -> None:

         self.test_accuracy = test_accuracy_list
         self.test_loss = test_loss_list
-        print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
+        print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))


 if __name__ == "__main__":
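# The body of the __main__ guard is cut off in this view. A minimal driver
# consistent with the doctests above (hypothetical, not the author's verbatim
# code) would simply run them:
#
#     import doctest
#
#     doctest.testmod()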