import numpy as np
from numpy.random import default_rng
from tqdm import tqdm


class Dataloader:
    """
    Dataloader class for handling dataset operations. Supports:
    - data shuffling
    - one-hot encoding
    - train/test splitting

    Example usage:
    >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
    >>> y = [0, 1, 0, 0]
    >>> loader = Dataloader(X, y)
    >>> train_X, train_y, test_X, test_y = loader.get_train_test_data()
    >>> train_X.shape
    (3, 2)
    >>> len(train_y)
    3
    """

    def __init__(self, features: list[list[float]], labels: list[int]) -> None:
        """
        Initializes the Dataloader instance with a feature matrix (`features`)
        and corresponding labels (`labels`).

        Args:
            features: Feature matrix of shape (n_samples, n_features).
            labels: List of integer labels, one per sample.
        """
        self.X = np.array(features)
        self.y = np.array(labels)
        self.class_weights = {0: 1.0, 1: 1.0}  # Example class weights; adjust as needed.

    def get_train_test_data(self) -> tuple[np.ndarray, list[np.ndarray], np.ndarray, list[np.ndarray]]:
        """
        Splits the data into training and testing sets. The split is manual:
        the first three samples are used for training and the last one for
        testing.

        Returns:
            A tuple containing:
            - Training data
            - Training labels
            - Test data
            - Test labels
        """
        train_data = np.array([self.X[0], self.X[1], self.X[2]])  # First 3 samples for training
        train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])]  # Labels as np.ndarray
        test_data = np.array([self.X[3]])  # Last sample for testing
        test_labels = [np.array([self.y[3]])]  # Labels as np.ndarray
        return train_data, train_labels, test_data, test_labels
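    # With the four-sample dataset from the class docstring, train_data has
    # shape (3, 2) and test_data has shape (1, 2); the labels stay a list of
    # single-element arrays.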

    def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]:
        """
        Shuffles the data randomly.

        Args:
            paired_data: List of (sample, label) pairs.

        Returns:
            The shuffled list of (sample, label) pairs.
        """
        # The shuffle itself is elided in this excerpt; an in-place shuffle via
        # numpy's default_rng (imported above) is assumed here.
        default_rng().shuffle(paired_data)
        return paired_data

    def get_inout_dim(self) -> tuple[int, int]:
        """
        Returns the input dimension (features per sample) and the output
        dimension used to size the network.
        """
        train_data, train_labels, test_data, test_labels = self.get_train_test_data()
        in_dim = train_data[0].shape[0]
        out_dim = len(train_labels)  # Note: this counts training samples, not distinct classes.
        return in_dim, out_dim

    @staticmethod
    def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray:
        """
        One-hot encodes integer labels: label i maps to a vector with 1.0 at
        index i and 0.0 elsewhere.
        """
        # Everything except the final return is elided in this excerpt; a
        # standard one-hot construction is assumed, and @staticmethod is
        # likewise assumed from the self-less signature.
        one_hot = np.zeros((len(labels), num_classes))
        for row, label in enumerate(labels):
            one_hot[row, int(np.asarray(label).item())] = 1.0
        return one_hot
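    # Example (illustrative): one_hot_encode([0, 1], num_classes=2) returns
    # array([[1., 0.],
    #        [0., 1.]])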


class MLP:
    """
    A custom MLP class implementing a simple multi-layer perceptron with
    forward propagation and backpropagation.

    Attributes:
        learning_rate (float): Learning rate for gradient descent.
        gamma (float): Parameter to control learning rate adjustment.
        epoch (int): Number of epochs for training.
        hidden_dim (int): Dimension of the hidden layer.
        batch_size (int): Number of samples per mini-batch.
        train_loss (list[float]): Training loss for each fold.
        train_accuracy (list[float]): Training accuracy for each fold.
        test_loss (list[float]): Test loss for each fold.
        test_accuracy (list[float]): Test accuracy for each fold.
        dataloader (Dataloader): Dataloader object for handling training data.
        inter_variable (dict): Intermediate variables for backpropagation.
        weights1_list (list[tuple[np.ndarray, np.ndarray]]): Weights for each fold.

    Methods:
        get_inout_dim: Obtain the input and output dimensions.
        relu: Apply the ReLU activation function.
        relu_derivative: Compute the derivative of the ReLU function.
        forward: Perform a forward pass through the network.
        back_prop: Perform backpropagation to compute gradients.
        update_weights: Update the weights using gradients.
        update_learning_rate: Adjust the learning rate based on test accuracy.
        accuracy: Compute accuracy of the model.
        loss: Compute weighted MSE loss.
        train: Train the MLP over multiple folds with early stopping.
    """

    def __init__(
        self,
        dataloader: Dataloader,
        epoch: int,
        learning_rate: float,
        gamma: float = 1.0,
        hidden_dim: int = 2,
    ) -> None:
        self.learning_rate = learning_rate
        self.gamma = gamma  # Learning-rate decay hyperparameter.
        self.epoch = epoch
        self.hidden_dim = hidden_dim
        # ... (the remaining attribute initialization is elided in this
        # excerpt) ...

    def initialize(self) -> tuple[np.ndarray, np.ndarray]:
        """
        Initializes the two weight matrices with small random values.

        Returns:
            A tuple (w1, w2) of weight matrices.
        """

        in_dim, out_dim = self.dataloader.get_inout_dim()  # in_dim here is the image dim
        w1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01  # (in_dim + 1, hidden)

        w2 = np.random.randn(self.hidden_dim, out_dim) * 0.01  # (hidden, output)
        return w1, w2

    def relu(self, input_array: np.ndarray) -> np.ndarray:
        """
        Applies the ReLU activation function element-wise: max(0, x).
        """
        return np.maximum(0, input_array)

    def relu_derivative(self, input_array: np.ndarray) -> np.ndarray:
        """
        Computes the derivative of ReLU: 1.0 where the input is positive,
        0.0 elsewhere.
        """
        return (input_array > 0).astype(float)
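    # Worked example (illustrative): for input np.array([-2.0, 0.0, 3.0]) the
    # derivative is array([0., 0., 1.]); it is zero at 0.0 by the strict
    # inequality used above.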

    def forward(
        self,
        input_data: np.ndarray,
        W1: np.ndarray,
        W2: np.ndarray,
        no_gradient: bool = False,
    ) -> np.ndarray:
        """
        Performs a forward pass through the neural network with one hidden layer.
        """
        # ... (argument documentation and the layer computations are elided in
        # this excerpt) ...
        return a2
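    # Shape flow, inferred from initialize() and back_prop() since the body is
    # elided above: input (batch, in_dim + 1) @ W1 (in_dim + 1, hidden) gives
    # a1 (batch, hidden); a1 @ W2 (hidden, out) gives a2 (batch, out).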

    def back_prop(
        self,
        input_data: np.ndarray,
        true_labels: np.ndarray,
        W1: np.ndarray,
        W2: np.ndarray,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Performs backpropagation to compute gradients for the weights.
        """
        # ... (the output-layer error delta_k, hidden-layer error delta_j, the
        # cached activation a1, and batch_size come from lines elided in this
        # excerpt) ...
        grad_w2 = (
            np.dot(a1.T, delta_k) / batch_size
        )  # (hidden, batch) @ (batch, output) -> (hidden, output)
        input_data_flat = input_data.reshape(input_data.shape[0], -1)  # (batch_size, input_dim)
        grad_w1 = (
            np.dot(input_data_flat.T, delta_j) / batch_size
        )  # (input_dim, batch) @ (batch, hidden) -> (input, hidden)

        return grad_w1, grad_w2
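    # Chain-rule summary matching the visible gradient lines (the delta_j
    # formula is an inference for the elided part, using the cached
    # pre-activation z1 from the forward pass):
    #   delta_j = (delta_k @ W2.T) * relu'(z1)
    #   grad_w2 = a1.T @ delta_k / batch_size    # (hidden, out)
    #   grad_w1 = x.T @ delta_j / batch_size     # (in, hidden)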

    def update_weights(
        self,
        w1: np.ndarray,
        w2: np.ndarray,
        grad_w1: np.ndarray,
        grad_w2: np.ndarray,
        learning_rate: float,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Updates the weight matrices using the computed gradients and learning rate.
        """
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
        return w1, w2
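    # Plain gradient descent: w <- w - learning_rate * dL/dw. Note that -=
    # mutates the arrays in place, so the caller's W1/W2 are updated even
    # before rebinding the returned values.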

    def update_learning_rate(self, learning_rate: float) -> float:
        """
        Updates the learning rate by applying the decay factor gamma.
        """
        # The remainder of this method is elided in this excerpt; returning the
        # gamma-decayed rate is assumed from the docstring above.
        return learning_rate * self.gamma

    # ... (the accuracy and loss methods are elided in this excerpt) ...

    def train(self) -> None:
        """
        Trains the MLP on the manual train/test split.

        Example usage:
        >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
        >>> y = [0, 1, 0, 0]
        >>> loader = Dataloader(X, y)
        >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2)
        >>> mlp.train()  # doctest: +ELLIPSIS
        Test accuracy: ...
        """

        learning_rate = self.learning_rate
        train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data()

        # Append a bias column of ones to the inputs (matching the in_dim + 1
        # rows of w1).
        train_data = np.c_[train_data, np.ones(train_data.shape[0])]
        test_data = np.c_[test_data, np.ones(test_data.shape[0])]

        _, total_label_num = self.dataloader.get_inout_dim()

        train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
        # ... (elided in this excerpt: presumably the matching one-hot encoding
        # of test_labels and the setup of W1, W2, batch_size, and the test
        # metric lists used below) ...

        for _ in tqdm(range(self.epoch)):
            for k in range(0, train_data.shape[0], batch_size):  # iterate over mini-batches
                batch_imgs = train_data[k : k + batch_size]
                batch_labels = train_labels[k : k + batch_size]

                # The forward pass caches intermediates used by back_prop.
                output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False)

                grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2)

                W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate)

            # ... (the per-epoch test evaluation that fills test_accuracy_list
            # and test_loss_list is elided in this excerpt) ...

        self.test_accuracy = test_accuracy_list
        self.test_loss = test_loss_list
        print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))


if __name__ == "__main__":
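    # The original entry-point body is not shown in this excerpt; running the
    # module's doctests is assumed here, consistent with the examples above.
    import doctest

    doctest.testmod()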