lxmls/deep_learning/numpy_models/rnn.py (38 changes: 21 additions & 17 deletions)

@@ -11,34 +11,36 @@ def __init__(self, **config):
         # self.parameters
         RNN.__init__(self, **config)
 
-    def predict(self, input=None):
+    def predict(self, model_input=None):
         """
         Predict model outputs given input
         """
-        p_y = np.exp(self.log_forward(input)[0])
+        p_y = np.exp(self.log_forward(model_input)[0])
         return np.argmax(p_y, axis=1)
 
-    def update(self, input=None, output=None):
+    def update(self, model_input=None, output=None):
         """
         Update model parameters given batch of data
         """
-        gradients = self.backpropagation(input, output)
+        gradients = self.backpropagation(model_input, output)
         learning_rate = self.config['learning_rate']
         # Update each parameter with SGD rule
         num_parameters = len(self.parameters)
         for m in range(num_parameters):
             # Update weight
             self.parameters[m] -= learning_rate * gradients[m]
 
-    def log_forward(self, input):
+    def log_forward(self, model_input):
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
         hidden_size = W_h.shape[0]
-        nr_steps = input.shape[0]
+        nr_steps = model_input.shape[0]
+        nr_tokens = W_e.shape[1]
 
         # Embedding layer
-        z_e = W_e[input, :]
+        input_ohe = index2onehot(model_input, nr_tokens)
+        z_e = input_ohe @ W_e.T
 
         # Recurrent layer
         h = np.zeros((nr_steps + 1, hidden_size))
@@ -56,19 +58,21 @@ def log_forward(self, input):
         # Softmax
         log_p_y = y - logsumexp(y, axis=1, keepdims=True)
 
-        return log_p_y, y, h, z_e, input
+        return log_p_y, y, h, z_e, model_input
 
-    def backpropagation(self, input, output):
-        '''
-        Compute gradientes, with the back-propagation method
-        inputs:
-            x: vector with the (embedding) indicies of the words of a
-            sentence
-        outputs: vector with the indicies of the tags for each word of
-            the sentence outputs:
-            gradient_parameters: vector with parameters gradientes
-        '''
+    def backpropagation(self, model_input, output) -> list[np.ndarray]:
+        """
+        Compute gradients for the RNN, with the back-propagation method.
+
+        Inputs:
+            x: vector with the (embedding) indices of the words of a
+            sentence
+            outputs: vector with the indices of the tags for each word of
+            the sentence
+        Outputs:
+            gradient_parameters (list[np.ndarray]): List with W_e, W_x, W_h, W_y parameters' gradients
+        """
+        # print(f"Model input shape: {model_input.shape}, Output shape: {output.shape}")
 
         # Get parameters and sizes
         W_e, W_x, W_h, W_y = self.parameters
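
The functional change above replaces the index lookup z_e = W_e[input, :] with an explicit one-hot multiplication against the transposed embedding matrix. Below is a minimal sketch of why the two are equivalent, assuming index2onehot returns a (nr_steps, nr_tokens) one-hot matrix; a local stand-in is defined here rather than importing the project's own helper.

import numpy as np

def index2onehot(indices, nr_tokens):
    # Stand-in encoder: row t is the one-hot vector for indices[t]
    onehot = np.zeros((len(indices), nr_tokens))
    onehot[np.arange(len(indices)), indices] = 1.0
    return onehot

embedding_size, nr_tokens = 4, 10
rng = np.random.RandomState(0)
W_e = 0.01 * rng.uniform(size=(embedding_size, nr_tokens))  # new (embedding_size, input_size) order
model_input = np.array([3, 1, 7, 7, 0])                     # word indices for one sentence

# New path from this diff: one-hot rows times transposed embedding matrix
z_e_new = index2onehot(model_input, nr_tokens) @ W_e.T      # (nr_steps, embedding_size)

# Old lookup, rewritten for the transposed W_e: pick columns directly
z_e_old = W_e[:, model_input].T

assert np.allclose(z_e_new, z_e_old)
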
lxmls/deep_learning/rnn.py (4 changes: 2 additions & 2 deletions)

@@ -51,7 +51,7 @@ def initialize_rnn_parameters(input_size, embedding_size, hidden_size,
         W_e, W_x, W_h, W_y = loaded_parameters
 
         # Note: Pytorch requires this shape order fro nn.Embedding()
-        assert W_e.shape == (input_size, embedding_size), \
+        assert W_e.shape == (embedding_size, input_size), \
             "Embedding layer ze not matching saved model"
         assert W_x.shape == (hidden_size, embedding_size), \
             "Input layer ze not matching saved model"
@@ -65,7 +65,7 @@ def initialize_rnn_parameters(input_size, embedding_size, hidden_size,
         # INITIALIZE
 
         # Input layer
-        W_e = 0.01*random_seed.uniform(size=(input_size, embedding_size))
+        W_e = 0.01*random_seed.uniform(size=(embedding_size, input_size))
         # Input layer
         W_x = random_seed.uniform(size=(hidden_size, embedding_size))
         # Recurrent layer
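
Because W_e is now stored as (embedding_size, input_size), both the saved-model assert and the random initialization above are transposed. The sketch below is a small illustrative shape check (sizes are made up, not taken from any saved model) that the new order still feeds the (hidden_size, embedding_size) input layer W_x.

import numpy as np

input_size, embedding_size, hidden_size = 10, 4, 6
rng = np.random.RandomState(1234)

W_e = 0.01 * rng.uniform(size=(embedding_size, input_size))  # transposed order from this diff
W_x = rng.uniform(size=(hidden_size, embedding_size))

nr_steps = 5
onehot = np.eye(input_size)[rng.randint(0, input_size, size=nr_steps)]

z_e = onehot @ W_e.T   # (nr_steps, embedding_size)
z_x = z_e @ W_x.T      # (nr_steps, hidden_size), fed to the recurrent layer

assert z_e.shape == (nr_steps, embedding_size)
assert z_x.shape == (nr_steps, hidden_size)
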
tests/test_sequence_models_deep_learning.py (14 changes: 7 additions & 7 deletions)

@@ -14,7 +14,7 @@
 
 
 @pytest.fixture(scope='module')
-def data():
+def data():
     return PostagCorpusData()
 
 
@@ -33,7 +33,7 @@ def test_numpy_rnn(data):
     # Get functions to get and set values of a particular weight of the model
     get_parameter, set_parameter = get_rnn_parameter_handlers(
         layer_index=-1,
-        row=0,
+        row=0,
         column=0
     )
 
@@ -68,21 +68,21 @@ def test_numpy_rnn(data):
 
     # Batch loop
     for batch in train_batches:
-        model.update(input=batch['input'], output=batch['output'])
+        model.update(model_input=batch['input'], output=batch['output'])
 
     # Evaluation dev
     is_hit = []
     for batch in dev_set:
-        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
+        is_hit.extend(model.predict(model_input=batch['input']) == batch['output'])
     accuracy = 100*np.mean(is_hit)
 
     # tested for 2 epochs only
     assert np.allclose(accuracy, 31.325, tolerance)
 
     # Evaluation test
     is_hit = []
     for batch in test_set:
-        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
+        is_hit.extend(model.predict(model_input=batch['input']) == batch['output'])
     accuracy = 100*np.mean(is_hit)
 
     assert np.allclose(accuracy, 30.105, tolerance)
@@ -122,7 +122,7 @@ def test_pytorch_rnn(data):
 
     # tested for 2 epochs only
     assert np.allclose(accuracy, 31.325, tolerance)
-
+
     # Evaluation test
     is_hit = []
     for batch in test_set:
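
Beyond the test updates above, the keyword rename is a breaking change for any caller still passing input=. A runnable sketch of the call-site impact, using a hypothetical DummyRNN stand-in with only the renamed predict keyword, not the real NumpyRNN:

import numpy as np

class DummyRNN:
    # Stand-in with the renamed keyword argument; prediction logic is irrelevant here
    def predict(self, model_input=None):
        return np.zeros_like(model_input)

model = DummyRNN()
batch_input = np.array([3, 1, 7])

model.predict(model_input=batch_input)   # works with the new keyword
try:
    model.predict(input=batch_input)     # old keyword now raises TypeError
except TypeError as exc:
    print(exc)
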