Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/brevitas/nn/mixin/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ def quant_weight(
def register_parameter(self, name, value):
    """Register *name*, re-initializing the weight quantizer when the weight is replaced.

    Re-registering ``weight`` forces ``weight_quant`` to rebuild its tensor_quant,
    which can silently reset the module's train/eval mode; the mode is snapshotted
    and restored around the re-initialization.
    """
    super(QuantWeightMixin, self).register_parameter(name, value)
    if name != 'weight' or not hasattr(self, 'weight_quant'):
        return
    # init_tensor_quant() may lose the current train/eval state; save and restore it.
    was_training = self.training
    self.weight_quant.init_tensor_quant()
    self.weight_quant.train(was_training)


class QuantBiasMixin(QuantProxyMixin):
Expand Down Expand Up @@ -113,5 +117,8 @@ def quant_bias(self):
def register_parameter(self, name, value):
    """Register *name*, re-initializing the bias quantizer when the bias is replaced.

    Re-registering ``bias`` forces ``bias_quant`` to rebuild its tensor_quant,
    which can silently reset the module's train/eval mode; the mode is snapshotted
    and restored around the re-initialization.
    """
    super(QuantBiasMixin, self).register_parameter(name, value)
    if name != 'bias' or not hasattr(self, 'bias_quant'):
        return
    # init_tensor_quant() may lose the current train/eval state; save and restore it.
    was_training = self.training
    self.bias_quant.init_tensor_quant()
    # Keep the rebuilt proxy on the same device as the freshly registered bias.
    self.bias_quant.to(self.bias.device)
    self.bias_quant.train(was_training)
5 changes: 5 additions & 0 deletions src/brevitas/proxy/quant_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,12 @@ def _load_from_state_dict(
# but before the state_dict of tensor_quant is loaded, so in case e.g. there is a value
# for the parameter already, it's not overwritten
if config.REINIT_ON_STATE_DICT_LOAD:
# When tensor_quant is init, we might lose information about the state (train vs eval)
# We keep track of them and restore them post initialization.
training_state = self.training
self.init_tensor_quant()
self.train(training_state)

# for retrocompatibility with when it wasn't removed
zero_hw_sentinel_key = prefix + 'zero_hw_sentinel'
if zero_hw_sentinel_key in unexpected_keys:
Expand Down
10 changes: 10 additions & 0 deletions tests/brevitas/proxy/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: BSD-3-Clause

import pytest
import torch

from brevitas.nn import QuantLinear
from brevitas.nn.quant_activation import QuantReLU
Expand Down Expand Up @@ -83,3 +84,12 @@ def test_dynamic_act_proxy(self):

model.act_quant.disable_quant = True
assert model.act_quant.bit_width() is None

def test_training_state(self):
    """Re-initializing tensor_quant via weight re-registration must preserve eval mode."""
    quant_layer = QuantLinear(10, 5, weight_quant=Int8WeightPerTensorFloat)
    quant_layer.eval()

    # Assigning a new weight Parameter triggers register_parameter, which
    # re-inits the weight quantizer's tensor_quant.
    quant_layer.weight = torch.nn.Parameter(torch.randn_like(quant_layer.weight))

    # Idiomatic boolean check (flake8 E712) instead of `== False`: the rebuilt
    # tensor_quant must still be in eval mode, not reset to training.
    assert not quant_layer.weight_quant.tensor_quant.training
Loading