LOCAL STUDENT - fix validation

NeptuneHub · NeptuneHub · commit c6f2b8513b0d · 2026-01-22T13:17:52.000+01:00
diff --git a/student_clap/models/student_onnx_model.py b/student_clap/models/student_onnx_model.py
@@ -498,13 +498,19 @@ def train_step(self, batch: Dict) -> Dict:
         """
         Single training step on a batch.
 
-        Args:
-            batch: Dictionary with:
-                - 'audio_segments': List of audio segment tensors per song
-                - 'teacher_embeddings': Teacher embeddings from database
-                - 'song_ids': Song IDs for logging
-
-        Returns:
+        if torch.cuda.is_available() and str(self.device) == 'cuda':
+            self.model.to(self.device, dtype=torch.bfloat16)
+            self._cast_batchnorm_to_dtype(torch.bfloat16)
+            tensor_dtype = torch.bfloat16
+        elif torch.backends.mps.is_available() and str(self.device) == 'mps':
+            self.model.to(self.device, dtype=torch.bfloat16)
+            self._cast_batchnorm_to_dtype(torch.bfloat16)
+            tensor_dtype = torch.bfloat16
+        else:
+            self.model.to(self.device, dtype=torch.float32)
+            self._cast_batchnorm_to_dtype(torch.float32)
+            tensor_dtype = torch.float32
+        self.model.train()
             step_metrics: Dictionary with loss and performance metrics
         """
 
diff --git a/student_clap/train_real.py b/student_clap/train_real.py
@@ -282,12 +282,15 @@ def validate_real(trainer: StudentCLAPTrainer,
     # Set model to correct dtype for platform (match training)
     if torch.cuda.is_available() and str(trainer.device) == 'cuda':
         trainer.model.to(trainer.device, dtype=torch.bfloat16)
+        trainer._cast_batchnorm_to_dtype(torch.bfloat16)
         tensor_dtype = torch.bfloat16
     elif torch.backends.mps.is_available() and str(trainer.device) == 'mps':
         trainer.model.to(trainer.device, dtype=torch.bfloat16)
+        trainer._cast_batchnorm_to_dtype(torch.bfloat16)
         tensor_dtype = torch.bfloat16
     else:
         trainer.model.to(trainer.device, dtype=torch.float32)
+        trainer._cast_batchnorm_to_dtype(torch.float32)
         tensor_dtype = torch.float32
     
     # Collect embeddings