# Patch summary (reviewer preamble; everything below this block is the original diff text, unchanged).
#
# Files touched:
#   1. neuralforecast/common/_base_model.py, hunk inside `_compute_valid_loss`
#      Adds a call `insample_y = self._inv_normalization(y_hat=insample_y, y_idx=y_idx)`
#      immediately before `self.valid_loss(...)` is invoked with
#      `y_insample=insample_y`. The added in-diff comment states the intent:
#      bring `insample_y` to the same scale as `outsample_y` and `output`.
#      NOTE(review): indentation was lost in this copy of the diff, so it cannot
#      be told from here whether the new call is nested under
#      `if not output_from_scaled_distribution:` or sits at the level of the
#      `valid_loss = ...` statement. Confirm against the real file.
#      Only a fragment of `_compute_valid_loss` is visible in the hunk.
#
#   2. tests/test_core.py, import hunk
#      Adds `MASE` to the names imported from `neuralforecast.losses.pytorch`.
#
#   3. tests/test_core.py, new test `test_mase_validation_loss_scale(setup_airplane_data)`
#      - Unpacks the training frame from the `setup_airplane_data` fixture.
#      - Builds `MLP(h=12, input_size=24, loss=MAE(), valid_loss=MASE(seasonality=12),
#        scaler_type="standard", max_steps=5, val_check_steps=1)`.
#      - Fits it through `NeuralForecast(models=[model], freq="M")` with `val_size=12`.
#      - Reads `nf.models[0].valid_trajectories`, asserts it is non-empty, and
#        asserts the loss in the last entry is `< 50`.
#      The in-diff comments give the rationale for the threshold: MASE was
#      "~200+" before the fix. NOTE(review): the bound 50 is empirical and is
#      not derived anywhere in this patch.
#
# Hunk sizes declared by the headers: 6 -> 8 lines, 6 -> 7 lines, 3 -> 42 lines.
# NOTE(review): line breaks and indentation are collapsed in this copy; they
# must be restored to match those counts before the patch can be applied.
# NOTE(review): this preamble sits above the first `diff --git` header, where
# patch tools conventionally skip non-diff text. Confirm with your tooling.
diff --git a/neuralforecast/common/_base_model.py b/neuralforecast/common/_base_model.py index 84c1441bf..3e9ab5d72 100644 --- a/neuralforecast/common/_base_model.py +++ b/neuralforecast/common/_base_model.py @@ -1017,6 +1017,8 @@ def _compute_valid_loss( else: if not output_from_scaled_distribution: output = self._inv_normalization(y_hat=output, y_idx=y_idx) + # Inverse normalize insample_y to match the scale of outsample_y and output + insample_y = self._inv_normalization(y_hat=insample_y, y_idx=y_idx) valid_loss = self.valid_loss( y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask ) diff --git a/tests/test_core.py b/tests/test_core.py index 011db373b..02ae4ea12 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -53,6 +53,7 @@ from neuralforecast.losses.pytorch import ( GMM, MAE, + MASE, NBMM, PMM, DistributionLoss, @@ -2006,3 +2007,42 @@ def test_compute_valid_loss_distribution_to_quantile_scale(): f"Quantiles mean ({quants_mean:.2f}) and target mean ({target_mean:.2f}) " f"are not on the same scale. Ratio: {ratio:.2f}" ) + + +def test_mase_validation_loss_scale(setup_airplane_data): + """Test that MASE validation loss is correctly computed with proper scaling. + + This test verifies the fix for the scale mismatch bug where insample_y + was in normalized scale while outsample_y and y_hat were in original scale. + With the fix, all values passed to MASE should be in the same (original) scale. 
+ """ + AirPassengersPanel_train, _ = setup_airplane_data + + # Use MLP with standard scaler and MASE validation loss + model = MLP( + h=12, + input_size=24, + loss=MAE(), + valid_loss=MASE(seasonality=12), + scaler_type="standard", + max_steps=5, + val_check_steps=1, + ) + nf = NeuralForecast(models=[model], freq="M") + + # Fit with validation set + nf.fit(AirPassengersPanel_train, val_size=12) + + # Get validation loss from trajectories (access fitted model from nf.models) + fitted_model = nf.models[0] + valid_trajectories = fitted_model.valid_trajectories + assert len(valid_trajectories) > 0, "No validation trajectories recorded" + + _, valid_loss = valid_trajectories[-1] + + # With the fix, MASE should be reasonable (< 50 for a minimally trained model) + # Before the fix, MASE was ~200+ due to scale mismatch + assert valid_loss < 50, ( + f"MASE validation loss is {valid_loss}, which indicates the scale mismatch " + f"bug may have regressed. Expected < 50 for a properly scaled MASE." + )