[FEAT] HuberIQLoss (#1307)

elephaint · web-flow · commit 22315be4bafc · 2025-04-10T22:10:28.000Z
diff --git a/nbs/common.base_model.ipynb b/nbs/common.base_model.ipynb
@@ -272,14 +272,14 @@
     "            raise Exception(f'{type(self).__name__} does not support static exogenous variables.')\n",
     "\n",
     "        # Protections for loss functions\n",
-    "        if isinstance(self.loss, (losses.IQLoss)):\n",
+    "        if isinstance(self.loss, (losses.IQLoss, losses.HuberIQLoss)):\n",
     "            loss_type = type(self.loss)\n",
     "            if not isinstance(self.valid_loss, loss_type):\n",
     "                raise Exception(f'Please set valid_loss={type(self.loss).__name__}() when training with {type(self.loss).__name__}')\n",
     "        if isinstance(self.loss, (losses.MQLoss, losses.HuberMQLoss)):\n",
     "            if not isinstance(self.valid_loss, (losses.MQLoss, losses.HuberMQLoss)):\n",
     "                raise Exception(f'Please set valid_loss to MQLoss() or HuberMQLoss() when training with {type(self.loss).__name__}')\n",
-    "        if isinstance(self.valid_loss, losses.IQLoss):\n",
+    "        if isinstance(self.valid_loss, (losses.IQLoss, losses.HuberIQLoss)):\n",
     "            valid_loss_type = type(self.valid_loss)\n",
     "            if not isinstance(self.loss, valid_loss_type):\n",
     "                raise Exception(f'Please set loss={type(self.valid_loss).__name__}() when validating with {type(self.valid_loss).__name__}')        \n",
@@ -425,7 +425,7 @@
     "        )\n",
     "    \n",
     "    def _set_quantiles(self, quantiles=None):\n",
-    "        if quantiles is None and isinstance(self.loss, losses.IQLoss):\n",
+    "        if quantiles is None and isinstance(self.loss, (losses.IQLoss, losses.HuberIQLoss)):\n",
     "            self.loss.update_quantile(q=[0.5])\n",
     "        elif hasattr(self.loss, 'update_quantile') and callable(self.loss.update_quantile):\n",
     "            self.loss.update_quantile(q=quantiles)\n",
diff --git a/nbs/common.model_checks.ipynb b/nbs/common.model_checks.ipynb
@@ -146,7 +146,7 @@
     "# Tests a model against every loss function\n",
     "def check_loss_functions(model_class):\n",
     "    loss_list = [losses.MAE(), losses.MSE(), losses.RMSE(), losses.MAPE(), losses.SMAPE(), losses.MASE(seasonality=7), \n",
-    "              losses.QuantileLoss(q=0.5), losses.MQLoss(), losses.IQLoss(), losses.DistributionLoss(\"Normal\"), \n",
+    "              losses.QuantileLoss(q=0.5), losses.MQLoss(), losses.IQLoss(), losses.HuberIQLoss(), losses.DistributionLoss(\"Normal\"), \n",
     "              losses.DistributionLoss(\"StudentT\"), losses.DistributionLoss(\"Poisson\"), losses.DistributionLoss(\"NegativeBinomial\"), \n",
     "              losses.DistributionLoss(\"Tweedie\", rho=1.5), losses.DistributionLoss(\"ISQF\"), losses.PMM(), losses.PMM(weighted=True), \n",
     "              losses.GMM(), losses.GMM(weighted=True), losses.NBMM(), losses.NBMM(weighted=True), losses.HuberLoss(), \n",
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
@@ -84,7 +84,7 @@
     "\n",
     "from neuralforecast.common._base_model import DistributedConfig\n",
     "from neuralforecast.compat import SparkDataFrame\n",
-    "from neuralforecast.losses.pytorch import IQLoss\n",
+    "from neuralforecast.losses.pytorch import IQLoss, HuberIQLoss\n",
     "from neuralforecast.tsdataset import _FilesDataset, TimeSeriesDataset, LocalFilesTimeSeriesDataset\n",
     "from neuralforecast.models import (\n",
     "    GRU, LSTM, RNN, TCN, DeepAR, DilatedRNN,\n",
@@ -718,7 +718,7 @@
     "            if count_names[model_name] > 0:\n",
     "                model_name += str(count_names[model_name])\n",
     "\n",
-    "            if add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
+    "            if add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, (IQLoss, HuberIQLoss))):\n",
     "                continue\n",
     "\n",
     "            names.extend(model_name + n for n in model.loss.output_names)\n",
@@ -1052,7 +1052,7 @@
     "\n",
     "        fcsts_list: List = []\n",
     "        for model in self.models:\n",
-    "            if self._add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
+    "            if self._add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, (IQLoss, HuberIQLoss))):\n",
     "                continue\n",
     "\n",
     "            model.fit(dataset=self.dataset,\n",
@@ -1687,7 +1687,7 @@
     "\n",
     "            # Predict for every quantile or level if requested and the loss function supports it\n",
     "            # case 1: DistributionLoss and MixtureLosses\n",
-    "            if quantiles_ is not None and not isinstance(model.loss, IQLoss) and hasattr(model.loss, 'update_quantile') and callable(model.loss.update_quantile):\n",
+    "            if quantiles_ is not None and not isinstance(model.loss, (IQLoss, HuberIQLoss)) and hasattr(model.loss, 'update_quantile') and callable(model.loss.update_quantile):\n",
     "                model_fcsts = model.predict(dataset=dataset, quantiles = quantiles_, **data_kwargs)\n",
     "                fcsts_list.append(model_fcsts)      \n",
     "                col_names = []\n",
@@ -1702,7 +1702,7 @@
     "                else:\n",
     "                    cols.extend(col_names)\n",
     "            # case 2: IQLoss\n",
-    "            elif quantiles_ is not None and isinstance(model.loss, IQLoss):\n",
+    "            elif quantiles_ is not None and isinstance(model.loss, (IQLoss, HuberIQLoss)):\n",
     "                # IQLoss does not give monotonically increasing quantiles, so we apply a hack: compute all quantiles, and take the quantile over the quantiles\n",
     "                quantiles_iqloss = np.linspace(0.01, 0.99, 20)\n",
     "                fcsts_list_iqloss = []\n",
diff --git a/nbs/docs/capabilities/02_objectives.ipynb b/nbs/docs/capabilities/02_objectives.ipynb
@@ -30,7 +30,9 @@
     "|[**Poisson**](../../losses.pytorch.html#distributionloss)          |[**HuberQLoss**](../../losses.pytorch.html#huberized-quantile-loss)|\n",
     "|[**Negative Binomial**](../../losses.pytorch.html#distributionloss)|[**HuberMQLoss**](../../losses.pytorch.html#huberized-mqloss)      |\n",
     "|[**Tweedie**](../../losses.pytorch.html#distributionloss)          |[**IQLoss**](../../losses.pytorch.html#iqloss)  |\n",
-    "|[**PMM**](../../losses.pytorch.html#poisson-mixture-mesh-pmm) /[**GMM**](../../losses.pytorch.html#gaussian-mixture-mesh-gmm)  / [**NBMM**](../../losses.pytorch.html#negative-binomial-mixture-mesh-nbmm)  | [**ISQF**](../../losses.pytorch.html#isqf)  | "
+    "|[**PMM**](../../losses.pytorch.html#poisson-mixture-mesh-pmm) | [**HuberIQLoss**](../../losses.pytorch.html#huberized-iqloss)|\n",
+    "|[**GMM**](../../losses.pytorch.html#gaussian-mixture-mesh-gmm) | [**ISQF**](../../losses.pytorch.html#isqf)  |\n",
+    "|[**NBMM**](../../losses.pytorch.html#negative-binomial-mixture-mesh-nbmm) | |"
    ]
   }
  ],
diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb
@@ -1244,6 +1244,7 @@
     "            self._init_sampling_distribution(device)\n",
     "\n",
     "        quantiles = self.sampling_distr.sample(sample_size)\n",
+    "        self.q = quantiles.squeeze(-1)\n",
     "        self.has_sampled = True        \n",
     "        self.has_predicted = False\n",
     "\n",
@@ -4266,6 +4267,160 @@
     "![](imgs_losses/hmq_loss.png)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "affe8b2f",
+   "metadata": {},
+   "source": [
+    "## Huberized IQLoss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "31e71b0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "class HuberIQLoss(HuberQLoss):\n",
+    "    \"\"\"Implicit Huber Quantile Loss\n",
+    "\n",
+    "    Computes the huberized quantile loss between `y` and `y_hat`, with the quantile `q` provided as an input to the network. \n",
+    "    HuberIQLoss measures the deviation of a huberized quantile forecast.\n",
+    "    By weighting the absolute deviation in a non symmetric way, the\n",
+    "    loss pays more attention to under or over estimation.\n",
+    "\n",
+    "    $$ \\mathrm{HuberQL}(\\\\mathbf{y}_{\\\\tau}, \\\\mathbf{\\hat{y}}^{(q)}_{\\\\tau}) = \n",
+    "    (1-q)\\, L_{\\delta}(y_{\\\\tau},\\; \\hat{y}^{(q)}_{\\\\tau}) \\mathbb{1}\\{ \\hat{y}^{(q)}_{\\\\tau} \\geq y_{\\\\tau} \\} + \n",
+    "    q\\, L_{\\delta}(y_{\\\\tau},\\; \\hat{y}^{(q)}_{\\\\tau}) \\mathbb{1}\\{ \\hat{y}^{(q)}_{\\\\tau} < y_{\\\\tau} \\} $$\n",
+    "\n",
+    "    **Parameters:**<br>\n",
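+    "    `cos_embedding_dim`: int, default=64, cosine embedding dimension passed to the quantile layer. <br> `concentration0`: float, default=1.0, `concentration0` parameter of the Beta distribution from which quantiles are sampled during training. <br> `concentration1`: float, default=1.0, `concentration1` parameter of that Beta distribution; the defaults (1.0, 1.0) give uniform sampling. <br>\n",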
+    "    `horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window. <br>\n",
+    "    `delta`: float=1.0, Specifies the threshold at which to change between delta-scaled L1 and L2 loss.<br>\n",
+    "\n",
+    "    **References:**<br>\n",
+    "    [Gouttes, Adèle, Kashif Rasul, Mateusz Koren, Johannes Stephan, and Tofigh Naghibi, \"Probabilistic Time Series Forecasting with Implicit Quantile Networks\".](http://arxiv.org/abs/2107.03743)<br>\n",
+    "    [Huber Peter, J (1964). \"Robust Estimation of a Location Parameter\". Annals of Mathematical Statistics](https://projecteuclid.org/journals/annals-of-mathematical-statistics/volume-35/issue-1/Robust-Estimation-of-a-Location-Parameter/10.1214/aoms/1177703732.full)<br>\n",
+    "    [Roger Koenker and Gilbert Bassett, Jr., \"Regression Quantiles\".](https://www.jstor.org/stable/1913643)\n",
+    "    \"\"\"\n",
+    "    def __init__(self, cos_embedding_dim = 64, concentration0 = 1.0, concentration1 = 1.0, delta = 1.0, horizon_weight=None):\n",
+    "        self.update_quantile()\n",
+    "        super(HuberIQLoss, self).__init__(\n",
+    "            q = self.q,\n",
+    "            delta = delta,\n",
+    "            horizon_weight=horizon_weight\n",
+    "        )\n",
+    "\n",
+    "        self.cos_embedding_dim = cos_embedding_dim\n",
+    "        self.concentration0 = concentration0\n",
+    "        self.concentration1 = concentration1\n",
+    "        self.has_sampled = False\n",
+    "        self.has_predicted = False\n",
+    "\n",
+    "        self.quantile_layer = QuantileLayer(\n",
+    "            num_output=1, cos_embedding_dim=self.cos_embedding_dim\n",
+    "        )\n",
+    "        self.output_layer = nn.Sequential(\n",
+    "            nn.Linear(1, 1), nn.PReLU()\n",
+    "        )\n",
+    "        \n",
+    "    def _sample_quantiles(self, sample_size, device):\n",
+    "        if not self.has_sampled:\n",
+    "            self._init_sampling_distribution(device)\n",
+    "\n",
+    "        quantiles = self.sampling_distr.sample(sample_size)\n",
+    "        self.q = quantiles.squeeze(-1)\n",
+    "        self.has_sampled = True        \n",
+    "        self.has_predicted = False\n",
+    "\n",
+    "        return quantiles\n",
+    "    \n",
+    "    def _init_sampling_distribution(self, device):\n",
+    "        concentration0 = torch.tensor([self.concentration0],\n",
+    "                                      device=device,\n",
+    "                                      dtype=torch.float32)\n",
+    "        concentration1 = torch.tensor([self.concentration1],\n",
+    "                                      device=device,\n",
+    "                                      dtype=torch.float32)        \n",
+    "        self.sampling_distr = Beta(concentration0 = concentration0,\n",
+    "                                   concentration1 = concentration1)\n",
+    "\n",
+    "    def update_quantile(self, q: List[float] = [0.5]):\n",
+    "        self.q = q[0]\n",
+    "        self.output_names = [f\"_ql{q[0]}\"]\n",
+    "        self.has_predicted = True\n",
+    "\n",
+    "    def domain_map(self, y_hat):\n",
+    "        \"\"\"\n",
+    "        Adds IQN network to output of network\n",
+    "\n",
+    "        Input shapes to this function:\n",
+    "         \n",
+    "        Univariate: y_hat = [B, h, 1] \n",
+    "        Multivariate: y_hat = [B, h, N]\n",
+    "        \"\"\"\n",
+    "        if self.eval() and self.has_predicted:\n",
+    "            quantiles = torch.full(size=y_hat.shape, \n",
+    "                                    fill_value=self.q,\n",
+    "                                    device=y_hat.device,\n",
+    "                                    dtype=y_hat.dtype) \n",
+    "            quantiles = quantiles.unsqueeze(-1)             \n",
+    "        else:\n",
+    "            quantiles = self._sample_quantiles(sample_size=y_hat.shape,\n",
+    "                                        device=y_hat.device)\n",
+    "\n",
+    "        # Embed the quantiles and add to y_hat\n",
+    "        emb_taus = self.quantile_layer(quantiles)\n",
+    "        emb_inputs = y_hat.unsqueeze(-1) * (1.0 + emb_taus)\n",
+    "        emb_outputs = self.output_layer(emb_inputs)\n",
+    "        \n",
+    "        # Domain map\n",
+    "        y_hat = emb_outputs.squeeze(-1)\n",
+    "\n",
+    "        return y_hat\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ccf9024",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(HuberIQLoss, name='HuberIQLoss.__init__', title_level=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23a84e21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_doc(HuberIQLoss.__call__, name='HuberIQLoss.__call__', title_level=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db4a68dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | hide\n",
+    "# Unit tests\n",
+    "# Check that default quantile is set to 0.5 at initialization\n",
+    "check = HuberIQLoss()\n",
+    "test_eq(check.q, 0.5)\n",
+    "\n",
+    "# Check that quantiles are correctly updated - prediction\n",
+    "check = HuberIQLoss()\n",
+    "check.update_quantile([0.7])\n",
+    "test_eq(check.q, 0.7)"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py
@@ -317,6 +317,18 @@
                                                                                                      'neuralforecast/losses/pytorch.py'),
                                                'neuralforecast.losses.pytorch.GMM.update_quantile': ( 'losses.pytorch.html#gmm.update_quantile',
                                                                                                       'neuralforecast/losses/pytorch.py'),
+                                               'neuralforecast.losses.pytorch.HuberIQLoss': ( 'losses.pytorch.html#huberiqloss',
+                                                                                              'neuralforecast/losses/pytorch.py'),
+                                               'neuralforecast.losses.pytorch.HuberIQLoss.__init__': ( 'losses.pytorch.html#huberiqloss.__init__',
+                                                                                                       'neuralforecast/losses/pytorch.py'),
+                                               'neuralforecast.losses.pytorch.HuberIQLoss._init_sampling_distribution': ( 'losses.pytorch.html#huberiqloss._init_sampling_distribution',
+                                                                                                                          'neuralforecast/losses/pytorch.py'),
+                                               'neuralforecast.losses.pytorch.HuberIQLoss._sample_quantiles': ( 'losses.pytorch.html#huberiqloss._sample_quantiles',
+                                                                                                                'neuralforecast/losses/pytorch.py'),
+                                               'neuralforecast.losses.pytorch.HuberIQLoss.domain_map': ( 'losses.pytorch.html#huberiqloss.domain_map',
+                                                                                                         'neuralforecast/losses/pytorch.py'),
+                                               'neuralforecast.losses.pytorch.HuberIQLoss.update_quantile': ( 'losses.pytorch.html#huberiqloss.update_quantile',
+                                                                                                              'neuralforecast/losses/pytorch.py'),
                                                'neuralforecast.losses.pytorch.HuberLoss': ( 'losses.pytorch.html#huberloss',
                                                                                             'neuralforecast/losses/pytorch.py'),
                                                'neuralforecast.losses.pytorch.HuberLoss.__call__': ( 'losses.pytorch.html#huberloss.__call__',
diff --git a/neuralforecast/common/_base_model.py b/neuralforecast/common/_base_model.py
@@ -233,7 +233,7 @@ def __init__(
             )
 
         # Protections for loss functions
-        if isinstance(self.loss, (losses.IQLoss)):
+        if isinstance(self.loss, (losses.IQLoss, losses.HuberIQLoss)):
             loss_type = type(self.loss)
             if not isinstance(self.valid_loss, loss_type):
                 raise Exception(
@@ -244,7 +244,7 @@ def __init__(
                 raise Exception(
                     f"Please set valid_loss to MQLoss() or HuberMQLoss() when training with {type(self.loss).__name__}"
                 )
-        if isinstance(self.valid_loss, losses.IQLoss):
+        if isinstance(self.valid_loss, (losses.IQLoss, losses.HuberIQLoss)):
             valid_loss_type = type(self.valid_loss)
             if not isinstance(self.loss, valid_loss_type):
                 raise Exception(
@@ -415,7 +415,9 @@ def _get_temporal_exogenous_cols(self, temporal_cols):
         )
 
     def _set_quantiles(self, quantiles=None):
-        if quantiles is None and isinstance(self.loss, losses.IQLoss):
+        if quantiles is None and isinstance(
+            self.loss, (losses.IQLoss, losses.HuberIQLoss)
+        ):
             self.loss.update_quantile(q=[0.5])
         elif hasattr(self.loss, "update_quantile") and callable(
             self.loss.update_quantile
diff --git a/neuralforecast/common/_model_checks.py b/neuralforecast/common/_model_checks.py
@@ -131,6 +131,7 @@ def check_loss_functions(model_class):
         losses.QuantileLoss(q=0.5),
         losses.MQLoss(),
         losses.IQLoss(),
+        losses.HuberIQLoss(),
         losses.DistributionLoss("Normal"),
         losses.DistributionLoss("StudentT"),
         losses.DistributionLoss("Poisson"),
diff --git a/neuralforecast/core.py b/neuralforecast/core.py
@@ -28,7 +28,7 @@
 
 from .common._base_model import DistributedConfig
 from .compat import SparkDataFrame
-from .losses.pytorch import IQLoss
+from .losses.pytorch import IQLoss, HuberIQLoss
 from neuralforecast.tsdataset import (
     _FilesDataset,
     TimeSeriesDataset,
@@ -673,7 +673,8 @@ def _get_model_names(self, add_level=False) -> List[str]:
                 model_name += str(count_names[model_name])
 
             if add_level and (
-                model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)
+                model.loss.outputsize_multiplier > 1
+                or isinstance(model.loss, (IQLoss, HuberIQLoss))
             ):
                 continue
 
@@ -1029,7 +1030,8 @@ def _no_refit_cross_validation(
         fcsts_list: List = []
         for model in self.models:
             if self._add_level and (
-                model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)
+                model.loss.outputsize_multiplier > 1
+                or isinstance(model.loss, (IQLoss, HuberIQLoss))
             ):
                 continue
 
@@ -1707,7 +1709,7 @@ def _generate_forecasts(
             # case 1: DistributionLoss and MixtureLosses
             if (
                 quantiles_ is not None
-                and not isinstance(model.loss, IQLoss)
+                and not isinstance(model.loss, (IQLoss, HuberIQLoss))
                 and hasattr(model.loss, "update_quantile")
                 and callable(model.loss.update_quantile)
             ):
@@ -1733,7 +1735,9 @@ def _generate_forecasts(
                 else:
                     cols.extend(col_names)
             # case 2: IQLoss
-            elif quantiles_ is not None and isinstance(model.loss, IQLoss):
+            elif quantiles_ is not None and isinstance(
+                model.loss, (IQLoss, HuberIQLoss)
+            ):
                 # IQLoss does not give monotonically increasing quantiles, so we apply a hack: compute all quantiles, and take the quantile over the quantiles
                 quantiles_iqloss = np.linspace(0.01, 0.99, 20)
                 fcsts_list_iqloss = []
diff --git a/neuralforecast/losses/pytorch.py b/neuralforecast/losses/pytorch.py

Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,9 @@`
`30`	`30`	`"\|[Poisson](../../losses.pytorch.html#distributionloss) \|[HuberQLoss](../../losses.pytorch.html#huberized-quantile-loss)\|\n",`
`31`	`31`	`"\|[Negative Binomial](../../losses.pytorch.html#distributionloss)\|[HuberMQLoss](../../losses.pytorch.html#huberized-mqloss) \|\n",`
`32`	`32`	`"\|[Tweedie](../../losses.pytorch.html#distributionloss) \|[IQLoss](../../losses.pytorch.html#iqloss) \|\n",`
`33`		`- "\|[PMM](../../losses.pytorch.html#poisson-mixture-mesh-pmm) /[GMM](../../losses.pytorch.html#gaussian-mixture-mesh-gmm) / [NBMM](../../losses.pytorch.html#negative-binomial-mixture-mesh-nbmm) \| [ISQF](../../losses.pytorch.html#isqf) \| "`
	`33`	`+ "\|[PMM](../../losses.pytorch.html#poisson-mixture-mesh-pmm) \| [HuberIQLoss](../../losses.pytorch.html#huberized-iqloss)\|\n",`
	`34`	`+ "\|[GMM](../../losses.pytorch.html#gaussian-mixture-mesh-gmm) \| [ISQF](../../losses.pytorch.html#isqf) \|\n",`
	`35`	`+ "\|[NBMM](../../losses.pytorch.html#negative-binomial-mixture-mesh-nbmm) \| \|"`
`34`	`36`	`]`
`35`	`37`	`}`
`36`	`38`	`],`