pytorch-forecasting/pytorch_forecasting/models/mlp/_decodermlp.py at 0f592b3283e1314945be61250baa9fbb58db1ae7 · sktime/pytorch-forecasting · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""
Simple models based on fully connected networks
"""

from typing import Dict, List, Optional, Tuple, Union

import numpy as np
import torch
from torch import nn

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.metrics import (
    MAE,
    MAPE,
    MASE,
    RMSE,
    SMAPE,
    MultiHorizonMetric,
    QuantileLoss,
)
from pytorch_forecasting.models.base import BaseModelWithCovariates
from pytorch_forecasting.models.mlp.submodules import FullyConnectedModule
from pytorch_forecasting.models.nn.embeddings import MultiEmbedding


class DecoderMLP(BaseModelWithCovariates):
    """
    MLP on the decoder.

    MLP that predicts output only based on information available in the decoder.
    """

    def __init__(
        self,
        activation_class: str = "ReLU",
        hidden_size: int = 300,
        n_hidden_layers: int = 3,
        dropout: float = 0.1,
        norm: bool = True,
        static_categoricals: Optional[List[str]] = None,
        static_reals: Optional[List[str]] = None,
        time_varying_categoricals_encoder: Optional[List[str]] = None,
        time_varying_categoricals_decoder: Optional[List[str]] = None,
        categorical_groups: Optional[Dict[str, List[str]]] = None,
        time_varying_reals_encoder: Optional[List[str]] = None,
        time_varying_reals_decoder: Optional[List[str]] = None,
        embedding_sizes: Optional[Dict[str, Tuple[int, int]]] = None,
        embedding_paddings: Optional[List[str]] = None,
        embedding_labels: Optional[Dict[str, np.ndarray]] = None,
        x_reals: Optional[List[str]] = None,
        x_categoricals: Optional[List[str]] = None,
        output_size: Union[int, List[int]] = 1,
        target: Union[str, List[str]] = None,
        loss: MultiHorizonMetric = None,
        logging_metrics: nn.ModuleList = None,
        **kwargs,
    ):
        """
        Args:
            activation_class (str, optional): PyTorch activation class. Defaults to "ReLU".
            hidden_size (int, optional): hidden recurrent size - the most important hyperparameter along with
                ``n_hidden_layers``. Defaults to 10.
            n_hidden_layers (int, optional): Number of hidden layers - important hyperparameter. Defaults to 2.
            dropout (float, optional): Dropout. Defaults to 0.1.
            norm (bool, optional): if to use normalization in the MLP. Defaults to True.
            static_categoricals: integer of positions of static categorical variables
            static_reals: integer of positions of static continuous variables
            time_varying_categoricals_encoder: integer of positions of categorical variables for encoder
            time_varying_categoricals_decoder: integer of positions of categorical variables for decoder
            time_varying_reals_encoder: integer of positions of continuous variables for encoder
            time_varying_reals_decoder: integer of positions of continuous variables for decoder
            categorical_groups: dictionary where values
                are list of categorical variables that are forming together a new categorical
                variable which is the key in the dictionary
            x_reals: order of continuous variables in tensor passed to forward function
            x_categoricals: order of categorical variables in tensor passed to forward function
            embedding_sizes: dictionary mapping (string) indices to tuple of number of categorical classes and
                embedding size
            embedding_paddings: list of indices for embeddings which transform the zero's embedding to a zero vector
            embedding_labels: dictionary mapping (string) indices to list of categorical labels
            output_size (Union[int, List[int]], optional): number of outputs (e.g. number of quantiles for
                QuantileLoss and one target or list of output sizes).
            target (str, optional): Target variable or list of target variables. Defaults to None.
            loss (MultiHorizonMetric, optional): loss: loss function taking prediction and targets.
                Defaults to QuantileLoss.
            logging_metrics (nn.ModuleList, optional): Metrics to log during training.
                Defaults to nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()]).
        """  # noqa: E501
        if loss is None:
            loss = QuantileLoss()
        if logging_metrics is None:
            logging_metrics = nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])
        if static_categoricals is None:
            static_categoricals = []
        if static_reals is None:
            static_reals = []
        if time_varying_reals_encoder is None:
            time_varying_reals_encoder = []
        if time_varying_categoricals_decoder is None:
            time_varying_categoricals_decoder = []
        if categorical_groups is None:
            categorical_groups = {}
        if time_varying_reals_encoder is None:
            time_varying_reals_encoder = []
        if time_varying_reals_decoder is None:
            time_varying_reals_decoder = []
        if embedding_sizes is None:
            embedding_sizes = {}
        if embedding_paddings is None:
            embedding_paddings = []
        if embedding_labels is None:
            embedding_labels = {}
        if x_reals is None:
            x_reals = []
        if x_categoricals is None:
            x_categoricals = []
        self.save_hyperparameters()
        # store loss function separately as it is a module
        super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)

        self.input_embeddings = MultiEmbedding(
            embedding_sizes={
                name: val
                for name, val in embedding_sizes.items()
                if name in self.decoder_variables + self.static_variables
            },
            embedding_paddings=embedding_paddings,
            categorical_groups=categorical_groups,
            x_categoricals=x_categoricals,
        )
        # define network
        if isinstance(self.hparams.output_size, int):
            mlp_output_size = self.hparams.output_size
        else:
            mlp_output_size = sum(self.hparams.output_size)

        cont_size = len(self.decoder_reals_positions)
        cat_size = sum(self.input_embeddings.output_size.values())
        input_size = cont_size + cat_size

        self.mlp = FullyConnectedModule(
            dropout=dropout,
            norm=self.hparams.norm,
            activation_class=getattr(nn, self.hparams.activation_class),
            input_size=input_size,
            output_size=mlp_output_size,
            hidden_size=self.hparams.hidden_size,
            n_hidden_layers=self.hparams.n_hidden_layers,
        )

    @property
    def decoder_reals_positions(self) -> List[int]:
        return [
            self.hparams.x_reals.index(name)
            for name in self.reals
            if name in self.decoder_variables + self.static_variables
        ]

    def forward(
        self, x: Dict[str, torch.Tensor], n_samples: int = None
    ) -> Dict[str, torch.Tensor]:
        """
        Forward network
        """
        # x is a batch generated based on the TimeSeriesDataset
        batch_size = x["decoder_lengths"].size(0)
        embeddings = self.input_embeddings(
            x["decoder_cat"]
        )  # returns dictionary with embedding tensors
        network_input = torch.cat(
            [x["decoder_cont"][..., self.decoder_reals_positions]]
            + list(embeddings.values()),
            dim=-1,
        )
        prediction = self.mlp(network_input.view(-1, self.mlp.input_size)).view(
            batch_size, network_input.size(1), self.mlp.output_size
        )

        # cut prediction into pieces for multiple targets
        if self.n_targets > 1:
            prediction = torch.split(prediction, self.hparams.output_size, dim=-1)

        # We need to return a dictionary that at least contains the prediction
        # The parameter can be directly forwarded from the input.
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        new_kwargs = cls.deduce_default_output_parameters(
            dataset, kwargs, QuantileLoss()
        )
        kwargs.update(new_kwargs)
        return super().from_dataset(dataset, **kwargs)