
Commit 23b9331

Refined the docs. Simply very minor changes
1 parent ac81b25 commit 23b9331

12 files changed (+43 lines, -23 lines)

pytorch_widedeep/bayesian_models/tabular/bayesian_linear/bayesian_wide.py

Lines changed: 11 additions & 3 deletions
@@ -21,9 +21,17 @@ class BayesianWide(BaseBayesianModel):
     pred_dim: int
         size of the ouput tensor containing the predictions
     prior_sigma_1: float, default = 1.0
-        Prior of the sigma parameter for the first of the two Gaussian
-        distributions that will be mixed to produce the prior weight
-        distribution
+        The prior weight distribution is a scaled mixture of two Gaussian
+        densities:
+
+        .. math::
+           \begin{aligned}
+           P(\mathbf{w}) = \prod_{i=j} \pi N (\mathbf{w}_j | 0, \sigma_{1}^{2}) + (1 - \pi) N (\mathbf{w}_j | 0, \sigma_{2}^{2})
+           \end{aligned}
+
+        This is the prior of the sigma parameter for the first of the two
+        Gaussians that will be mixed to produce the prior weight
+        distribution.
     prior_sigma_2: float, default = 0.002
         Prior of the sigma parameter for the second of the two Gaussian
         distributions that will be mixed to produce the prior weight
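The formula added above is a scale mixture of two zero-mean Gaussians used as the weight prior. A minimal, self-contained sketch of its log-density in plain PyTorch, purely for illustration and not the library's internal code (the mixing weight `pi` and the example tensor are made up for the sketch):

    import math
    import torch

    def scale_mixture_log_prior(w, pi=0.25, sigma_1=1.0, sigma_2=0.002):
        # Element-wise log-density of a zero-mean Gaussian with std sigma.
        def log_gauss(x, sigma):
            return -0.5 * math.log(2 * math.pi) - math.log(sigma) - x.pow(2) / (2 * sigma**2)

        # log( pi * N(w|0, sigma_1^2) + (1 - pi) * N(w|0, sigma_2^2) ) per weight,
        # computed with logsumexp for numerical stability.
        log_mix = torch.logsumexp(
            torch.stack([math.log(pi) + log_gauss(w, sigma_1),
                         math.log(1 - pi) + log_gauss(w, sigma_2)]),
            dim=0,
        )
        # The product over weights in the formula becomes a sum in log space.
        return log_mix.sum()

    weights = torch.randn(10, 4)
    print(scale_mixture_log_prior(weights))

Working in log space with logsumexp keeps the narrow second component (sigma_2 = 0.002 by default) from underflowing.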

pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/bayesian_tab_mlp.py

Lines changed: 11 additions & 3 deletions
@@ -57,9 +57,17 @@ class BayesianTabMlp(BaseBayesianModel):
         Activation function for the dense layers of the MLP. Currently
         `'tanh'`, `'relu'`, `'leaky_relu'` and `'gelu'` are supported
     prior_sigma_1: float, default = 1.0
-        Prior of the sigma parameter for the first of the two Gaussian
-        distributions that will be mixed to produce the prior weight
-        distribution for each Bayesian linear and embedding layer
+        The prior weight distribution is a scaled mixture of two Gaussian
+        densities:
+
+        .. math::
+           \begin{aligned}
+           P(\mathbf{w}) = \prod_{i=j} \pi N (\mathbf{w}_j | 0, \sigma_{1}^{2}) + (1 - \pi) N (\mathbf{w}_j | 0, \sigma_{2}^{2})
+           \end{aligned}
+
+        This is the prior of the sigma parameter for the first of the two
+        Gaussians that will be mixed to produce the prior weight
+        distribution.
     prior_sigma_2: float, default = 0.002
         Prior of the sigma parameter for the second of the two Gaussian
         distributions that will be mixed to produce the prior weight
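Since this class exposes the two sigmas as constructor arguments, a usage sketch may help. Every argument here other than `prior_sigma_1` and `prior_sigma_2` (`column_idx`, `continuous_cols`, `mlp_hidden_dims`) is assumed from the library's other tabular models and is not confirmed by this diff:

    from pytorch_widedeep.bayesian_models import BayesianTabMlp

    # column_idx / continuous_cols / mlp_hidden_dims are assumed arguments;
    # prior_sigma_1 / prior_sigma_2 and their defaults come from the docstring above.
    model = BayesianTabMlp(
        column_idx={"age": 0, "hours_per_week": 1},
        continuous_cols=["age", "hours_per_week"],
        mlp_hidden_dims=[64, 32],
        prior_sigma_1=1.0,    # sigma of the first (wide) mixture component
        prior_sigma_2=0.002,  # sigma of the second (narrow) mixture component
    )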

pytorch_widedeep/models/image/vision.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ class Vision(nn.Module):
         List of strings containing the names (or substring within the name) of
         the parameters that will be trained. For example, if we use a
         `'resnet18'` pretrainable model and we set ``trainable_params =
-        ['layer4']`` only the parameters of `'layer4'` of the network(and the
+        ['layer4']`` only the parameters of `'layer4'` of the network (and the
         head, as mentioned before) will be trained. Note that setting this or
         the previous parameter involves some knowledge of the architecture
         used.
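The change above only adds a missing space, but the behaviour the docstring describes, substring matching against parameter names, can be sketched in plain PyTorch. This is illustrative only and not the library's actual implementation:

    import torchvision

    # Freeze everything, then re-enable parameters whose names contain any of the
    # requested substrings, mirroring the docstring's ``trainable_params = ['layer4']``
    # example with a resnet18 backbone.
    trainable_params = ["layer4"]
    backbone = torchvision.models.resnet18(weights="IMAGENET1K_V1")
    for name, param in backbone.named_parameters():
        param.requires_grad = any(s in name for s in trainable_params)

    trainable = [n for n, p in backbone.named_parameters() if p.requires_grad]
    print(trainable[:3])  # e.g. ['layer4.0.conv1.weight', ...]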

pytorch_widedeep/models/tabular/mlp/context_attention_mlp.py

Lines changed: 2 additions & 2 deletions
@@ -68,8 +68,8 @@ class ContextAttentionMLP(BaseTabularModelWithAttention):
         Activation function to be applied to the continuous embeddings, if
         any. `'tanh'`, `'relu'`, `'leaky_relu'` and `'gelu'` are supported.
     input_dim: int, default = 32
-        The so-called *dimension of the model*. In general is the number of
-        embeddings used to encode the categorical and/or continuous columns
+        The so-called *dimension of the model*. Is the number of embeddings
+        used to encode the categorical and/or continuous columns
     attn_dropout: float, default = 0.2
         Dropout for each attention block
     with_addnorm: bool = False,

pytorch_widedeep/models/tabular/mlp/self_attention_mlp.py

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ class SelfAttentionMLP(BaseTabularModelWithAttention):
         Activation function to be applied to the continuous embeddings, if
         any. `'tanh'`, `'relu'`, `'leaky_relu'` and `'gelu'` are supported.
     input_dim: int, default = 32
-        The so-called *dimension of the model*. In general is the number of
+        The so-called *dimension of the model*. Is the number of
         embeddings used to encode the categorical and/or continuous columns
     attn_dropout: float, default = 0.2
         Dropout for each attention block

pytorch_widedeep/models/tabular/transformers/ft_transformer.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ class FTTransformer(BaseTabularModelWithAttention):
         (See `Linformer: Self-Attention with Linear Complexity
         <https://arxiv.org/abs/2006.04768>`_ ) The compression factor that
         will be used to reduce the input sequence length. If we denote the
-        resulting sequence length as :math:`k`
+        resulting sequence length as
         :math:`k = int(kv_{compression \space factor} \times s)`
         where :math:`s` is the input sequence length.
     kv_sharing: bool, default = False

pytorch_widedeep/models/tabular/transformers/saint.py

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ class SAINT(BaseTabularModelWithAttention):
         Activation function to be applied to the continuous embeddings, if
         any. `'tanh'`, `'relu'`, `'leaky_relu'` and `'gelu'` are supported.
     input_dim: int, default = 32
-        The so-called *dimension of the model*. In general is the number of
+        The so-called *dimension of the model*. Is the number of
         embeddings used to encode the categorical and/or continuous columns
     n_heads: int, default = 8
         Number of attention heads per Transformer block

pytorch_widedeep/models/tabular/transformers/tab_fastformer.py

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ class TabFastFormer(BaseTabularModelWithAttention):
         continuous embeddings, if any. `'tanh'`, `'relu'`, `'leaky_relu'` and
         `'gelu'` are supported.
     input_dim: int, default = 32
-        The so-called *dimension of the model*. In general is the number of
+        The so-called *dimension of the model*. Is the number of
         embeddings used to encode the categorical and/or continuous columns
     n_heads: int, default = 8
         Number of attention heads per FastFormer block

pytorch_widedeep/models/tabular/transformers/tab_perceiver.py

Lines changed: 2 additions & 2 deletions
@@ -68,8 +68,8 @@ class TabPerceiver(BaseTabularModelWithAttention):
         Activation function to be applied to the continuous embeddings, if
         any. `'tanh'`, `'relu'`, `'leaky_relu'` and `'gelu'` are supported.
     input_dim: int, default = 32
-        The so-called *dimension of the model*. In general, is the number of
-        embeddings used to encode the categorical and/or continuous columns.
+        The so-called *dimension of the model*. Is the number of embeddings
+        used to encode the categorical and/or continuous columns.
     n_cross_attns: int, default = 1
         Number of times each perceiver block will cross attend to the input
         data (i.e. number of cross attention components per perceiver block).

pytorch_widedeep/models/tabular/transformers/tab_transformer.py

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ class TabTransformer(BaseTabularModelWithAttention):
         Activation function to be applied to the continuous embeddings, if
         any. `'tanh'`, `'relu'`, `'leaky_relu'` and `'gelu'` are supported.
     input_dim: int, default = 32
-        The so-called *dimension of the model*. In general is the number of
+        The so-called *dimension of the model*. Is the number of
         embeddings used to encode the categorical and/or continuous columns
     n_heads: int, default = 8
         Number of attention heads per Transformer block
