@@ -20,8 +20,15 @@ class Activation(nn.Module):
     A nonlinear activation layer.
 
     Args:
-        func: The type of activation function. Supported values are 'relu',
-            'elu', 'softplus', 'gelu', 'gelu_borzoi', 'gelu_enformer' and 'exp'. If None, will return nn.Identity.
+        func: The type of activation function. Supported values are:
+            - 'relu': Standard ReLU activation
+            - 'elu': Exponential Linear Unit
+            - 'softplus': Softplus activation
+            - 'gelu': Standard GELU activation using PyTorch's default approximation
+            - 'gelu_borzoi': GELU activation using tanh approximation (different from PyTorch's default)
+            - 'gelu_enformer': Custom GELU implementation from Enformer
+            - 'exp': Exponential activation
+            - None: Returns identity function (no activation)
 
     Raises:
         NotImplementedError: If 'func' is not a supported activation function.
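For reference, below is a minimal sketch (not the repository's code) of the dispatch the new docstring describes. The make_activation helper is hypothetical, and the 'gelu_borzoi' and 'gelu_enformer' formulas are assumptions: a tanh-approximate GELU and the sigmoid-based GELU used in the Enformer reference implementation, respectively.

import torch
from torch import nn


class _GELUEnformer(nn.Module):
    # Assumed sigmoid-based GELU, as in the Enformer reference code.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * torch.sigmoid(1.702 * x)


class _Exp(nn.Module):
    # Elementwise exponential activation.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.exp(x)


def make_activation(func: str | None) -> nn.Module:
    # Illustrative stand-in for Activation(func); raises on unknown names.
    table = {
        None: nn.Identity(),
        "relu": nn.ReLU(),
        "elu": nn.ELU(),
        "softplus": nn.Softplus(),
        "gelu": nn.GELU(),                           # PyTorch default GELU
        "gelu_borzoi": nn.GELU(approximate="tanh"),  # tanh-approximate GELU
        "gelu_enformer": _GELUEnformer(),
        "exp": _Exp(),
    }
    if func not in table:
        raise NotImplementedError(f"Unsupported activation: {func!r}")
    return table[func]


x = torch.randn(4, 8)
print(make_activation("gelu_enformer")(x).shape)  # torch.Size([4, 8])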
@@ -159,6 +166,14 @@ class Norm(nn.Module):
             'syncbatch', 'instance', or 'layer'. If None, will return nn.Identity.
         in_dim: Number of features in the input tensor.
         **kwargs: Additional arguments to pass to the normalization function.
+            Common arguments include:
+            - eps: Small constant added to denominator for numerical stability.
+                Defaults to 1e-5 for all normalization types unless overridden.
+            - momentum: Value used for the running_mean and running_var computation.
+                Defaults to 0.1 for batch and sync batch norm.
+            - affine: If True, adds learnable affine parameters. Defaults to True.
+            - track_running_stats: If True, tracks running mean and variance.
+                Defaults to True for batch and sync batch norm.
     """
 
     def __init__ (
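A short sketch of where the documented kwargs end up. That Norm simply forwards **kwargs to the chosen PyTorch layer is an assumption, but the defaults listed above match the nn.BatchNorm1d / nn.SyncBatchNorm and nn.LayerNorm constructors.

import torch
from torch import nn

# Constructing the underlying layers directly with the documented kwargs.
bn = nn.BatchNorm1d(64, eps=1e-5, momentum=0.1, affine=True,
                    track_running_stats=True)
ln = nn.LayerNorm(64, eps=1e-5)  # LayerNorm has no momentum or running stats

x = torch.randn(8, 64)
print(bn(x).shape, ln(x).shape)  # both preserve the (8, 64) shape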