
Commit 147c00c

mmz33, JackTemaki, albertz, Atticus1806, and Eugen Beck authored
Implement conformer convolution part (#4)
Co-authored-by: Nick Rossenbach <[email protected]>
Co-authored-by: Albert Zeyer <[email protected]>
Co-authored-by: Benedikt Hilmes <[email protected]>
Co-authored-by: Eugen Beck <[email protected]>
1 parent 0f20821 commit 147c00c

File tree

4 files changed (+135 −1 lines changed):

i6_models/parts/conformer/convolution.py
i6_models/parts/conformer/norm.py
requirements.txt
tests/test_conformer.py

i6_models/parts/conformer/convolution.py

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
from __future__ import annotations

from dataclasses import dataclass

import torch
from torch import nn
from i6_models.config import ModelConfiguration
from typing import Callable, Union


@dataclass
class ConformerConvolutionV1Config(ModelConfiguration):
    channels: int
    """number of channels for conv layers"""
    kernel_size: int
    """kernel size of conv layers"""
    dropout: float
    """dropout probability"""
    activation: Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]
    """activation function applied after norm"""
    norm: Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]
    """normalization layer with input of shape [N,C,T]"""


class ConformerConvolutionV1(nn.Module):
    """
    Conformer convolution module.
    see also: https://github.com/espnet/espnet/blob/713e784c0815ebba2053131307db5f00af5159ea/espnet/nets/pytorch_backend/conformer/convolution.py#L13
    """

    def __init__(self, model_cfg: ConformerConvolutionV1Config):
        """
        :param model_cfg: model configuration for this module
        """
        super().__init__()

        self.pointwise_conv1 = nn.Linear(in_features=model_cfg.channels, out_features=2 * model_cfg.channels)
        self.depthwise_conv = nn.Conv1d(
            in_channels=model_cfg.channels,
            out_channels=model_cfg.channels,
            kernel_size=model_cfg.kernel_size,
            padding="same",
            groups=model_cfg.channels,
        )
        self.pointwise_conv2 = nn.Linear(in_features=model_cfg.channels, out_features=model_cfg.channels)
        self.layer_norm = nn.LayerNorm(model_cfg.channels)
        self.norm = model_cfg.norm
        self.dropout = nn.Dropout(model_cfg.dropout)
        self.activation = model_cfg.activation

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        :param tensor: input tensor of shape [B,T,F]
        :return: torch.Tensor of shape [B,T,F]
        """
        tensor = self.layer_norm(tensor)
        tensor = self.pointwise_conv1(tensor)  # [B,T,2F]
        tensor = nn.functional.glu(tensor, dim=-1)  # [B,T,F]

        # conv layers expect shape [B,F,T] so we have to transpose here
        tensor = tensor.transpose(1, 2)  # [B,F,T]
        tensor = self.depthwise_conv(tensor)

        tensor = self.norm(tensor)
        tensor = tensor.transpose(1, 2)  # transpose back to [B,T,F]

        tensor = self.activation(tensor)
        tensor = self.pointwise_conv2(tensor)

        return self.dropout(tensor)
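
For orientation (not part of the commit): the forward pass follows the convolution block of the Conformer architecture (Gulati et al., 2020), i.e. LayerNorm, pointwise expansion from F to 2F, GLU back to F, depthwise convolution, norm, activation, pointwise projection, dropout. A minimal usage sketch; the concrete sizes and the BatchNorm1d/SiLU choices are illustrative, mirroring what the tests below exercise:

import torch
from torch import nn

from i6_models.parts.conformer.convolution import ConformerConvolutionV1, ConformerConvolutionV1Config

cfg = ConformerConvolutionV1Config(
    channels=256,  # F, the feature dim of the incoming [B,T,F] tensor
    kernel_size=31,  # depthwise kernel; padding="same" keeps T unchanged
    dropout=0.1,
    activation=nn.functional.silu,
    norm=nn.BatchNorm1d(256),  # applied to the [B,F,T] layout inside forward
)
conv_part = ConformerConvolutionV1(cfg)

x = torch.randn(8, 100, 256)  # [B,T,F]
y = conv_part(x)
assert y.shape == x.shape  # the module preserves [B,T,F]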

i6_models/parts/conformer/norm.py

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
import torch
import torch.nn as nn


class LayerNormNC(nn.LayerNorm):
    """
    LayerNorm that accepts [N,C,*] tensors and normalizes over C (channels) dimension.
    see here: https://pytorch.org/docs/stable/generated/torch.nn.LayerNorm.html
    """

    def __init__(self, channels: int):
        """
        :param channels: number of channels for normalization
        """
        super().__init__(channels)

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """
        :param tensor: input tensor with shape [N,C,*]
        :return: normalized tensor with shape [N,C,*]
        """
        return super().forward(tensor.transpose(1, -1)).transpose(1, -1)
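
A short illustrative sketch (not part of the commit) of why this class exists: both nn.BatchNorm1d and LayerNormNC accept the [N,C,T] layout that ConformerConvolutionV1 feeds its norm, so either can be passed as the norm field of the config; they differ only in which statistics they compute:

import torch
from torch import nn

from i6_models.parts.conformer.norm import LayerNormNC

batch_norm = nn.BatchNorm1d(64)  # per-channel statistics over batch and time
layer_norm_nc = LayerNormNC(64)  # per-frame statistics over the channel dim

x = torch.randn(4, 64, 25)  # [N,C,T]
assert batch_norm(x).shape == layer_norm_nc(x).shape == x.shape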

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 typeguard
-torch
+torch

tests/test_conformer.py

Lines changed: 42 additions & 0 deletions
@@ -3,10 +3,12 @@
 import torch
 from torch import nn
 
+from i6_models.parts.conformer.convolution import ConformerConvolutionV1, ConformerConvolutionV1Config
 from i6_models.parts.conformer.feedforward import (
     ConformerPositionwiseFeedForwardV1,
     ConformerPositionwiseFeedForwardV1Config,
 )
+from i6_models.parts.conformer.norm import LayerNormNC
 
 
 def test_ConformerPositionwiseFeedForwardV1():
@@ -22,3 +24,43 @@ def get_output_shape(input_shape, input_dim, hidden_dim, dropout, activation):
     ):
         input_shape = (10, 100, input_dim)
         assert get_output_shape(input_shape, input_dim, hidden_dim, dropout, activation) == input_shape
+
+
+def test_conformer_convolution_output_shape():
+    def get_output_shape(batch, time, features, norm=None, kernel_size=31, dropout=0.1, activation=nn.functional.silu):
+        x = torch.randn(batch, time, features)
+        if norm is None:
+            norm = nn.BatchNorm1d(features)
+        cfg = ConformerConvolutionV1Config(
+            channels=features, kernel_size=kernel_size, dropout=dropout, activation=activation, norm=norm
+        )
+        conformer_conv_part = ConformerConvolutionV1(cfg)
+        y = conformer_conv_part(x)
+        return y.shape
+
+    assert get_output_shape(10, 50, 250) == (10, 50, 250)
+    assert get_output_shape(10, 50, 250, activation=nn.functional.relu) == (10, 50, 250)  # different activation
+    assert get_output_shape(10, 50, 250, norm=LayerNormNC(250)) == (10, 50, 250)  # different norm
+    assert get_output_shape(1, 50, 100) == (1, 50, 100)  # test with batch size 1
+    assert get_output_shape(10, 1, 50) == (10, 1, 50)  # time dim 1
+    assert get_output_shape(10, 10, 20, dropout=0.0) == (10, 10, 20)  # dropout 0
+    assert get_output_shape(10, 10, 20, kernel_size=3) == (10, 10, 20)  # odd kernel size
+    assert get_output_shape(10, 10, 20, kernel_size=32) == (10, 10, 20)  # even kernel size
+
+
+def test_layer_norm_nc():
+    torch.manual_seed(42)
+
+    def get_output(x, norm):
+        out = norm(x)
+        return out
+
+    x = torch.randn(10, 50, 250)  # [N,T,C]; both norms must see the same data
+    torch_ln = get_output(x, nn.LayerNorm(250))
+    custom_ln = get_output(x.transpose(1, 2), LayerNormNC(250))
+    assert torch.allclose(torch_ln, custom_ln.transpose(1, 2))
+
+    x = torch.randn(10, 8, 23)  # [N,T,C]
+    torch_ln = get_output(x, nn.LayerNorm(23))
+    custom_ln = get_output(x.transpose(1, 2), LayerNormNC(23))
+    assert torch.allclose(torch_ln, custom_ln.transpose(1, 2))
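
Both new tests use plain asserts, so any standard test runner picks them up; for example (assuming pytest is installed, which is not listed in requirements.txt):

pytest tests/test_conformer.py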
