@@ -43,7 +43,7 @@ class ConformerEncoderLayer(nn.Module):
 if set different to 0, the number of
 depthwise_seperable_out_channel will be used as a
 channel_out of the second conv1d layer.
-otherwise, it equal to 0, the second conv1d layer is skipped.
+otherwise, if it equals 0, the second conv1d layer is skipped.
 depthwise_multiplier: int
 number of input_dim channels duplication. this value
 will be used to compute the hidden channels of the Conv1D.
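For context, the two parameters in this hunk describe a depthwise-separable Conv1d: the multiplier sets the hidden channels, and a non-zero depthwise_seperable_out_channel enables a second pointwise conv. A minimal sketch under those assumptions (the class name and wiring are illustrative, not the module's actual code):

```python
import torch.nn as nn


class DepthwiseSeparableConv1d(nn.Module):
    """Hypothetical illustration of the two documented parameters."""

    def __init__(self, input_dim, depthwise_multiplier,
                 depthwise_seperable_out_channel=0, kernel_size=3):
        super().__init__()
        # hidden channels of the Conv1D come from the multiplier
        hidden_channels = input_dim * depthwise_multiplier
        # depthwise stage: one filter group per input channel
        self.depthwise = nn.Conv1d(input_dim, hidden_channels, kernel_size,
                                   padding=kernel_size // 2, groups=input_dim)
        if depthwise_seperable_out_channel != 0:
            # second (pointwise) conv1d maps hidden_channels -> channel_out
            self.pointwise = nn.Conv1d(hidden_channels,
                                       depthwise_seperable_out_channel, 1)
        else:
            # if it equals 0, the second conv1d layer is skipped
            self.pointwise = None

    def forward(self, x):
        # x: (batch, input_dim, time)
        x = self.depthwise(x)
        if self.pointwise is not None:
            x = self.pointwise(x)
        return x
```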
@@ -115,7 +115,7 @@ class ConformerEncoderLayer(nn.Module):
 we recalculate activation in backward.
 default "".
 export: bool, optional
-if set to True, it remove the padding from convolutional layers
+if set to True, it removes the padding from convolutional layers
 and allow the onnx conversion for inference.
 default False.
 use_pt_scaled_dot_product_attention: bool, optional
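The use_pt_scaled_dot_product_attention flag at the end of this hunk presumably toggles PyTorch's fused torch.nn.functional.scaled_dot_product_attention (available in PyTorch >= 2.0). A hedged sketch of such a toggle; the explicit-softmax fallback here is an assumption, not the layer's real code:

```python
import math

import torch
import torch.nn.functional as F


def attention(q, k, v, use_pt_scaled_dot_product_attention=True):
    # q, k, v: (batch, heads, time, head_dim)
    if use_pt_scaled_dot_product_attention:
        # PyTorch's fused attention kernel
        return F.scaled_dot_product_attention(q, k, v)
    # explicit softmax(QK^T / sqrt(d)) V fallback (assumed behavior)
    scores = q @ k.transpose(-2, -1) / math.sqrt(q.size(-1))
    return torch.softmax(scores, dim=-1) @ v
```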
@@ -686,7 +686,7 @@ class ConformerEncoder(TransformerEncoderBase):
 only work for glu_in_attention !=0
 default "swish".
 export: bool, optional
-if set to True, it remove the padding from convolutional layers
+if set to True, it removes the padding from convolutional layers
 and allow the onnx conversion for inference.
 default False.
 activation_checkpointing: str, optional
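The activation_checkpointing option documented here matches the earlier description ("we recalculate activation in backward"). A minimal sketch of that behavior with torch.utils.checkpoint, using a hypothetical wrapper rather than the encoder's actual wiring:

```python
import torch.nn as nn
from torch.utils.checkpoint import checkpoint


class CheckpointedLayer(nn.Module):
    """Hypothetical wrapper; the real encoder wires this internally."""

    def __init__(self, layer, activation_checkpointing=""):
        super().__init__()
        self.layer = layer
        self.activation_checkpointing = activation_checkpointing

    def forward(self, x):
        if self.activation_checkpointing:
            # skip storing intermediate activations; recompute them in backward
            return checkpoint(self.layer, x, use_reentrant=False)
        return self.layer(x)
```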