microsoft
diff --git a/c_reference/include/conv1d.h b/c_reference/include/conv1d.h
Lines changed: 26 additions & 16 deletions
diff --git a/c_reference/include/dscnn.h b/c_reference/include/dscnn.h
Lines changed: 12 additions & 8 deletions
diff --git a/c_reference/include/rnn_bricked.h b/c_reference/include/rnn_bricked.h
Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@ typedef struct ConvLayers_Params {
 } ConvLayers_Params;
 
 /**
- * @brief Model definition for the 1D Convolution Layer
+ * @brief Model definition for the 1D Convolution Layer. Currently only for dilation = 1
  * @param[out]   output_signal    pointer to the output signal, size = out_time * out_channels
  * @param[in]    out_time         number of time steps in the output
  * @param[in]    out_channels     number of output channels for the output of the conv layer
@@ -27,19 +27,21 @@ typedef struct ConvLayers_Params {
 *                                E.g.: padding = 3 pads the input with 3 time steps of zeros both before the input_signal (time step 0) and after the input_signal (time step in_time).
  * @param[in]    kernel_size      kernel size of the conv filter
  * @param[in]    params           weights, bias and other essential parameters used to describe the layer
- * @param[in]    activations      an integer to choose the type of activation function.
+ * @param[in]    stride           stride length for the layer: the input time index advances by stride for every unit step of the output time index (input_time_iterator += stride for output_time_iterator += 1)
+ * @param[in]    activation       an integer to choose the type of activation function.
  *                                0: none
  *                                1: sigmoid
  *                                2: tanh
  *                                3: relu
  */
 int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal, 
   unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size, 
-  const void* params, int activations);
+  const void* params, unsigned stride, int activation);
 
 /**
- * @brief Model definition for the 1D Depthwise Convolution Layer
+ * @brief Model definition for the 1D Depthwise Convolution Layer. Currently only for dilation = 1
  * @param[out]   output_signal    pointer to the output signal, size = out_time * in_channels
+ *                                NOTE: out_channels == in_channels for depthwise
  * @param[in]    out_time         number of time steps in the output
  * @param[in]    input_signal     pointer to the input signal. size = in_time * in_channels
  * @param[in]    in_time          number of time steps in the input
@@ -49,18 +51,19 @@ int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const
 *                                E.g.: padding = 3 pads the input with 3 time steps of zeros both before the input_signal (time step 0) and after the input_signal (time step in_time).
  * @param[in]    kernel_size      kernel size of the conv filter
  * @param[in]    params           weights, bias and other essential parameters used to describe the layer
- * @param[in]    activations      an integer to choose the type of activation function.
+ * @param[in]    stride           stride length for the layer: the input time index advances by stride for every unit step of the output time index (input_time_iterator += stride for output_time_iterator += 1)
+ * @param[in]    activation       an integer to choose the type of activation function.
  *                                0: none
  *                                1: sigmoid
  *                                2: tanh
  *                                3: relu
  */
 int conv1d_depth(float* output_signal, unsigned out_time, const float* input_signal, 
   unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size, 
-  const void* params, int activations);
+  const void* params, unsigned stride, int activation);
 
 /**
- * @brief Model parameters for the 1D Low Rank Convolution Layer
+ * @brief Model parameters for the 1D Low Rank Convolution Layer.
  * @var    W1      pointer to the 1st low-rank component of the weights, size = out_channels * rank
  * @var    W2      pointer to the 2nd low-rank component of the weights, size for regular = rank * in_channels * kernel_size, size for depthwise = rank * kernel_size
  * @var    B       pointer to the bias vector for the convolution, shape = [out_channels]
@@ -74,8 +77,9 @@ typedef struct ConvLayers_LR_Params {
 } ConvLayers_LR_Params;
 
 /**
- * @brief Model definition for the 1D Low-Rank Convolution Layer
+ * @brief Model definition for the 1D Low-Rank Convolution Layer. Currently only for dilation = 1
  * @brief Identical to the non-low-rank form. One modification is the multiplication of the weights handled within the layer
+ * @brief The Weights W1 and W2 are multiplied within the layer using a matmul function from utils. Operation : W1 * W2
  * @param[out]   output_signal    pointer to the output signal, size = out_time * out_channels
  * @param[in]    out_time         number of time steps in the output
 * @param[in]    out_channels     number of output channels for the output of the conv layer
@@ -87,20 +91,23 @@ typedef struct ConvLayers_LR_Params {
 *                                E.g.: padding = 3 pads the input with 3 time steps of zeros both before the input_signal (time step 0) and after the input_signal (time step in_time).
  * @param[in]    kernel_size      kernel size of the conv filter
  * @param[in]    params           weights, bias and other essential parameters used to describe the layer
- * @param[in]    activations      an integer to choose the type of activation function.
+ * @param[in]    stride           stride length for the layer: the input time index advances by stride for every unit step of the output time index (input_time_iterator += stride for output_time_iterator += 1)
+ * @param[in]    activation       an integer to choose the type of activation function.
  *                                0: none
  *                                1: sigmoid
  *                                2: tanh
  *                                3: relu
  */
 int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal, 
   unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size, 
-  const void* params, int activations);
+  const void* params, unsigned stride, int activation);
 
 /**
- * @brief Model definition for the 1D Low-Rank Depthwise Convolution Layer
+ * @brief Model definition for the 1D Low-Rank Depthwise Convolution Layer. Currently only for dilation = 1
  * @brief Identical to the non-low-rank form. One modification is the multiplication of the weights handled within the layer
+ * @brief The Weights W1 and W2 are multiplied within the layer using a matmul function from utils. Operation : W1 * W2
  * @param[out]   output_signal    pointer to the output signal, size = out_time * in_channels
+ *                                NOTE: out_channels == in_channels for depthwise conv
  * @param[in]    out_time         number of time steps in the output
  * @param[in]    input_signal     pointer to the input signal. size = in_time * in_channels
  * @param[in]    in_time          number of time steps in the input
@@ -110,20 +117,22 @@ int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, co
 *                                E.g.: padding = 3 pads the input with 3 time steps of zeros both before the input_signal (time step 0) and after the input_signal (time step in_time).
  * @param[in]    kernel_size      kernel size of the conv filter
  * @param[in]    params           weights, bias and other essential parameters used to describe the layer
- * @param[in]    activations      an integer to choose the type of activation function.
+ * @param[in]    stride           stride length for the layer: the input time index advances by stride for every unit step of the output time index (input_time_iterator += stride for output_time_iterator += 1)
+ * @param[in]    activation       an integer to choose the type of activation function.
  *                                0: none
  *                                1: sigmoid
  *                                2: tanh
  *                                3: relu
  */
 int conv1d_depth_lr(float* output_signal, unsigned out_time, const float* input_signal, 
   unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size, 
-  const void* params, int activations);
+  const void* params, unsigned stride, int activation);
 
 // Auxiliary Layers
 /**
- * @brief Model definition for the 1D Average Pooling Layer
+ * @brief Model definition for the 1D Average Pooling Layer. Currently only for dilation = 1
 * @param[out]   output_signal    pointer to the output signal, size = out_time * in_channels. Pass NULL (0) for in-place computation
+ *                                NOTE: out_channels == in_channels for avgpool
  * @param[in]    out_time         number of time steps in the output
  * @param[in]    input_signal     pointer to the input signal. size = in_time * in_channels
  * @param[in]    in_time          number of time steps in the input
@@ -132,15 +141,16 @@ int conv1d_depth_lr(float* output_signal, unsigned out_time, const float* input_
  *                                Note: padding is applied to both the starting and ending of the input, along the time axis
 *                                E.g.: padding = 3 pads the input with 3 time steps of zeros both before the input_signal (time step 0) and after the input_signal (time step in_time).
  * @param[in]    kernel_size      kernel size of the pool filter
- * @param[in]    activations      an integer to choose the type of activation function.
+ * @param[in]    stride           stride length for the layer: the input time index advances by stride for every unit step of the output time index (input_time_iterator += stride for output_time_iterator += 1)
+ * @param[in]    activation       an integer to choose the type of activation function.
  *                                0: none
  *                                1: sigmoid
  *                                2: tanh
  *                                3: relu
  */
 int avgpool1d(float* output_signal, unsigned out_time, const float* input_signal,
   unsigned in_time, unsigned in_channels,
-  unsigned padding, unsigned kernel_size, int activations);
+  unsigned padding, unsigned kernel_size, unsigned stride, int activation);
 
 /**
  * @brief Model definition for the 1D batch Normalization Layer
 
@@ -21,7 +21,8 @@
  * @param[in]    cnn_padding         padding for the low-rank CNN layer. Note: applied to both sides of the input 
  * @param[in]    cnn_kernel_size     kernel size of the low-rank CNN
  * @param[in]    cnn_params          weights, bias and other essential parameters for the low-rank CNN
- * @param[in]    cnn_activations     an integer to choose the type of activation function.
+ * @param[in]    cnn_stride          stride factor for the low-rank CNN
+ * @param[in]    cnn_activation      an integer to choose the type of activation function.
  *                                   0: none
  *                                   1: sigmoid
  *                                   2: tanh
@@ -31,7 +32,7 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
   unsigned in_time, unsigned in_channels,
   float* mean, float* var, unsigned affine, float* gamma, float* beta, unsigned in_place,
   unsigned cnn_hidden, unsigned cnn_padding, unsigned cnn_kernel_size,
-  const void* cnn_params, int cnn_activations);
+  const void* cnn_params, unsigned cnn_stride, int cnn_activation);
 
 /**
  * @brief Model definition for the 1D Convolution block applied after the RNN
@@ -50,7 +51,8 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
  * @param[in]    depth_cnn_padding      padding for the depth CNN layer. Note: applied to both sides of the input to the depth CNN
  * @param[in]    depth_cnn_kernel_size  kernel size of the depth CNN
  * @param[in]    depth_cnn_params       weights, bias and other essential parameters used to describe the depth CNN
- * @param[in]    depth_cnn_activations  an integer to choose the type of activation function.
+ * @param[in]    depth_cnn_stride       stride factor for the depth CNN
+ * @param[in]    depth_cnn_activation   an integer to choose the type of activation function.
  *                                      0: none
  *                                      1: sigmoid
  *                                      2: tanh
@@ -59,14 +61,16 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
  * @param[in]    point_cnn_padding      padding for the point CNN layer. Note: applied to both sides of the input to the point CNN
  * @param[in]    point_cnn_kernel_size  kernel size of the point CNN
  * @param[in]    point_cnn_params       weights, bias and other essential parameters used to describe the point CNN
- * @param[in]    point_cnn_activations  an integer to choose the type of activation function.
+ * @param[in]    point_cnn_stride       stride factor for the point CNN
+ * @param[in]    point_cnn_activation   an integer to choose the type of activation function.
  *                                      0: none
  *                                      1: sigmoid
  *                                      2: tanh
  *                                      3: relu
  * @param[in]    pool_padding           padding for the pool layer. Note: applied to both sides of the input to the pool 
  * @param[in]    pool_kernel_size       kernel size of the pool
- * @param[in]    pool_activations       an integer to choose the type of activation function.
+ * @param[in]    pool_stride            stride factor for the pool
+ * @param[in]    pool_activation        an integer to choose the type of activation function.
  *                                      0: none
  *                                      1: sigmoid
  *                                      2: tanh
@@ -76,9 +80,9 @@ int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
   unsigned in_time, unsigned in_channels,
   float* mean, float* var, unsigned affine, float* gamma, float* beta, unsigned in_place,
   unsigned depth_cnn_hidden, unsigned depth_cnn_padding, unsigned depth_cnn_kernel_size,
-  const void* depth_cnn_params, int depth_cnn_activations,
+  const void* depth_cnn_params, unsigned depth_cnn_stride, int depth_cnn_activation,
   unsigned point_cnn_hidden, unsigned point_cnn_padding, unsigned point_cnn_kernel_size,
-  const void* point_cnn_params, int point_cnn_activations,
-  unsigned pool_padding, unsigned pool_kernel_size, int pool_activation);
+  const void* point_cnn_params, unsigned point_cnn_stride, int point_cnn_activation,
+  unsigned pool_padding, unsigned pool_kernel_size, unsigned pool_stride, int pool_activation);
 
 #endif
@@ -20,9 +20,9 @@ typedef int (*rnn_t)(float* const, unsigned, const float* const, unsigned,
 // This use of an offset is a way to exploit the nature of bi-direction to bypass the concatenation step typically associated with bi-directional passes
 //
 // Constraints
-// For Bi-Directional use, there are 2 constraints
-// 1) (in_time - window) % hop == 0
-// 2) both the window % hop == 0
+// For Bi-Directional use, there are 3 constraints
+// 1) (in_time - fwd_window) % hop == 0 and (in_time - bwd_window) % hop == 0
+// 2) fwd_window % hop == 0 and bwd_window % hop == 0
 // 3) sample_first_brick = 1 and sample_last_brick = 1
 //
 // Violation of these constraints can lead to one of the following issues