Skip to content

Commit 5435f0f

Browse files
Stride incorporated for Conv1d layers
1 parent 6f0edd1 commit 5435f0f

File tree

13 files changed

+139
-125
lines changed

13 files changed

+139
-125
lines changed

c_reference/include/conv1d.h

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ typedef struct ConvLayers_Params {
1515
} ConvLayers_Params;
1616

1717
/**
18-
* @brief Model definition for the 1D Convolution Layer
18+
* @brief Model definition for the 1D Convolution Layer. Currently only for dilation = 1
1919
* @param[out] output_signal pointer to the output signal, size = out_time * out_channels
2020
* @param[in] out_time number of time steps in the output
2121
* @param[in] out_channels number of output channels for the output of the conv layer
@@ -27,19 +27,21 @@ typedef struct ConvLayers_Params {
2727
* E.g : padding = 3, the input is padded with zeros(for 3 time steps), both before the input_signal(time step 0) and after the input_signal(time step in_time).
2828
* @param[in] kernel_size kernel size of the conv filter
2929
* @param[in] params weights, bias and other essential parameters used to describe the layer
30-
* @param[in] activations an integer to choose the type of activation function.
30+
* @param[in] stride stride length for the layer: the input time index advances by stride for every increment of 1 in the output time index
31+
* @param[in] activation an integer to choose the type of activation function.
3132
* 0: none
3233
* 1: sigmoid
3334
* 2: tanh
3435
* 3: relu
3536
*/
3637
int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal,
3738
unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
38-
const void* params, int activations);
39+
const void* params, unsigned stride, int activation);
3940

4041
/**
41-
* @brief Model definition for the 1D Depthwise Convolution Layer
42+
* @brief Model definition for the 1D Depthwise Convolution Layer. Currently only for dilation = 1
4243
* @param[out] output_signal pointer to the output signal, size = out_time * in_channels
44+
* NOTE: out_channels == in_channels for depthwise
4345
* @param[in] out_time number of time steps in the output
4446
* @param[in] input_signal pointer to the input signal. size = in_time * in_channels
4547
* @param[in] in_time number of time steps in the input
@@ -49,18 +51,19 @@ int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const
4951
* E.g : padding = 3, the input is padded with zeros(for 3 time steps), both before the input_signal(time step 0) and after the input_signal(time step in_time).
5052
* @param[in] kernel_size kernel size of the conv filter
5153
* @param[in] params weights, bias and other essential parameters used to describe the layer
52-
* @param[in] activations an integer to choose the type of activation function.
54+
* @param[in] stride stride length for the layer: the input time index advances by stride for every increment of 1 in the output time index
55+
* @param[in] activation an integer to choose the type of activation function.
5356
* 0: none
5457
* 1: sigmoid
5558
* 2: tanh
5659
* 3: relu
5760
*/
5861
int conv1d_depth(float* output_signal, unsigned out_time, const float* input_signal,
5962
unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
60-
const void* params, int activations);
63+
const void* params, unsigned stride, int activation);
6164

6265
/**
63-
* @brief Model parameters for the 1D Low Rank Convolution Layer
66+
* @brief Model parameters for the 1D Low Rank Convolution Layer.
6467
* @var W1 pointer to the 1st low-rank component of the weights, size = out_channels * rank
6568
* @var W2 pointer to the 2nd low-rank component of the weights, size for regular = rank * in_channels * kernel_size, size for depthwise = rank * kernel_size
6669
* @var B pointer to the bias vector for the convolution, shape = [out_channels]
@@ -74,8 +77,9 @@ typedef struct ConvLayers_LR_Params {
7477
} ConvLayers_LR_Params;
7578

7679
/**
77-
* @brief Model definition for the 1D Low-Rank Convolution Layer
80+
* @brief Model definition for the 1D Low-Rank Convolution Layer. Currently only for dilation = 1
7881
* @brief Identical to the non-low-rank form. One modification is the multiplication of the weights handled within the layer
82+
* @brief The Weights W1 and W2 are multiplied within the layer using a matmul function from utils. Operation : W1 * W2
7983
* @param[out] output_signal pointer to the output signal, size = out_time * out_channels
8084
* @param[in] out_time number of time steps in the output
8185
* @param[in] out_channels number of output channels for the output of the conv layer
@@ -87,20 +91,23 @@ typedef struct ConvLayers_LR_Params {
8791
* E.g : padding = 3, the input is padded with zeros(for 3 time steps), both before the input_signal(time step 0) and after the input_signal(time step in_time).
8892
* @param[in] kernel_size kernel size of the conv filter
8993
* @param[in] params weights, bias and other essential parameters used to describe the layer
90-
* @param[in] activations an integer to choose the type of activation function.
94+
* @param[in] stride stride length for the layer: the input time index advances by stride for every increment of 1 in the output time index
95+
* @param[in] activation an integer to choose the type of activation function.
9196
* 0: none
9297
* 1: sigmoid
9398
* 2: tanh
9499
* 3: relu
95100
*/
96101
int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal,
97102
unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
98-
const void* params, int activations);
103+
const void* params, unsigned stride, int activation);
99104

100105
/**
101-
* @brief Model definition for the 1D Low-Rank Depthwise Convolution Layer
106+
* @brief Model definition for the 1D Low-Rank Depthwise Convolution Layer. Currently only for dilation = 1
102107
* @brief Identical to the non-low-rank form. One modification is the multiplication of the weights handled within the layer
108+
* @brief The Weights W1 and W2 are multiplied within the layer using a matmul function from utils. Operation : W1 * W2
103109
* @param[out] output_signal pointer to the output signal, size = out_time * in_channels
110+
* NOTE: out_channels == in_channels for depthwise conv
104111
* @param[in] out_time number of time steps in the output
105112
* @param[in] input_signal pointer to the input signal. size = in_time * in_channels
106113
* @param[in] in_time number of time steps in the input
@@ -110,20 +117,22 @@ int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, co
110117
* E.g : padding = 3, the input is padded with zeros(for 3 time steps), both before the input_signal(time step 0) and after the input_signal(time step in_time).
111118
* @param[in] kernel_size kernel size of the conv filter
112119
* @param[in] params weights, bias and other essential parameters used to describe the layer
113-
* @param[in] activations an integer to choose the type of activation function.
120+
* @param[in] stride stride length for the layer: the input time index advances by stride for every increment of 1 in the output time index
121+
* @param[in] activation an integer to choose the type of activation function.
114122
* 0: none
115123
* 1: sigmoid
116124
* 2: tanh
117125
* 3: relu
118126
*/
119127
int conv1d_depth_lr(float* output_signal, unsigned out_time, const float* input_signal,
120128
unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
121-
const void* params, int activations);
129+
const void* params, unsigned stride, int activation);
122130

123131
// Auxiliary Layers
124132
/**
125-
* @brief Model definition for the 1D Average Pooling Layer
133+
* @brief Model definition for the 1D Average Pooling Layer. Currently only for dilation = 1
126134
* @param[out] output_signal pointer to the output signal, size = out_time * in_channels. Pass NULL (0) for in-place computation
135+
* NOTE: out_channels == in_channels for avgpool
127136
* @param[in] out_time number of time steps in the output
128137
* @param[in] input_signal pointer to the input signal. size = in_time * in_channels
129138
* @param[in] in_time number of time steps in the input
@@ -132,15 +141,16 @@ int conv1d_depth_lr(float* output_signal, unsigned out_time, const float* input_
132141
* Note: padding is applied to both the starting and ending of the input, along the time axis
133142
* E.g : padding = 3, the input is padded with zeros(for 3 time steps), both before the input_signal(time step 0) and after the input_signal(time step in_time).
134143
* @param[in] kernel_size kernel size of the pool filter
135-
* @param[in] activations an integer to choose the type of activation function.
144+
* @param[in] stride stride length for the layer: the input time index advances by stride for every increment of 1 in the output time index
145+
* @param[in] activation an integer to choose the type of activation function.
136146
* 0: none
137147
* 1: sigmoid
138148
* 2: tanh
139149
* 3: relu
140150
*/
141151
int avgpool1d(float* output_signal, unsigned out_time, const float* input_signal,
142152
unsigned in_time, unsigned in_channels,
143-
unsigned padding, unsigned kernel_size, int activations);
153+
unsigned padding, unsigned kernel_size, unsigned stride, int activation);
144154

145155
/**
146156
* @brief Model definition for the 1D batch Normalization Layer

c_reference/include/dscnn.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
* @param[in] cnn_padding padding for the low-rank CNN layer. Note: applied to both sides of the input
2222
* @param[in] cnn_kernel_size kernel size of the low-rank CNN
2323
* @param[in] cnn_params weights, bias and other essential parameters for the low-rank CNN
24-
* @param[in] cnn_activations an integer to choose the type of activation function.
24+
* @param[in] cnn_stride stride factor for the low-rank CNN
25+
* @param[in] cnn_activation an integer to choose the type of activation function.
2526
* 0: none
2627
* 1: sigmoid
2728
* 2: tanh
@@ -31,7 +32,7 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
3132
unsigned in_time, unsigned in_channels,
3233
float* mean, float* var, unsigned affine, float* gamma, float* beta, unsigned in_place,
3334
unsigned cnn_hidden, unsigned cnn_padding, unsigned cnn_kernel_size,
34-
const void* cnn_params, int cnn_activations);
35+
const void* cnn_params, unsigned cnn_stride, int cnn_activation);
3536

3637
/**
3738
* @brief Model definition for the 1D Convolution block applied after the RNN
@@ -50,7 +51,8 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
5051
* @param[in] depth_cnn_padding padding for the depth CNN layer. Note: applied to both sides of the input to the depth CNN
5152
* @param[in] depth_cnn_kernel_size kernel size of the depth CNN
5253
* @param[in] depth_cnn_params weights, bias and other essential parameters used to describe the depth CNN
53-
* @param[in] depth_cnn_activations an integer to choose the type of activation function.
54+
* @param[in] depth_cnn_stride stride factor for the depth CNN
55+
* @param[in] depth_cnn_activation an integer to choose the type of activation function.
5456
* 0: none
5557
* 1: sigmoid
5658
* 2: tanh
@@ -59,14 +61,16 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
5961
* @param[in] point_cnn_padding padding for the point CNN layer. Note: applied to both sides of the input to the point CNN
6062
* @param[in] point_cnn_kernel_size kernel size of the point CNN
6163
* @param[in] point_cnn_params weights, bias and other essential parameters used to describe the point CNN
62-
* @param[in] point_cnn_activations an integer to choose the type of activation function.
64+
* @param[in] point_cnn_stride stride factor for the point CNN
65+
* @param[in] point_cnn_activation an integer to choose the type of activation function.
6366
* 0: none
6467
* 1: sigmoid
6568
* 2: tanh
6669
* 3: relu
6770
* @param[in] pool_padding padding for the pool layer. Note: applied to both sides of the input to the pool
6871
* @param[in] pool_kernel_size kernel size of the pool
69-
* @param[in] pool_activations an integer to choose the type of activation function.
72+
* @param[in] pool_stride stride factor for the pool
73+
* @param[in] pool_activation an integer to choose the type of activation function.
7074
* 0: none
7175
* 1: sigmoid
7276
* 2: tanh
@@ -76,9 +80,9 @@ int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
7680
unsigned in_time, unsigned in_channels,
7781
float* mean, float* var, unsigned affine, float* gamma, float* beta, unsigned in_place,
7882
unsigned depth_cnn_hidden, unsigned depth_cnn_padding, unsigned depth_cnn_kernel_size,
79-
const void* depth_cnn_params, int depth_cnn_activations,
83+
const void* depth_cnn_params, unsigned depth_cnn_stride, int depth_cnn_activation,
8084
unsigned point_cnn_hidden, unsigned point_cnn_padding, unsigned point_cnn_kernel_size,
81-
const void* point_cnn_params, int point_cnn_activations,
82-
unsigned pool_padding, unsigned pool_kernel_size, int pool_activation);
85+
const void* point_cnn_params, unsigned point_cnn_stride, int point_cnn_activation,
86+
unsigned pool_padding, unsigned pool_kernel_size, unsigned pool_stride, int pool_activation);
8387

8488
#endif

c_reference/include/rnn_bricked.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ typedef int (*rnn_t)(float* const, unsigned, const float* const, unsigned,
2020
// This use of an offset is a way to exploit the nature of bi-direction to bypass the concatenation step typically associated with bi-directional passes
2121
//
2222
// Constraints
23-
// For Bi-Directional use, there are 2 constraints
24-
// 1) (in_time - window) % hop == 0
25-
// 2) both the window % hop == 0
23+
// For Bi-Directional use, there are 3 constraints
24+
// 1) (in_time - fwd_window) % hop == 0 and (in_time - bwd_window) % hop == 0
25+
// 2) fwd_window % hop == 0 and bwd_window % hop == 0
2626
// 3) sample_first_brick and sample_last_brick = 1
2727
//
2828
// Violation of these constraints can lead to one of the following issues

0 commit comments

Comments
 (0)