
Commit d8f9d1b

Conv1d, BatchNorm1d and AvgPool1d Layers
1 parent c7349dd

25 files changed, +2268 −3 lines changed

.gitattributes

Lines changed: 11 additions & 0 deletions
@@ -60,3 +60,14 @@ c_reference/models/q_scut_head_b_face4_model/mbconv2.h filter=lfs diff=lfs merge=lfs -text
 c_reference/models/q_scut_head_b_face4_model/mbconv4.h filter=lfs diff=lfs merge=lfs -text
 c_reference/models/q_scut_head_b_face4_model/rnn2.h filter=lfs diff=lfs merge=lfs -text
 c_reference/models/q_scut_head_b_face4_model/detection2.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/kws/keyword_spotting_io_1.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/kws/keyword_spotting_io_2.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/kws/keyword_spotting_io_3.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/conv1d/conv1d_regular/conv_param.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/conv1d/conv1d_lr/conv_param_lr.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/conv1d/conv1d_depthwise/conv_param_depth.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/kws/precnn_params.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/kws/postcnn_params.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/kws/rnn_params.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/rnn_bricked/rnn_params.h filter=lfs diff=lfs merge=lfs -text
+c_reference/tests/rnn_bricked/rnn_bricked_io.h filter=lfs diff=lfs merge=lfs -text

c_reference/include/conv1d.h

Lines changed: 243 additions & 0 deletions
Large diffs are not rendered by default.

c_reference/include/dscnn.h

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

#ifndef __DSCNN_H__
#define __DSCNN_H__

// Function pointer for the Conv layer to be passed as a parameter. (conv1d or conv1d_lr only)
typedef int (*conv_layer)(float*, unsigned, unsigned, const float*,
  unsigned, unsigned, unsigned, unsigned,
  const void*, unsigned, unsigned);

/**
 * @brief Model definition for the 1D Convolution block applied before the RNN
 * @brief sub-layers : batchnorm1d -> conv1d_lr
 * @param[out] output_signal pointer to the final output signal, minimum size = out_time * in_channels. out_time has to be calculated based on the reduction from all the conv and pool layers
 * @param[in] input_signal pointer to the input signal. size = in_time * in_channels
 * @param[in] cnn function pointer for the CNN layer. (any of the conv layers can be passed with appropriate params)
 * @param[in] in_time number of time steps in the input_signal
 * @param[in] in_channels number of input channels
 * @param[in] mean pointer to the mean for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
 * @param[in] var pointer to the variance for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
 * @param[in] affine_config whether the affine operations are applied
 *            if affine_config = 0, then only mean and var are used
 *            if affine_config = 1, then mean, var, gamma and beta are used for the final computation.
 *            if affine_config = 2, then only the gamma and beta are used. gamma = original_gamma/sqrt(var), beta = original_beta - gamma * mean/sqrt(var)
 *            Note: Use affine_config = 2 for faster calculations. The new gamma and beta would need to be pre-computed, stored and passed
 * @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
 * @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
 * @param[in] in_place in-place computation check for the batchnorm. Storage efficient
 * @param[in] cnn_hidden hidden state/out_channels dimensions for the low-rank CNN. The final channel size of this block
 * @param[in] cnn_padding padding for the low-rank CNN layer. Note: applied to both sides of the input
 * @param[in] cnn_kernel_size kernel size of the low-rank CNN
 * @param[in] cnn_params weights, bias and other essential parameters for the low-rank CNN
 * @param[in] cnn_stride stride factor for the low-rank CNN
 * @param[in] cnn_activation an integer to choose the type of activation function.
 *            0: none
 *            1: sigmoid
 *            2: tanh
 *            3: relu
 */
int phon_pred_lr_cnn(float* output_signal, float* input_signal,
  conv_layer cnn, unsigned in_time, unsigned in_channels,
  const float* const mean, const float* const var,
  unsigned affine_config, const float* const gamma, const float* const beta, unsigned in_place,
  unsigned cnn_hidden, unsigned cnn_padding, unsigned cnn_kernel_size,
  const void* cnn_params, unsigned cnn_stride, unsigned cnn_activation);

/**
 * @brief Model definition for the 1D Convolution block applied after the RNN
 * @brief sub-layers : custom nonlinearity(semi_sigmoid_tanh) -> batchnorm1d -> conv1d_depth -> conv1d_lr -> avgpool1d
 * @param[out] output_signal pointer to the final output signal, minimum size = out_time * in_channels. out_time has to be calculated based on the reduction from all the conv and pool layers
 * @param[in] input_signal pointer to the input signal. size = in_time * in_channels
 * @param[in] point_cnn function pointer for the point-wise CNN. (any of the conv layers can be passed with appropriate params)
 * @param[in] in_time number of time steps in the input
 * @param[in] in_channels number of input channels
 * @param[in] mean pointer to the mean for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
 * @param[in] var pointer to the variance for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
 * @param[in] affine_config whether the affine operations are applied
 *            if affine_config = 0, then only mean and var are used
 *            if affine_config = 1, then mean, var, gamma and beta are used for the final computation.
 *            if affine_config = 2, then only the gamma and beta are used. gamma = original_gamma/sqrt(var), beta = original_beta - gamma * mean/sqrt(var)
 *            Note: Use affine_config = 2 for faster calculations. The new gamma and beta would need to be pre-computed, stored and passed
 * @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
 * @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
 * @param[in] in_place in-place computation of the batchnorm. Storage efficient
 * @param[in] depth_cnn_padding padding for the depth CNN layer. Note: applied to both sides of the input to the depth CNN
 * @param[in] depth_cnn_kernel_size kernel size of the depth CNN
 * @param[in] depth_cnn_params weights, bias and other essential parameters used to describe the depth CNN
 * @param[in] depth_cnn_stride stride factor for the depth CNN
 * @param[in] depth_cnn_activation an integer to choose the type of activation function.
 *            0: none
 *            1: sigmoid
 *            2: tanh
 *            3: relu
 * @param[in] point_cnn_hidden hidden state/out_channels dimensions for the point CNN. The final channel size of this block
 * @param[in] point_cnn_padding padding for the point CNN layer. Note: applied to both sides of the input to the point CNN
 * @param[in] point_cnn_kernel_size kernel size of the point CNN
 * @param[in] point_cnn_params weights, bias and other essential parameters used to describe the point CNN
 * @param[in] point_cnn_stride stride factor for the point CNN
 * @param[in] point_cnn_activation an integer to choose the type of activation function.
 *            0: none
 *            1: sigmoid
 *            2: tanh
 *            3: relu
 * @param[in] pool_padding padding for the pool layer. Note: applied to both sides of the input to the pool
 * @param[in] pool_kernel_size kernel size of the pool
 * @param[in] pool_stride stride factor for the pool
 * @param[in] pool_activation an integer to choose the type of activation function.
 *            0: none
 *            1: sigmoid
 *            2: tanh
 *            3: relu
 */
int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
  conv_layer point_cnn, unsigned in_time, unsigned in_channels,
  const float* const mean, const float* const var,
  unsigned affine_config, const float* const gamma, const float* const beta, unsigned in_place,
  unsigned depth_cnn_padding, unsigned depth_cnn_kernel_size,
  const void* depth_cnn_params, unsigned depth_cnn_stride, unsigned depth_cnn_activation,
  unsigned point_cnn_hidden, unsigned point_cnn_padding, unsigned point_cnn_kernel_size,
  const void* point_cnn_params, unsigned point_cnn_stride, unsigned point_cnn_activation,
  unsigned pool_padding, unsigned pool_kernel_size, unsigned pool_stride, unsigned pool_activation);

#endif
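
A side note on the affine_config = 2 path documented above: the caller is expected to fold the batch-norm statistics into the scale and offset ahead of time, using the relation stated in the header comment (gamma = original_gamma/sqrt(var), beta = original_beta - gamma * mean/sqrt(var)). The sketch below is a minimal illustration of that pre-computation; the helper name fold_batchnorm_params is hypothetical and not part of the committed header, and no epsilon term is added because the comment does not mention one.

#include <math.h>

// Hypothetical helper: fold BatchNorm1d statistics into per-channel
// scale/offset so the dscnn blocks can be called with affine_config = 2
// (mean = var = NULL, gamma/beta = the folded values).
static void fold_batchnorm_params(const float* mean, const float* var,
                                  const float* gamma, const float* beta,
                                  float* gamma_folded, float* beta_folded,
                                  unsigned in_channels) {
  for (unsigned c = 0; c < in_channels; c++) {
    float inv_std = 1.0f / sqrtf(var[c]);                     // 1/sqrt(var)
    gamma_folded[c] = gamma[c] * inv_std;                     // original_gamma / sqrt(var)
    beta_folded[c] = beta[c] - gamma[c] * mean[c] * inv_std;  // original_beta - gamma * mean / sqrt(var)
  }
}

The folded arrays would then be stored and passed as gamma and beta, with mean and var passed as NULL, as the parameter notes above describe.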

c_reference/include/rnn_bricked.h

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

#ifndef __RNN_BRICKED_H__
#define __RNN_BRICKED_H__

/* All the matrices are stored in the row major format

   NOTES for using the layers
   -> Single-directional Computation
      While using the bricked fastgrnn layers, the user needs to adhere to the following two constraints
      1) in_time % hop = 0
      2) fwd_window % hop = 0 and bwd_window % hop = 0

      Violation of the above two constraints (1 & 2) will cause segmentation faults
      The layers first compute all the Wx steps and then compute Uh for all the windows in parallel
      Hence, the user needs to adhere to constraints 1 & 2

   -> Bi-directional Computation
      For bi-directional cases, there are 2 additional constraints that need to be followed
      A) sample_first_brick and sample_last_brick = 1
      B) An offset of rnn_hidden needs to be given to the output_signal pointer during the backward function call
         Each function will only process its given context (forward/backward). The other context will need to be called separately.
         E.g : 1st step -> forward(output, ..., input, ..., bi-direction=1, ...)
               2nd step -> backward(output + rnn_hidden, ..., input, ..., bi-direction=1, ...)

      The two extra constraints (A & B) apply only to bi-directional cases and can be ignored if only forward (or only backward) is used
      Violating the conditions will cause index mismatches or data corruption
      If the first (last) brick is not sampled, the first few (last few) time steps will be missing in the forward (backward) result
      If the offset is not passed during the backward function call, the backward pass will overwrite the forward result (bi-directional case only)
*/

/**
 * @brief Model parameters for the Bricked FastGRNN Low-Rank Layer
 * @var W1 pointer to first low-rank component of W. shape = [rank * in_dims]
 * @var W2 pointer to second low-rank component of W. shape = [rnn_hidden * rank]
 * @var wRank rank of W matrix
 * @var U1 pointer to first low-rank component of U. shape = [rank * rnn_hidden]
 * @var U2 pointer to second low-rank component of U. shape = [rnn_hidden * rank]
 * @var uRank rank of U matrix
 * @var Bg pointer to bias for sigmoid
 * @var Bh pointer to bias for tanh
 * @var sigmoid_zeta first weight parameter for update from input from next step
 * @var sigmoid_nu second weight parameter for update from input from next step
 * @var block_size_w_to_lr block/tile size for the cache. Used for tiled MatMul. For W1 * x
 * @var block_size_w_from_lr block/tile size for the cache. Used for tiled MatMul. For W2 * result(W1 * x)
 * @var block_size_u_to_lr block/tile size for the cache. Used for tiled MatMul. For U1 * h
 * @var block_size_u_from_lr block/tile size for the cache. Used for tiled MatMul. For U2 * result(U1 * h)
 */
typedef struct BrickedFastGRNN_LR_Params {
  float* W1;
  float* W2;
  unsigned wRank;
  float* U1;
  float* U2;
  unsigned uRank;
  float* Bg;
  float* Bh;
  float sigmoid_zeta;
  float sigmoid_nu;
  unsigned block_size_w_to_lr;
  unsigned block_size_w_from_lr;
  unsigned block_size_u_to_lr;
  unsigned block_size_u_from_lr;
} BrickedFastGRNN_LR_Params;

/** Forward Bricking and application of the forward RNN for an input signal
 * @param[out] output_signal pointer to output signal. size = out_time * rnn_hidden
 * @param[in] rnn_hidden output dimension for the current cell
 * @param[in] input_signal pointer to input signal. size = in_time * in_dims
 * @param[in] in_time number of input time steps
 * @param[in] in_dims input dimensions
 * @param[in] window window length for each brick. For the final brick, the leftover time steps are used (the last brick need not be window in length)
 * @param[in] hop hop distance between bricks
 * @param[in] params pointer to the parameters for the RNN
 * @param[in] bi_direction determines if the output is for a bi-directional RNN
 * @param[in] sample_first_brick determines if the 1st brick should also be sampled
 *            -> if = 0, only the last hidden state of each brick is sampled. out_time = (in_time-window)/hop + 1
 *            -> if = 1, for the 1st brick, we sample at every hop index (similar to ::hop). For all the bricks (including the 1st) we sample the final hidden state. out_time = in_time/hop + 1
 */
int forward_bricked_fastgrnn_lr(float* output_signal, unsigned rnn_hidden,
  float* input_signal, unsigned in_time, unsigned in_dims,
  unsigned window, unsigned hop, const void* params,
  unsigned bi_direction, unsigned sample_first_brick);

/** Backward Bricking and application of the backward RNN for an input signal
 * @param[out] output_signal pointer to output signal. size = out_time * rnn_hidden
 * @param[in] rnn_hidden output dimension for the current cell
 * @param[in] input_signal pointer to input signal. size = in_time * in_dims
 * @param[in] in_time number of input time steps
 * @param[in] in_dims input dimensions
 * @param[in] window window length for each brick. For the final brick, the leftover time steps are used (the last brick need not be window in length)
 * @param[in] hop hop distance between bricks
 * @param[in] params pointer to the parameters for the RNN
 * @param[in] bi_direction determines if the output is for a bi-directional RNN
 * @param[in] sample_last_brick determines if the last brick should also be sampled
 *            -> if = 0, only the first (last in reverse) hidden state of each brick is sampled. out_time = (in_time-window)/hop + 1
 *            -> if = 1, for the last brick, we sample at every hop index in reverse (similar to ::hop in reverse). For all the bricks (including the last) we sample the first hidden state (last in reverse). out_time = in_time/hop + 1
 */
int backward_bricked_fastgrnn_lr(float* output_signal, unsigned rnn_hidden,
  float* input_signal, unsigned in_time, unsigned in_dims,
  unsigned window, unsigned hop, const void* params,
  unsigned bi_direction, unsigned sample_last_brick);

#endif
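
To make the bi-directional constraints above concrete, here is a minimal usage sketch of the call pattern from the notes: the forward pass writes at offset 0 and the backward pass receives an offset of rnn_hidden into the same output buffer. The dimension values, the assumed output layout of 2 * rnn_hidden entries per time step, and the resulting buffer size are illustrative assumptions, not requirements stated in the header.

#include <stdlib.h>
#include "rnn_bricked.h"

// Illustrative bi-directional call pattern (example values only).
// Constraints from the notes above: in_time % hop == 0 and window % hop == 0.
void run_bricked_birnn(float* input_signal, const BrickedFastGRNN_LR_Params* params) {
  const unsigned in_time = 128, in_dims = 32, rnn_hidden = 64;
  const unsigned window = 8, hop = 4;

  // sample_first_brick = sample_last_brick = 1 (required for bi-direction),
  // so out_time = in_time/hop + 1 as documented above.
  const unsigned out_time = in_time / hop + 1;

  // Assumed layout: forward states in the first rnn_hidden slots of each step,
  // backward states in the next rnn_hidden slots (hence the pointer offset below).
  float* output_signal = (float*)calloc((size_t)out_time * 2 * rnn_hidden, sizeof(float));

  // 1st step -> forward(output, ..., bi-direction = 1, ...)
  forward_bricked_fastgrnn_lr(output_signal, rnn_hidden,
    input_signal, in_time, in_dims,
    window, hop, params, 1, 1);

  // 2nd step -> backward(output + rnn_hidden, ..., bi-direction = 1, ...)
  backward_bricked_fastgrnn_lr(output_signal + rnn_hidden, rnn_hidden,
    input_signal, in_time, in_dims,
    window, hop, params, 1, 1);

  free(output_signal);
}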
