Commit d35b63a

Port conv1d codes to matVec format
1 parent: 18d062f

File tree

7 files changed: +314 −103 lines changed


c_reference/include/conv1d.h

Lines changed: 27 additions & 17 deletions
@@ -4,10 +4,14 @@
 #ifndef __CONV1D_H__
 #define __CONV1D_H__

+// Currently dilation is not supported. We have coded separate functions for regular and depthwise conv1d (and low-rank versions). They currently do not support the use of groups
+// We use a custom matVec with offset (in utils) for our conv codes. This allows us to decompose the operation using the low-rank property and decrease the run-time
+// The unoptimized version would be to first compute the weights and then perform the convolution
+
 /**
  * @brief Model parameters for the 1D Convolution Layer
- * @var W pointer to convolution weights W, size for regular = out_channels * in_channels * kernel_size, size for depth based = out_channels * kernel_size
- * @var B pointer to the bias vector for the convolution, size = out_channels
+ * @var W pointer to the flattened conv weights, original shape for regular = [out_channels, kernel_size, in_channels], shape for depthwise = [in_channels, kernel_size, 1]
+ * @var B pointer to the bias vector, original shape = [out_channels]
  */
 typedef struct ConvLayers_Params {
   const float* const W;
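
The header comments above carry the core idea of the port. As a rough illustration (not code from this commit; the function name and the padding-free simplification are assumptions), one output time step of conv1d with the new [out_channels, kernel_size, in_channels] weight layout reduces to a single row-major matrix-vector product over a contiguous input window:

/* Illustrative sketch only: with W flattened row-major as
   [out_channels, kernel_size, in_channels] and the input stored as
   [in_time, in_channels], the kernel_size * in_channels window for one
   output step is contiguous, so no time/channel permutation is needed. */
static void conv1d_one_step_sketch(float* out_step, const float* W,
  const float* B, const float* window, unsigned out_channels,
  unsigned kernel_size, unsigned in_channels) {
  unsigned ncols = kernel_size * in_channels;
  for (unsigned co = 0; co < out_channels; co++) {
    float acc = B[co];
    for (unsigned i = 0; i < ncols; i++)
      acc += W[co * ncols + i] * window[i]; /* one weight row . one window */
    out_step[co] = acc;
  }
}

Sliding the window across in_time (with stride and padding handled by the caller) repeats this product out_time times, which is what the offset matVec in utils enables without reshuffling the input.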
@@ -34,8 +38,8 @@ typedef struct ConvLayers_Params {
  * 2: tanh
  * 3: relu
  */
-int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal,
-  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
+int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal,
+  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
   const void* params, unsigned stride, unsigned activation);

 /**
@@ -58,15 +62,15 @@ int conv1d(float* output_signal, unsigned out_time, unsigned out_channels, const
  * 2: tanh
  * 3: relu
  */
-int conv1d_depth(float* output_signal, unsigned out_time, const float* input_signal,
-  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
+int conv1d_depth(float* output_signal, unsigned out_time, const float* input_signal,
+  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
   const void* params, unsigned stride, unsigned activation);

 /**
  * @brief Model parameters for the 1D Low Rank Convolution Layer.
- * @var W1 pointer to the 1st low-rank component of the weights, size = out_channels * rank
- * @var W2 pointer to the 2nd low-rank component of the weights, size for regular = rank * in_channels * kernel_size, size for depthwise = rank * kernel_size
- * @var B pointer to the bias vector for the convolution, shape = [out_channels]
+ * @var W1 pointer to the flattened 1st low-rank component of the weights, original shape = [out_channels, rank]. For depthwise, out_channels = in_channels
+ * @var W2 pointer to the flattened 2nd low-rank component of the weights, original shape for regular = [rank, kernel_size, in_channels], shape for depthwise = [rank, kernel_size, 1]
+ * @var B pointer to the flattened bias vector for the convolution, original shape = [out_channels]
  * @var rank rank of the weight tensor. A low-rank decomposition typically used to reduce computation and storage
  */
 typedef struct ConvLayers_LR_Params {
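
To see why the factorization above pays off in storage (illustrative numbers, not from this commit): with out_channels = 32, in_channels = 32, kernel_size = 5, and rank = 8, the full weight tensor needs 32 * 5 * 32 = 5120 floats, while W1 and W2 together need 32 * 8 + 8 * 5 * 32 = 256 + 1280 = 1536 floats, roughly a 3.3x reduction.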
@@ -78,8 +82,6 @@ typedef struct ConvLayers_LR_Params {

 /**
  * @brief Model definition for the 1D Low-Rank Convolution Layer. Currently only for dilation = 1
- * @brief Identical to the non-low-rank form. One modification is the multiplication of the weights handled within the layer
- * @brief The Weights W1 and W2 are multiplied within the layer using a matmul function from utils. Operation : W1 * W2
  * @param[out] output_signal pointer to the output signal, size = out_time * out_channels
  * @param[in] out_time number of time steps in the output
  * @param[in] out_channels number of output channels for the output of the conv layer
@@ -98,14 +100,12 @@
  * 2: tanh
  * 3: relu
  */
-int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal,
-  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
+int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, const float* input_signal,
+  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
   const void* params, unsigned stride, unsigned activation);

 /**
  * @brief Model definition for the 1D Low-Rank Depthwise Convolution Layer. Currently only for dilation = 1
- * @brief Identical to the non-low-rank form. One modification is the multiplication of the weights handled within the layer
- * @brief The Weights W1 and W2 are multiplied within the layer using a matmul function from utils. Operation : W1 * W2
  * @param[out] output_signal pointer to the output signal, size = out_time * in_channels
  * NOTE: out_channels == in_channels for depthwise conv
  * @param[in] out_time number of time steps in the output
@@ -123,9 +123,19 @@ int conv1d_lr(float* output_signal, unsigned out_time, unsigned out_channels, co
  * 1: sigmoid
  * 2: tanh
  * 3: relu
+Note for the usage of conv1d_depth_lr:
+The depthwise with low-rank conv1d code currently uses an unoptimized implementation based on the computation of the conv weights, followed by the depthwise convolution
+The reason for using the unoptimized implementation is the violation of the depthwise constraints when the low-rank decomposition is applied
+The use of depthwise convolution imposes a constraint on the out_channels of the weight matrix. When the low-rank decomposition is applied on top of this matrix, these constraints are violated
+The decomposition converts the depthwise conv into a fully-connected layer and a convolution layer with weight [rank, kernel_size, 1]
+The new smaller weight matrix resembles a depthwise conv, but here, typically, in_channels > rank. This violates the matrix constraints for depthwise convolution
+Hence, due to the violation, we cannot split the operation and need to use the unoptimized solution with full-rank weight computation followed by convolution
+
+The depthwise with low-rank code is recommended for extreme storage constraints with no major constraints on the computation cost
+For all other purposes, we recommend a combination of depthwise conv, low-rank conv and regular conv
  */
-int conv1d_depth_lr(float* output_signal, unsigned out_time, const float* input_signal,
-  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
+int conv1d_depth_lr(float* output_signal, unsigned out_time, const float* input_signal,
+  unsigned in_time, unsigned in_channels, unsigned padding, unsigned kernel_size,
   const void* params, unsigned stride, unsigned activation);

 // Auxiliary Layers
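
The note on conv1d_depth_lr above amounts to a two-step fallback. A hedged sketch under assumed names (the function name and the caller-provided temp_W scratch buffer are illustrative, not the commit's implementation; matMul and conv1d_depth follow the declarations shown in this diff):

#include "conv1d.h"
#include "utils.h"

/* Sketch of the unoptimized depthwise low-rank path: materialize the
   full-rank depthwise weights W = W1 * W2 with matMul, then run the plain
   depthwise convolution on the result. temp_W is an assumed scratch buffer
   of size in_channels * kernel_size. */
int conv1d_depth_lr_sketch(float* output_signal, unsigned out_time,
  const float* input_signal, unsigned in_time, unsigned in_channels,
  unsigned padding, unsigned kernel_size, const ConvLayers_LR_Params* lr,
  unsigned stride, unsigned activation, float* temp_W) {
  for (unsigned i = 0; i < in_channels * kernel_size; i++)
    temp_W[i] = 0.0f; /* pre-zero: matMul accumulates alpha * ret */
  /* [in_channels, rank] x [rank, kernel_size] -> [in_channels, kernel_size];
     alpha = 0, beta = 1, so temp_W ends up holding exactly W1 * W2 */
  matMul(lr->W1, lr->W2, in_channels, lr->rank, kernel_size,
    0.0f, 1.0f, temp_W);
  ConvLayers_Params conv_params = { temp_W, lr->B };
  return conv1d_depth(output_signal, out_time, input_signal,
    in_time, in_channels, padding, kernel_size,
    &conv_params, stride, activation);
}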

c_reference/include/utils.h

Lines changed: 18 additions & 1 deletion
@@ -31,6 +31,23 @@ void matVec(const float* const mat, const float* const vec,
   float alpha, float beta,
   float* const ret);

+/* Matrix-vector multiplication with a row offset
+   This function was developed primarily for the conv1d functions. It helps bypass the permutation of the time and channel axes
+   ret is of size nrows, vec is of size ncols
+   mat is of size nrows * ncols, stored in row major
+   depthwise changes the matVec to depthwise-specific convolutions
+   row_stride is the offset factor between two adjacent rows
+   Note : this matrix-vector multiplication is useful for matrices where a certain number of columns are dropped
+   For a normal matVec case, this value will be ncols
+   Eg : for a 400 x 400 matrix and a 100-length vector, we can consider the top 400 x 100 elements for the multiplication. For this eg, ncols will be 100 and row_stride will be 400
+   vec_stride is the offset factor between 2 elements in a vector, i.e. the elements of the vector are placed at "n" intervals
+   For a normal matVec case, this value will be 1
+   Eg : for matVec with a 400 x 100 matrix, a vector of length 100 is needed. So it's possible to pass a 400-length vector and consider every 4th element. For this eg, ncols will be 100 and vec_stride will be 4 */
+void offset_matVec_conv1d(const float* mat, const float* vec,
+  unsigned nrows, unsigned ncols,
+  unsigned row_stride, unsigned vec_stride,
+  unsigned depthwise, float* ret);
+
 /* Scaled matrix-matrix multiplication: ret = alpha * ret + beta * matA * matB
    matA first matrix; size = nrows * ncommon
    matB second matrix; size = ncommon * ncols
@@ -41,7 +58,7 @@ void matVec(const float* const mat, const float* const vec,
    beta scaling factor for the result of the multiplication (matA * matB)
    ret matrix multiplication output
 */
-void matmul(const float* const matA, const float* const matB,
+void matMul(const float* const matA, const float* const matB,
   unsigned nrows, unsigned ncommon, unsigned ncols,
   float alpha, float beta,
   float* const ret);
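
To make the row_stride / vec_stride contract above concrete, here is an illustrative reference loop (a sketch of the documented semantics, not the commit's code; the depthwise flag is left out for brevity):

/* Reference semantics for the documented contract: row r of mat starts at
   mat + r * row_stride (row_stride >= ncols lets trailing columns be
   dropped), and vec is read at vec_stride-element intervals. */
void offset_matVec_sketch(const float* mat, const float* vec,
  unsigned nrows, unsigned ncols,
  unsigned row_stride, unsigned vec_stride, float* ret) {
  for (unsigned r = 0; r < nrows; r++) {
    float acc = 0.0f;
    const float* row = mat + r * row_stride; /* jump over dropped columns */
    for (unsigned c = 0; c < ncols; c++)
      acc += row[c] * vec[c * vec_stride]; /* strided vector access */
    ret[r] = acc;
  }
}

With row_stride == ncols and vec_stride == 1 this collapses to the normal matVec case called out in the comment; the 400 x 100 examples above correspond to (ncols = 100, row_stride = 400) and (ncols = 100, vec_stride = 4).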
