Skip to content

Commit 5c366b7

Browse files
committed
Adding some sparse GRU support
Still need to properly dump as sparse.
1 parent d16a111 commit 5c366b7

File tree

4 files changed

+127
-3
lines changed

4 files changed

+127
-3
lines changed

src/dump_lpcnet.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@
4141
max_conv_inputs = 1
4242
max_mdense_tmp = 1
4343

44-
def printVector(f, vector, name):
44+
def printVector(f, vector, name, dtype='float'):
4545
v = np.reshape(vector, (-1));
4646
#print('static const float ', name, '[', len(v), '] = \n', file=f)
47-
f.write('static const float {}[{}] = {{\n '.format(name, len(v)))
47+
f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
4848
for i in range(0, len(v)):
4949
f.write('{}'.format(v[i]))
5050
if (i!=len(v)-1):
@@ -59,11 +59,51 @@ def printVector(f, vector, name):
5959
f.write('\n};\n\n')
6060
return;
6161

62+
def printSparseVector(f, A, name):
    """Dump a GRU recurrent weight matrix in the layout read by the sparse GRU.

    ``A`` is treated as three N x N gate matrices stored side by side
    (N rows, 3*N columns).  Three arrays are emitted through ``printVector``:

    * ``<name>_diag`` -- the diagonals of the three gate matrices,
      concatenated.
    * ``<name>`` -- the remaining weights (diagonals replaced by zero), cut
      into blocks of 16 columns.  Inside a block the 16 weights of row 0
      come first, then those of row 1, and so on.
    * ``<name>_idx`` -- for every block, the entry count N followed by the
      indices 0..N-1 (all rows are listed, so nothing is pruned yet).

    The caller's array is not modified.

    Raises:
        ValueError: if ``A`` is not N x 3*N, or if 3*N is not a multiple of
            16 (a partial block would otherwise be dropped silently and the
            tables would no longer match a consumer that walks the output
            16 rows at a time).
    """
    block = 16
    if A.ndim != 2 or A.shape[1] != 3*A.shape[0]:
        raise ValueError('expected an N x 3N recurrent matrix, got shape {}'.format(A.shape))
    N = A.shape[0]
    if (3*N) % block != 0:
        raise ValueError('3*N = {} is not a multiple of {}'.format(3*N, block))
    nb_blocks = 3*N//block

    gates = [A[:, k*N:(k+1)*N] for k in range(3)]
    diag = np.concatenate([np.diag(g) for g in gates])
    # Work on a fresh array so the weights owned by the caller keep their diagonals.
    offdiag = np.concatenate([g - np.diag(np.diag(g)) for g in gates], axis=1)
    printVector(f, diag, name + '_diag')

    # (row, block, col) -> (block, row, col): one reshape/transpose instead of
    # growing the output with a concatenate per row.  The cast to float64
    # keeps the printed digits the same as the previous accumulation, which
    # started from a float64 array.
    W = offdiag.reshape(N, nb_blocks, block).transpose(1, 0, 2).reshape(-1).astype(np.float64)
    printVector(f, W, name)

    idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), nb_blocks)
    printVector(f, idx, name + '_idx', dtype='int')
    return
77+
6278
def dump_layer_ignore(self, f, hf):
    """Fallback dumper: report that this layer is skipped and write nothing.

    Neither ``f`` nor ``hf`` is touched.  Always returns False.
    """
    message = "".join(["ignoring layer ", self.name, " of type ", self.__class__.__name__])
    print(message)
    return False
6581
Layer.dump_layer = dump_layer_ignore
6682

83+
def dump_sparse_gru(self, f, hf):
    """Export a GRU layer as a ``SparseGRULayer`` named ``sparse_<layer name>``.

    Written to ``f``: the recurrent weights (second weight array) through
    ``printSparseVector``, the last weight array as the bias, and the struct
    initializer.  The first weight array is only consulted for its shape
    (a third of its column count is taken as the neuron count).
    Written to ``hf``: the ``*_OUT_SIZE`` / ``*_STATE_SIZE`` macros and the
    ``extern`` declaration.

    Also raises the module-level ``max_rnn_neurons`` if this layer is larger.
    Always returns True.
    """
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)

    weights = self.get_weights()
    printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, weights[-1], name + '_bias')

    # Layers without an explicit activation are exported as tanh.
    activation = self.activation.__name__.upper() if hasattr(self, 'activation') else 'TANH'
    # Only an explicit, false `reset_after` attribute selects 0.
    reset_after = 0 if (hasattr(self, 'reset_after') and not self.reset_after) else 1

    neurons = weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)

    layer_def = 'const SparseGRULayer {} = {{\n {}_bias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'.format(
        name, name, name, name, name, neurons, activation, reset_after)
    f.write(layer_def)

    macro_prefix = name.upper()
    hf.write('#define {}_OUT_SIZE {}\n'.format(macro_prefix, neurons))
    hf.write('#define {}_STATE_SIZE {}\n'.format(macro_prefix, neurons))
    hf.write('extern const SparseGRULayer {};\n\n'.format(name))
    return True
106+
67107
def dump_gru_layer(self, f, hf):
68108
global max_rnn_neurons
69109
name = self.name
@@ -205,6 +245,8 @@ def dump_embedding_layer(self, f, hf):
205245
if layer.dump_layer(f, hf):
206246
layer_list.append(layer.name)
207247

248+
dump_sparse_gru(model.get_layer('gru_a'), f, hf)
249+
208250
hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
209251
hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
210252
hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))

src/lpcnet.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ void run_sample_network(NNetState *net, float *pdf, const float *condition, cons
122122
accum_embedding(&gru_a_embed_sig, gru_a_input, last_sig);
123123
accum_embedding(&gru_a_embed_pred, gru_a_input, pred);
124124
accum_embedding(&gru_a_embed_exc, gru_a_input, last_exc);
125-
compute_gru3(&gru_a, net->gru_a_state, gru_a_input);
125+
/*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
126+
compute_sparse_gru(&sparse_gru_a, net->gru_a_state, gru_a_input);
126127
RNN_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
127128
RNN_COPY(&in_b[GRU_A_STATE_SIZE], condition, FEATURE_DENSE2_OUT_SIZE);
128129
compute_gru2(&gru_b, net->gru_b_state, in_b);

src/nnet.c

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,38 @@ static void gemm_accum16(float *out, const float *weights, int rows, int cols, i
105105
_mm256_storeu_ps (&y[8], vy8);
106106
}
107107
}
108+
/* Accumulate a block-sparse matrix/vector product into out (AVX2/FMA path).
 *
 * Output rows are processed in groups of 16.  For each group, idx supplies
 * a count followed by that many indices into x; for every index the next
 * 16 consecutive floats of weights are multiplied by x[index] and added to
 * the 16 outputs of the group.  out must already contain the values to
 * accumulate onto.  Every group loads and stores 16 floats, so rows is
 * expected to be a multiple of 16.
 */
static void sparse_gemm_accum16(float *out, const float *weights, int rows, const int *idx, const float *x)
{
   int row;
   for (row=0;row<rows;row+=16)
   {
      float * restrict dst = &out[row];
      __m256 acc_lo = _mm256_loadu_ps(&dst[0]);
      __m256 acc_hi = _mm256_loadu_ps(&dst[8]);
      /* First entry of each idx group: number of indices that follow. */
      int remaining = *idx++;
      while (remaining-- > 0)
      {
         const __m256 xv = _mm256_broadcast_ss(&x[*idx++]);
         acc_lo = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[0]), xv, acc_lo);
         acc_hi = _mm256_fmadd_ps(_mm256_loadu_ps(&weights[8]), xv, acc_hi);
         weights += 16;
      }
      _mm256_storeu_ps(&dst[0], acc_lo);
      _mm256_storeu_ps(&dst[8], acc_hi);
   }
}
139+
108140
#else
109141
static void gemm_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
110142
{
@@ -358,6 +390,43 @@ void compute_gru3(const GRULayer *gru, float *state, const float *input)
358390
state[i] = h[i];
359391
}
360392

393+
void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *input)
394+
{
395+
int i, k;
396+
int N;
397+
float zrh[3*MAX_RNN_NEURONS];
398+
float recur[3*MAX_RNN_NEURONS];
399+
float *z;
400+
float *r;
401+
float *h;
402+
N = gru->nb_neurons;
403+
z = zrh;
404+
r = &zrh[N];
405+
h = &zrh[2*N];
406+
celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
407+
celt_assert(input != state);
408+
celt_assert(gru->reset_after);
409+
RNN_COPY(zrh, input, 3*N);
410+
for (i=0;i<3*N;i++)
411+
recur[i] = gru->bias[3*N + i];
412+
for (k=0;k<3;k++)
413+
{
414+
for (i=0;i<N;i++)
415+
recur[k*N + i] += gru->diag_weights[k*N + i]*state[i];
416+
}
417+
sparse_gemm_accum16(recur, gru->recurrent_weights, 3*N, gru->idx, state);
418+
for (i=0;i<2*N;i++)
419+
zrh[i] += recur[i];
420+
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
421+
for (i=0;i<N;i++)
422+
h[i] += recur[2*N+i]*r[i];
423+
compute_activation(h, h, N, gru->activation);
424+
for (i=0;i<N;i++)
425+
h[i] = z[i]*state[i] + (1-z[i])*h[i];
426+
for (i=0;i<N;i++)
427+
state[i] = h[i];
428+
}
429+
361430
void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const float *input)
362431
{
363432
int i;

src/nnet.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ typedef struct {
6262
int reset_after;
6363
} GRULayer;
6464

65+
/* Parameters of a GRU layer whose recurrent weights are split into a
 * diagonal and a block-sparse remainder.  The generated tables initialize
 * this struct positionally, so the field order must not change. */
typedef struct {
   const float *bias;              /* bias table; compute_sparse_gru() reads entries 3*N .. 6*N-1 */
   const float *diag_weights;      /* 3*N diagonal recurrent weights, N per gate */
   const float *recurrent_weights; /* off-diagonal recurrent weights, consumed 16 floats at a time */
   const int *idx;                 /* per group of 16 rows: a count, then that many state indices */
   int nb_neurons;                 /* N, the state size */
   int activation;                 /* ACTIVATION_* applied to the candidate state */
   int reset_after;                /* compute_sparse_gru() asserts this is non-zero */
} SparseGRULayer;
74+
6575
typedef struct {
6676
const float *bias;
6777
const float *input_weights;
@@ -89,6 +99,8 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input);
8999

90100
void compute_gru3(const GRULayer *gru, float *state, const float *input);
91101

102+
void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *input);
103+
92104
void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const float *input);
93105

94106
void compute_embedding(const EmbeddingLayer *layer, float *output, int input);

0 commit comments

Comments
 (0)