Commit ca4beb0

LSTM Data handler: Fixed two major bugs in initializing vectors (out-of-bound accesses).

1 parent 3dcf1f5 · commit ca4beb0
4 files changed: +64 -48 lines changed
include/kernel/u_kernel.h
Lines changed: 3 additions & 0 deletions

@@ -424,6 +424,7 @@ void KernelU_Pruned(const int num_active_inputs,
     num_refinements_xu_dma[i] = num_refinements[i];
   }
 
+  std::cout << "[INFO] Get total R." << std::endl;
   // ===========================================================================
   // TODO: Same as non-pruned version -> wrap into a function (be careful to NTu-ZTu)
   // ===========================================================================
@@ -439,6 +440,7 @@ void KernelU_Pruned(const int num_active_inputs,
     R_total += (num_refinements_init[i] - num_refinements_init[i - 1]) * (num_active_inputs - i);
   }
 
+  std::cout << "[INFO] X_DAM_in." << std::endl;
   // Added
   X_DAM_in:
   for (int i = 0; i < num_active_inputs; ++i) {
@@ -469,6 +471,7 @@ void KernelU_Pruned(const int num_active_inputs,
     }
     return nz_idx;
   };
+  std::cout << "[INFO] X_DMA_Dispatcher." << std::endl;
   // Changed
   int R_prev = 0;
   X_DMA_Dispatcher:
include/layers/lstm/lstm_data_handler.h
Lines changed: 13 additions & 7 deletions

@@ -315,12 +315,20 @@ class AcceleratorBlob {
 
   void InitVector(const bool init_random, const int num_inputs, const int size,
       std::vector<std::vector<FixType> >& fix_y, std::vector<std::vector<FloatType> >& y) {
+    FloatType x;
     for (int i = 0; i < num_inputs; ++i) {
       // fix_y[i] = new FixType[size]; // svd::AllocateContiguously<FixType>(size);
       for (int j = 0; j < size; ++j) {
-        FloatType tmp = init_random ? 0.00001 * rand() : 0;
-        y[i][j] = tmp;
-        fix_y[i][j] = FixType(tmp);
+        if (std::is_same<short, FixType>::value ||
+            std::is_same<int, FixType>::value ||
+            std::is_same<long, FixType>::value ||
+            std::is_same<long long, FixType>::value) {
+          x = init_random ? rand() : 0;
+        } else {
+          x = init_random ? 0.00001 * rand() : 0;
+        }
+        y[i][j] = x;
+        fix_y[i][j] = FixType(x);
       }
     }
   }
@@ -488,7 +496,7 @@ class AcceleratorBlob {
     this->fix_c_curr_.resize(num_inputs, std::vector<FixType>(this->lstm_output_size_));
     this->fix_h_prev_.resize(num_inputs, std::vector<FixType>(this->lstm_output_size_));
     this->fix_c_prev_.resize(num_inputs, std::vector<FixType>(this->lstm_output_size_));
-    this->fix_bias_.resize(num_inputs, std::vector<FixType>(this->lstm_output_size_));
+    this->fix_bias_.resize(num_inputs, std::vector<FixType>(kNumGates / 2 * this->lstm_output_size_));
     this->x_.resize(num_inputs, std::vector<FloatType>(this->lstm_input_size_));
     this->h_.resize(num_inputs, std::vector<FloatType>(this->lstm_output_size_));
     this->c_.resize(num_inputs, std::vector<FloatType>(this->lstm_output_size_));
@@ -510,10 +518,8 @@ class AcceleratorBlob {
     // this->fix_s_[i] = new FixType[kS_TotalSize]; // svd::AllocateContiguously<FixType>(kS_TotalSize);
     // }
     this->fix_s_.resize(num_inputs, std::vector<FixType>(kS_TotalSize));
-
-
-    int idx = 0;
     for (int i = 0; i < num_inputs; ++i) {
+      int idx = 0;
      for (int j = 0; j < refinement_steps; ++j) {
        for (auto g : this->cur_gates_) {
          this->fix_s_[i][idx] = g.second->get_s(i).fix_data()[j];
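Both out-of-bound fixes in this file follow the same pattern: fix_bias_ rows must hold the biases of all current gates (kNumGates / 2 * lstm_output_size_ elements rather than lstm_output_size_), and the flat write index idx into fix_s_ must restart at zero for every input. A minimal editor's sketch (not part of the commit) of the second bug, with invented sizes in place of the project's parameters:

// Editor's sketch: why idx must be declared inside the per-input loop.
// kS_TotalSize stands in for refinement_steps * gate count; all sizes here
// are made up for illustration.
#include <vector>

int main() {
  const int num_inputs = 2, refinement_steps = 4, num_gates = 4;
  const int kS_TotalSize = refinement_steps * num_gates;
  std::vector<std::vector<float> > fix_s(
      num_inputs, std::vector<float>(kS_TotalSize));
  for (int i = 0; i < num_inputs; ++i) {
    int idx = 0;  // declared here, idx stays in [0, kS_TotalSize) per row;
                  // hoisted above the i-loop it reaches 2 * kS_TotalSize and
                  // fix_s[i][idx] indexes past the end of the row.
    for (int j = 0; j < refinement_steps; ++j) {
      for (int g = 0; g < num_gates; ++g) {
        fix_s[i][idx++] = 0.0f;
      }
    }
  }
  return 0;
}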

include/math_utils/data_handler.h
Lines changed: 13 additions & 3 deletions

@@ -113,8 +113,10 @@ class VectorBlob {
       for (int i = 0; i < refinement_steps; ++i) {
         this->fix_nz_idx_.push_back(~IdxType(0));
         this->fix_z_idx_.push_back(~IdxType(0));
-        this->nz_idx_.push_back(-1);
-        this->z_idx_.push_back(-1);
+        for (int j = 0; j < num_tiles; ++j) {
+          this->nz_idx_.push_back(j);
+          this->z_idx_.push_back(j);
+        }
       }
       if (num_zero_tiles > 0) {
         for (int i = 0; i < refinement_steps; ++i) {
@@ -221,8 +223,16 @@ class VectorBlob {
     return this->nz_idx_.at(i);
   }
 
+  /**
+   * @brief Gets the nz index.
+   *
+   * @param[in] r The refinement step
+   * @param[in] t The non-zero tile index (range 0 to NumT - ZNumT)
+   *
+   * @return The nz index.
+   */
   int get_nz_idx(const int r, const int t) {
-    return this->nz_idx_.at(r * this->num_tiles_ + t);
+    return this->nz_idx_.at(r * (this->num_tiles_ - this->num_zero_tiles_) + t);
   }
 
   IdxType* get_fix_z_idx() {
src/testbenches/test_u_kernel_pruned.cpp
Lines changed: 35 additions & 38 deletions

@@ -21,25 +21,26 @@ int main(int argc, char const *argv[]) {
   return 0;
 #else
   const int max_num_refinements = testu::params::R;
-  hls::vector<int, testu::params::N> num_refinements_vect = hls::vector<int, testu::params::N>(max_num_refinements);
+  int num_refinements[testu::params::N] = {max_num_refinements};
   for (int i = testu::params::N - 1; i >= 0; --i) {
     int R_tmp = testu::params::R - 2 * (testu::params::N - i - 1);
-    num_refinements_vect[i] = R_tmp > 0 ? R_tmp : 1;
+    num_refinements[i] = R_tmp > 0 ? R_tmp : 1;
   }
-  const int kNumActiveInputs = 1; // testu::params::N;
-  const int kInputSize_tmp = testu::params::I / 1;
-  const int kInputSize = (kInputSize_tmp > testu::params::I) ? testu::params::I : kInputSize_tmp;
-  const int kNumTilesU = kInputSize / testu::params::Tu;
-
   const int kN = testu::params::N;
   const int kR = testu::params::R;
   const int kI = testu::params::I;
   const int kH = testu::params::H;
+  const int kTu = testu::params::Tu;
   const int kNTu = testu::params::MaxNumTu;
   const int kZTu = testu::params::ZTu;
   const int kNTv = testu::params::MaxNumTv;
   const int kZTv = testu::params::ZTv;
 
+  const int kNumActiveInputs = 1; // testu::params::N;
+  const int kInputSize_tmp = testu::params::I / 1;
+  const int kInputSize = (kInputSize_tmp > testu::params::I) ? testu::params::I : kInputSize_tmp;
+  const int kNumTilesU = kInputSize / testu::params::Tu;
+
   typedef typename testu::params::ActivationD ActivationType;
   typedef hls::vector<ActivationType, testu::params::N> VectN_Type;
   typedef hls::vector<ActivationType, testu::params::G> VectG_Type;
@@ -90,17 +91,6 @@ int main(int argc, char const *argv[]) {
   int tmp = i_gate->get_nz_idx(0, 0);
   std::cout << tmp << std::endl;
 
-
-  std::cout << "x setup." << std::endl;
-  for (int i = 0; i < testu::params::N; ++i) {
-    for (int j = 0; j < testu::params::I; ++j) {
-      if (std::is_same<short, ActivationType>::value) {
-        x[i][j] = ActivationType(rand());
-      } else {
-        x[i][j] = ActivationType(rand() * 0.00001);
-      }
-    }
-  }
   std::cout << "xu setup." << std::endl;
   for (int i = 0; i < max_num_refinements; ++i) {
     for (int j = 0; j < testu::params::N; ++j) {
@@ -134,52 +124,59 @@ int main(int argc, char const *argv[]) {
     }
   }
 
-#if 0
+#if 1
   const int num_tests = 2;
   int num_errors = 0;
 
   std::cout << "[INFO] Starting tests." << std::endl;
   for (int t = 0; t < num_tests; ++t) {
     // NOTE: The streaming order differs from before! kNumTilesU is swapped with
     // testu::params::N.
-    for (int j = 0; j < kNumTilesU; ++j) {
-      for (int i = 0; i < kNumActiveInputs; ++i) {
+    std::cout << "[INFO] Sending x." << std::endl;
+    for (int j = 0; j < kNumTilesU; ++j) {
+      for (int i = 0; i < kNumActiveInputs; ++i) {
         VectTuAct_Type x_val;
         for (int k = 0; k < testu::params::Tu; ++k) {
-          x_val[k] = x[i][j * testu::params::Tu + k];
+          x_val[k] = storage.get_fix_x(i)[j * testu::params::Tu + k];
         }
         x_interface.PushVector<ActivationType, testu::params::Tu>(x_val);
       }
     }
     // NOTE: The streaming order differs from before! kNumTilesU is swapped with
    // testu::params::G.
-    for (int i = 0; i < num_refinements_vect[kNumActiveInputs - 1]; ++i) {
-      for (int j = 0; j < kNumTilesU; ++j) {
-        for (int k = 0; k < testu::params::G; ++k) {
-          VectTuAct_Type u_val;
-          for (int ii = 0; ii < testu::params::Tu; ++ii) {
-            u_val[ii] = u[i][j * testu::params::Tu + ii][k];
-          }
-          u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
+    std::cout << "[INFO] Sending u." << std::endl;
+    for (int i = 0; i < num_refinements[kNumActiveInputs - 1]; ++i) {
+      for (int j = 0; j < kNumTilesU - kZTu; ++j) {
+        VectTuAct_Type u_val;
+        for (int k = 0; k < testu::params::Tu; ++k) {
+          u_val[k] = i_weight[i * kInputSize + i_gate->get_nz_idx(i, j) * kTu + k];
+        }
+        u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
+        for (int k = 0; k < testu::params::Tu; ++k) {
+          u_val[k] = f_weight[i * kInputSize + f_gate->get_nz_idx(i, j) * kTu + k];
         }
+        u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
+        for (int k = 0; k < testu::params::Tu; ++k) {
+          u_val[k] = c_weight[i * kInputSize + c_gate->get_nz_idx(i, j) * kTu + k];
+        }
+        u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
+        for (int k = 0; k < testu::params::Tu; ++k) {
+          u_val[k] = o_weight[i * kInputSize + o_gate->get_nz_idx(i, j) * kTu + k];
+        }
+        u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
      }
    }
    std::cout << "[INFO] Starting HlsKernelU." << std::endl;
-
-    int refinements_tmp[testu::params::N];
-    for (int i = 0; i < testu::params::N; ++i) {
-      refinements_tmp[i] = num_refinements_vect[i];
-    }
    // HlsKernelU(kNumActiveInputs, kInputSize, refinements_tmp, false, x_axis, u_axis, xu_axis);
    const int ztu = 0; // kZTu;
-    HlsKernelU_Pruned(kNumActiveInputs, kInputSize, refinements_tmp, ztu, unz_idx_axis, x_axis, u_axis, xu_axis);
+    HlsKernelU_Pruned(kNumActiveInputs, kInputSize, num_refinements, ztu, unz_idx_axis, x_axis, u_axis, xu_axis);
 
    testu::params::VectG_Type xu_g_val;
    int total_cnt = 0;
    int last_at = -1;
-    for (int i = 0; i < num_refinements_vect[kNumActiveInputs - 1]; ++i) { // R_max
+    for (int i = 0; i < num_refinements[kNumActiveInputs - 1]; ++i) { // R_max
      for (int j = 0; j < kNumActiveInputs; ++j) {
-        if (i < num_refinements_vect[j]) {
+        if (i < num_refinements[j]) {
          bool is_last = xu_interface.isLastPopVector<ActivationType, testu::params::G>(xu_g_val);
          if (is_last) {
            last_at = total_cnt;