Skip to content

Commit a7985be

Browse files
committed
Data handler: fixed bug on generating nz_idx.
1 parent 4c8dd76 commit a7985be

File tree

8 files changed

+245
-28
lines changed

8 files changed

+245
-28
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ add_compile_definitions(HIDDEN_SIZE=512)
4646
add_compile_definitions(NUM_GATES=4)
4747
add_compile_definitions(NUM_SAMPLES=2)
4848
add_compile_definitions(NUM_TILES_U=8)
49-
add_compile_definitions(NUM_ZERO_TILES_U=0)
49+
add_compile_definitions(NUM_ZERO_TILES_U=2)
5050
add_compile_definitions(NUM_TILES_V=8)
51-
add_compile_definitions(NUM_ZERO_TILES_V=0)
51+
add_compile_definitions(NUM_ZERO_TILES_V=2)
5252
add_compile_definitions(NUM_TIMESTEPS=28)
5353
add_compile_definitions(FIX_WIDTH=16)
5454
add_compile_definitions(FIX_FRACT_WIDTH=5)

include/layers/lstm/lstm_data_handler.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -535,10 +535,8 @@ class AcceleratorBlob {
535535
}
536536

537537
~AcceleratorBlob() {
538-
std::cout << "[INFO] Starting ~AcceleratorBlob()." << std::endl;
539538
// delete[] this->fix_nz_u_; // FREE(this->fix_nz_u_);
540539
// delete[] this->fix_nz_v_; // FREE(this->fix_nz_v_);
541-
std::cout << "[INFO] Freed this->fix_nz_u_ and this->fix_nz_v_." << std::endl;
542540
// for (int i = 0; i < this->lstm_num_inputs_; ++i) {
543541
// delete[] this->fix_x_[i]; // FREE(this->fix_x_[i]);
544542
// delete[] this->fix_h_[i]; // FREE(this->fix_h_[i]);
@@ -553,7 +551,6 @@ class AcceleratorBlob {
553551
// delete[] this->fix_u_cur_; // FREE(this->fix_u_cur_);
554552
// delete[] this->fix_u_rec_; // FREE(this->fix_u_rec_);
555553
// delete[] this->fix_v_; // FREE(this->fix_v_);
556-
std::cout << "[INFO] Freed this->fix_u_cur_, this->fix_u_rec_ and this->fix_v_." << std::endl;
557554
for (auto g : this->cur_gates_) {
558555
delete g.second;
559556
}

include/math_utils/data_handler.h

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,11 @@ class VectorBlob {
113113
for (int i = 0; i < refinement_steps; ++i) {
114114
this->fix_nz_idx_.push_back(~IdxType(0));
115115
this->fix_z_idx_.push_back(~IdxType(0));
116-
for (int j = 0; j < num_tiles; ++j) {
117-
this->nz_idx_.push_back(j);
118-
this->z_idx_.push_back(j);
116+
if (num_zero_tiles == 0) {
117+
for (int j = 0; j < num_tiles; ++j) {
118+
this->nz_idx_.push_back(j);
119+
this->z_idx_.push_back(j);
120+
}
119121
}
120122
}
121123
if (num_zero_tiles > 0) {
@@ -144,7 +146,15 @@ class VectorBlob {
144146
} else {
145147
// Non-pruned tile
146148
for (int k = 0; k < this->num_tile_elems_; ++k) {
147-
FloatType tmp = 0.00001 * rand();
149+
FloatType tmp;
150+
if (std::is_same<short, FixType>::value ||
151+
std::is_same<int, FixType>::value ||
152+
std::is_same<long, FixType>::value ||
153+
std::is_same<long long, FixType>::value) {
154+
tmp = rand();
155+
} else {
156+
tmp = 0.00001 * rand();
157+
}
148158
this->data_.push_back(tmp);
149159
this->pruned_data_.push_back(tmp);
150160
this->fix_data_.push_back(FixType(tmp));
@@ -156,7 +166,15 @@ class VectorBlob {
156166
}
157167
} else {
158168
for (int i = 0; i < this->total_size_; ++i) {
159-
FloatType tmp = 0.00001 * rand();
169+
FloatType tmp;
170+
if (std::is_same<short, FixType>::value ||
171+
std::is_same<int, FixType>::value ||
172+
std::is_same<long, FixType>::value ||
173+
std::is_same<long long, FixType>::value) {
174+
tmp = rand();
175+
} else {
176+
tmp = 0.00001 * rand();
177+
}
160178
this->data_.push_back(tmp);
161179
this->pruned_data_.push_back(tmp);
162180
this->fix_data_.push_back(FixType(tmp));
@@ -209,7 +227,6 @@ class VectorBlob {
209227
return this->z_idx_.at(i);
210228
}
211229

212-
213230
/**
214231
* @brief Gets the nz index.
215232
*
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#ifndef TESTBENCHES_TEST_V_KERNEL_PRUNED_H_
2+
#define TESTBENCHES_TEST_V_KERNEL_PRUNED_H_
3+
4+
#include "kernel/v_kernel.h"
5+
6+
#endif // end TESTBENCHES_TEST_V_KERNEL_PRUNED_H_

src/testbenches/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ target_include_directories(TEST_V_KERNEL PUBLIC ${OpenCv_INCLUDE_DIRS})
2121
target_link_libraries(TEST_V_KERNEL ${OpenCv_LIBS})
2222
target_link_libraries(TEST_V_KERNEL V_KERNEL)
2323

24+
add_executable(TEST_V_KERNEL_PRUNED ${CMAKE_SOURCE_DIR}/src/testbenches/test_v_kernel_pruned.cpp)
25+
target_include_directories(TEST_V_KERNEL_PRUNED PUBLIC ${CMAKE_SOURCE_DIR}/include)
26+
target_include_directories(TEST_V_KERNEL_PRUNED PUBLIC ${HLS_INCLUDE_DIRS})
27+
target_include_directories(TEST_V_KERNEL_PRUNED PUBLIC ${OpenCv_INCLUDE_DIRS})
28+
target_link_libraries(TEST_V_KERNEL_PRUNED ${OpenCv_LIBS})
29+
target_link_libraries(TEST_V_KERNEL_PRUNED V_KERNEL)
30+
2431
add_executable(TEST_GEMV_KERNEL ${CMAKE_SOURCE_DIR}/src/testbenches/test_gemv_kernel.cpp)
2532
target_include_directories(TEST_GEMV_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include)
2633
target_include_directories(TEST_GEMV_KERNEL PUBLIC ${HLS_INCLUDE_DIRS})
@@ -51,6 +58,7 @@ target_link_libraries(TEST_SVD_KERNEL SVD_KERNEL)
5158

5259
add_test(NAME TestU_Kernel COMMAND TEST_U_KERNEL)
5360
add_test(NAME TestU_Kernel_Pruned COMMAND TEST_U_KERNEL_PRUNED)
61+
add_test(NAME TestV_Kernel_Pruned COMMAND TEST_V_KERNEL_PRUNED)
5462
add_test(NAME TestV_Kernel COMMAND TEST_V_KERNEL)
5563
add_test(NAME TestGemvKernel COMMAND TEST_GEMV_KERNEL)
5664
add_test(NAME TestDenseSvd COMMAND TEST_DENSE_SVD)

src/testbenches/test_u_kernel_pruned.cpp

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,18 @@ int main(int argc, char const *argv[]) {
3232
const int kH = testu::params::H;
3333
const int kTu = testu::params::Tu;
3434
const int kNTu = testu::params::MaxNumTu;
35-
const int kZTu = testu::params::ZTu;
35+
const int kZTu = 8; // testu::params::ZTu;
3636
const int kNTv = testu::params::MaxNumTv;
3737
const int kZTv = testu::params::ZTv;
3838

3939
const int kNumActiveInputs = 1; // testu::params::N;
40-
const int kInputSize_tmp = testu::params::I / 1;
40+
const int kInputSize_tmp = testu::params::I / 16;
4141
const int kInputSize = (kInputSize_tmp > testu::params::I) ? testu::params::I : kInputSize_tmp;
4242
const int kNumTilesU = kInputSize / testu::params::Tu;
4343

4444
typedef typename testu::params::ActivationD ActivationType;
45+
typedef ap_uint<testu::params::NumGTuBitsAligned> IndexType;
46+
4547
typedef hls::vector<ActivationType, testu::params::N> VectN_Type;
4648
typedef hls::vector<ActivationType, testu::params::G> VectG_Type;
4749
typedef hls::vector<ActivationType, testu::params::Tu> VectTuAct_Type;
@@ -105,23 +107,21 @@ int main(int argc, char const *argv[]) {
105107
auto f_weight = f_gate->fix_data();
106108
auto c_weight = c_gate->fix_data();
107109
auto o_weight = o_gate->fix_data();
110+
auto i_weight_pruned = i_gate->fix_pruned_data();
111+
auto f_weight_pruned = f_gate->fix_pruned_data();
112+
auto c_weight_pruned = c_gate->fix_pruned_data();
113+
auto o_weight_pruned = o_gate->fix_pruned_data();
108114
for (int i = 0; i < max_num_refinements; ++i) {
109115
for (int j = 0; j < kInputSize; ++j) {
116+
// std::cout << i_weight[i * kInputSize + j] << " ";
110117
for (int ii = 0; ii < testu::params::N; ++ii) {
111118
xu[i][ii][0] += i_weight[i * kInputSize + j] * storage.get_fix_x(ii)[j];
112119
xu[i][ii][1] += f_weight[i * kInputSize + j] * storage.get_fix_x(ii)[j];
113120
xu[i][ii][2] += c_weight[i * kInputSize + j] * storage.get_fix_x(ii)[j];
114121
xu[i][ii][3] += o_weight[i * kInputSize + j] * storage.get_fix_x(ii)[j];
115122
}
116123
}
117-
}
118-
std::cout << "[INFO] Generating gold results." << std::endl;
119-
for (int i = 0; i < max_num_refinements; ++i) {
120-
for (int j = 0; j < testu::params::N; ++j) {
121-
for (int k = 0; k < testu::params::G; ++k) {
122-
// xu_gold[i * testu::params::G + k][j] = xu[i][j][k];
123-
}
124-
}
124+
// std::cout << std::endl;
125125
}
126126

127127
#if 1
@@ -149,27 +149,45 @@ int main(int argc, char const *argv[]) {
149149
for (int j = 0; j < kNumTilesU - kZTu; ++j) {
150150
VectTuAct_Type u_val;
151151
for (int k = 0; k < testu::params::Tu; ++k) {
152-
u_val[k] = i_weight[i * kInputSize + i_gate->get_nz_idx(i, j) * kTu + k];
152+
// u_val[k] = i_weight[i * kInputSize + i_gate->get_nz_idx(i, j) * kTu + k];
153+
u_val[k] = i_weight_pruned[i * kInputSize + j * kTu + k];
153154
}
154155
u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
155156
for (int k = 0; k < testu::params::Tu; ++k) {
156-
u_val[k] = f_weight[i * kInputSize + f_gate->get_nz_idx(i, j) * kTu + k];
157+
// u_val[k] = f_weight[i * kInputSize + f_gate->get_nz_idx(i, j) * kTu + k];
158+
u_val[k] = f_weight_pruned[i * kInputSize + j * kTu + k];
157159
}
158160
u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
159161
for (int k = 0; k < testu::params::Tu; ++k) {
160-
u_val[k] = c_weight[i * kInputSize + c_gate->get_nz_idx(i, j) * kTu + k];
162+
// u_val[k] = c_weight[i * kInputSize + c_gate->get_nz_idx(i, j) * kTu + k];
163+
u_val[k] = c_weight_pruned[i * kInputSize + j * kTu + k];
161164
}
162165
u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
163166
for (int k = 0; k < testu::params::Tu; ++k) {
164-
u_val[k] = o_weight[i * kInputSize + o_gate->get_nz_idx(i, j) * kTu + k];
167+
// u_val[k] = o_weight[i * kInputSize + o_gate->get_nz_idx(i, j) * kTu + k];
168+
u_val[k] = o_weight_pruned[i * kInputSize + j * kTu + k];
165169
}
166170
u_interface.PushVector<ActivationType, testu::params::Tu>(u_val);
167171
}
168172
}
173+
174+
std::cout << "[INFO] Sending nzu." << std::endl;
175+
for (int i = 0; i < num_refinements[kNumActiveInputs - 1]; ++i) {
176+
for (int j = 0; j < kNumTilesU - kZTu; ++j) {
177+
const int bits = testu::params::NumTuBits;
178+
IndexType nzu_val;
179+
nzu_val.range(1 * bits - 1, 0 * bits) = i_gate->get_nz_idx(i, j);
180+
nzu_val.range(2 * bits - 1, 1 * bits) = f_gate->get_nz_idx(i, j);
181+
nzu_val.range(3 * bits - 1, 2 * bits) = c_gate->get_nz_idx(i, j);
182+
nzu_val.range(4 * bits - 1, 3 * bits) = o_gate->get_nz_idx(i, j);
183+
// std::cout << i_gate->get_nz_idx(i, j) << std::endl;
184+
unz_idx_interface.Push<IndexType>(nzu_val);
185+
}
186+
}
187+
169188
std::cout << "[INFO] Starting HlsKernelU." << std::endl;
170189
// HlsKernelU(kNumActiveInputs, kInputSize, refinements_tmp, false, x_axis, u_axis, xu_axis);
171-
const int ztu = 0; // kZTu;
172-
HlsKernelU_Pruned(kNumActiveInputs, kInputSize, num_refinements, ztu, unz_idx_axis, x_axis, u_axis, xu_axis);
190+
HlsKernelU_Pruned(kNumActiveInputs, kInputSize, num_refinements, kZTu, unz_idx_axis, x_axis, u_axis, xu_axis);
173191

174192
testu::params::VectG_Type xu_g_val;
175193
int total_cnt = 0;

src/testbenches/test_v_kernel.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ int main(int argc, char const *argv[]) {
131131
const int num_zero_tiles_v = 0;
132132
HlsKernelV_Pruned(kNumActiveInputs, kOutputSize, R_tmp, num_zero_tiles_v, vnz_idx_port, xus_port, v_port, y_port);
133133

134-
135134
std::cout << "[INFO] v_port.size(): " << v_port.size() << std::endl;
136135
}
137136
int num_elems = 0;

0 commit comments

Comments
 (0)