-
Notifications
You must be signed in to change notification settings - Fork 35
Open
Description
Dear maintainers,
I tried the matmul example in the README and got really weird results: the error is extremely high.
I took a look at the results and observed that they keep getting bigger and bigger, so I assume this is due to a bug in the accumulator of the matmul algorithm.
Then I took a look at the generated PE module and saw that, after the accumulation into local_C[c7][c6], the value is written to the output stream but the accumulator is never cleared.
void PE(int idx, int idy, hls::stream<A_t2> &fifo_A_in, hls::stream<A_t2> &fifo_A_out, hls::stream<B_t2> &fifo_B_in, hls::stream<B_t2> &fifo_B_out, hls::stream<float> &fifo_C_drain_out) {
#pragma HLS INLINE OFF
/* Variable Declaration */
int p0 = idx, p1 = idy; // module id
A_t1 local_A[1][2];
#pragma HLS ARRAY_PARTITION variable=local_A dim=0 complete
B_t1 local_B[1][2];
#pragma HLS ARRAY_PARTITION variable=local_B dim=0 complete
C_t1 local_C[8][8];
#pragma HLS RESOURCE variable=local_C core=RAM_2P_BRAM
/* Variable Declaration */
for (ap_uint<3> c0 = 0; c0 <= 3; c0 += 1)
for (ap_uint<3> c1 = 0; c1 <= 3; c1 += 1)
for (ap_uint<3> c2 = 0; c2 <= 3; c2 += 1) {
// array
// pe
for (ap_uint<4> c5 = 0; c5 <= 7; c5 += 1) {
// latency
for (ap_uint<4> c6 = 0; c6 <= 7; c6 += 1) {
// latency
for (ap_uint<4> c7 = 0; c7 <= 7; c7 += 1) {
#pragma HLS PIPELINE II=1
{
{
A_t2 fifo_data;
fifo_data = fifo_A_in.read();
for (ap_uint<2> n = 0; n < 2; n++) {
#pragma HLS UNROLL
union {unsigned int ui; float ut;} u;
u.ui = (unsigned int)fifo_data(31, 0);
local_A[0][n] = u.ut;
fifo_data = fifo_data >> 32;
}
}
{
B_t2 fifo_data;
fifo_data = fifo_B_in.read();
for (ap_uint<2> n = 0; n < 2; n++) {
#pragma HLS UNROLL
union {unsigned int ui; float ut;} u;
u.ui = (unsigned int)fifo_data(31, 0);
local_B[0][n] = u.ut;
fifo_data = fifo_data >> 32;
}
}
// simd
for (ap_uint<2> c8 = 0; c8 <= 1; c8 += 1) {
#pragma HLS UNROLL
local_C[c7][c6] = (local_C[c7][c6] + (local_A[0][c8] * local_B[0][c8]));
}
if (c2 == 3 && c5 == 7)
fifo_C_drain_out.write(local_C[c7][c6]);
{
B_t2 fifo_data;
union {unsigned int ui; float ut;} u1, u0;
u1.ut = local_B[0][1];
u0.ut = local_B[0][0];
fifo_data = (ap_uint<32>(u1.ui), ap_uint<32>(u0.ui));
fifo_B_out.write(fifo_data);
}
{
A_t2 fifo_data;
union {unsigned int ui; float ut;} u1, u0;
u1.ut = local_A[0][1];
u0.ut = local_A[0][0];
fifo_data = (ap_uint<32>(u1.ui), ap_uint<32>(u0.ui));
fifo_A_out.write(fifo_data);
}
}
}
}
}
}
}
If I add one single line after the stream.write as:
local_C[c7][c6] = 0.0;
the results are correct: most entries have an error of 0 and a few have an error of about 1e-06.
Do you plan to fix this bug? I see issue #18 may be the same as this. I haven't tried other examples provided in the repo, but I would assume they have the same bug.
Bests
Metadata
Metadata
Assignees
Labels
No labels