Skip to content

Commit 7092a2a

Browse files
committed
Merge branch 'bug_fixes' of https://github.com/vortexgpgpu/vortex into bug_fixes
2 parents ed64fff + 576be2d commit 7092a2a

File tree

2 files changed

+162
-1
lines changed

2 files changed

+162
-1
lines changed

hw/rtl/libs/VX_csa_mod4.sv

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// Copyright © 2019-2023
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
`include "VX_define.vh"
15+
16+
`TRACING_OFF
17+
18+
// Mod-4 Carry-Save Adder Reduction Tree Structure
19+
// Note: Only works for N = 2^(n) + {0,1} where n is an integer >= 2
20+
// N must be 4,5,8,9,16,17,32,33,...
21+
22+
// VX_csa_mod4: multi-operand adder built from 4:2 carry-save compressors.
//
// The first N - (N % 4) operands are reduced by a tree of VX_csa_42
// compressors down to a single sum/carry pair. When N % 4 == 1 the one
// remaining operand is folded in through a VX_csa_32 stage. The resulting
// sum/carry pair is then added by a VX_ks_adder to drive `sum` and `cout`.
//
// Parameters:
//   N - number of operands; must be 2^n or 2^n + 1 with n >= 2
//       (4, 5, 8, 9, 16, 17, ...). Enforced by the static assertion below.
//   W - bit-width of each operand.
//   S - bit-width of `sum`; defaults to W + $clog2(N).
//
// Ports:
//   operands - N packed operands of W bits each.
//   sum      - S-bit output of the final adder.
//   cout     - carry-out of the final adder.
module VX_csa_mod4 #(
    parameter N = 9,            // Number of operands
    parameter W = 8,            // Bit-width of each operand
    parameter S = W + $clog2(N) // Output width
) (
    input  wire [N-1:0][W-1:0] operands,
    output wire [S-1:0]        sum,
    output wire                cout
);
    // The condition must be TRUE for every supported N: N >= 4 and either N or
    // N-1 is a power of two. The tree below relies on this, because the number
    // of level-0 compressors (NUM_L0) has to be a power of two for the
    // per-level counts derived with a plain right shift to be exact.
    `STATIC_ASSERT ((N >= 4 && ((N & (N-1)) == 0 || ((N-1) & (N-2)) == 0)), ("N must be of the form 2^n + {0,1} where n >= 2"));

    // Tree parameters
    localparam N_REM  = N % 4;       // 0 or 1 remainder (for asserted N values)
    localparam N_MOD4 = N - N_REM;   // Floor to multiple of 4
    localparam NUM_L0 = N_MOD4 >> 2; // Number of initial 4:2 compressors

    // Calculate tree depth: the number of pairwise-combination levels needed
    // to reduce `n` level-0 results to one (ceil(log2(n)); 0 when n <= 1).
    function automatic integer calc_depth(integer n);
        integer d;
        d = 0;
        while (n > 1) begin
            d = d + 1;
            n = (n + 1) >> 1; // halve, rounding up
        end
        return d;
    endfunction

    localparam DEPTH        = calc_depth(NUM_L0);
    localparam TOTAL_LEVELS = DEPTH + N_REM;
    localparam WN           = W + TOTAL_LEVELS + 2; // storage width of every sum/carry entry

    // level_s[l][i] / level_c[l][i]: sum / carry produced by slot i of level l.
    // Every level is allocated NUM_L0 slots; level l drives only its first
    // NUM_CURR slots.
    wire [NUM_L0-1:0][WN-1:0] level_s[0:DEPTH];
    wire [NUM_L0-1:0][WN-1:0] level_c[0:DEPTH];

    // Level 0: Initial 4:2 compressors, one per group of four operands
    for (genvar i = 0; i < NUM_L0; i = i + 1) begin : g_level0
        wire [W+1:0] s_temp, c_temp;
        VX_csa_42 #(
            .N       (W),
            .WIDTH_O (W+2)
        ) csa_0 (
            .a     (operands[i*4 + 0]),
            .b     (operands[i*4 + 1]),
            .c     (operands[i*4 + 2]),
            .d     (operands[i*4 + 3]),
            .sum   (s_temp),
            .carry (c_temp)
        );
        // Resize to the common storage width
        assign level_s[0][i] = WN'(s_temp);
        assign level_c[0][i] = WN'(c_temp);
    end

    // Subsequent pairwise combination levels: each compressor merges two
    // sum/carry pairs of the previous level into one pair.
    for (genvar lev = 1; lev <= DEPTH; lev = lev + 1) begin : g_levels
        // Exact only because NUM_L0 is a power of two (see assertion above)
        localparam NUM_PREV = NUM_L0 >> (lev - 1);
        localparam NUM_CURR = (NUM_PREV + 1) >> 1;
        localparam W_IN     = W + lev * 2;
        localparam W_OUT    = W_IN + 2;

        for (genvar i = 0; i < NUM_CURR; i = i + 1) begin : g_comps
            localparam HAS_PAIR = (i * 2 + 1) < NUM_PREV;

            if (HAS_PAIR) begin : g_has_pair
                wire [W_OUT-1:0] s_temp, c_temp;
                // Combine two pairs from previous level
                VX_csa_42 #(
                    .N       (W_IN),
                    .WIDTH_O (W_OUT)
                ) csa_n (
                    .a     (W_IN'(level_s[lev-1][i*2])),
                    .b     (W_IN'(level_c[lev-1][i*2])),
                    .c     (W_IN'(level_s[lev-1][i*2+1])),
                    .d     (W_IN'(level_c[lev-1][i*2+1])),
                    .sum   (s_temp),
                    .carry (c_temp)
                );
                assign level_s[lev][i] = WN'(s_temp);
                assign level_c[lev][i] = WN'(c_temp);
            end
            else begin : g_passthrough
                // Odd one out: no partner at this level, forward unchanged
                assign level_s[lev][i] = level_s[lev-1][i*2];
                assign level_c[lev][i] = level_c[lev-1][i*2];
            end
        end
    end

    // Root of the tree: slot 0 of the last level
    wire [WN-1:0] tree_sum, tree_carry;
    `UNUSED_VAR({tree_sum, tree_carry});
    if (DEPTH == 0) begin : g_depth0
        assign tree_sum   = level_s[0][0];
        assign tree_carry = level_c[0][0];
    end
    else begin : g_depth_n
        assign tree_sum   = level_s[DEPTH][0];
        assign tree_carry = level_c[DEPTH][0];
    end

    // Handle addend/remainder operand if exists
    if (N_REM == 1) begin : g_has_rem
        localparam W_FINAL_IN  = W + DEPTH * 2 + 2;
        localparam W_FINAL_OUT = W_FINAL_IN + 2;
        wire [W_FINAL_OUT-1:0] final_sum, final_carry;
        `UNUSED_VAR({final_sum, final_carry});

        // Fold the last operand (operands[N-1]) into the tree's sum/carry pair
        VX_csa_32 #(
            .N       (W_FINAL_IN),
            .WIDTH_O (W_FINAL_OUT)
        ) csa_rem (
            .a     (W_FINAL_IN'(tree_sum)),
            .b     (W_FINAL_IN'(tree_carry)),
            .c     (W_FINAL_IN'(operands[N-1])),
            .sum   (final_sum),
            .carry (final_carry)
        );

        // Final addition of the sum/carry pair, resized to S bits
        VX_ks_adder #(
            .N (S)
        ) ksa_rem (
            .dataa (S'(final_sum)),
            .datab (S'(final_carry)),
            .sum   (sum),
            .cout  (cout)
        );
    end
    else begin : g_no_rem
        // Final addition of the tree's sum/carry pair, resized to S bits
        VX_ks_adder #(
            .N (S)
        ) ksa (
            .dataa (S'(tree_sum)),
            .datab (S'(tree_carry)),
            .sum   (sum),
            .cout  (cout)
        );
    end

endmodule
159+
160+
`TRACING_ON
161+

hw/rtl/tcu/drl/VX_tcu_drl_acc.sv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ module VX_tcu_drl_acc #(
2929
end
3030

3131
//Carry-Save-Adder based significand accumulation
32-
VX_csa_tree #(
32+
VX_csa_mod4 #(
3333
.N (N),
3434
.W (W),
3535
.S (W-1)

0 commit comments

Comments
 (0)