Skip to content

Commit 496f245

Browse files
author
Andrea Belano
committed
[tiler] Remove serial multipliers
1 parent 64994d8 commit 496f245

File tree

1 file changed

+94
-70
lines changed

1 file changed

+94
-70
lines changed

rtl/redmule_tiler.sv

Lines changed: 94 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -82,76 +82,100 @@ assign config_d.x_cols_iter = config_d.x_cols_lftovr != '0 ? x_cols_iter_nolftov
8282
assign config_d.x_rows_iter = config_d.x_rows_lftovr != '0 ? x_rows_iter_nolftovr + 1 : x_rows_iter_nolftovr;
8383

8484
// Multiplier x_rows x w_cols (combinational product, registered)
85-
logic [31:0] x_rows_by_w_cols_iter;
86-
logic x_rows_by_w_cols_iter_valid, x_rows_by_w_cols_iter_valid_d, x_rows_by_w_cols_iter_valid_q;
87-
logic x_rows_by_w_cols_iter_ready;
88-
hwpe_ctrl_seq_mult #(
89-
.AW ( 16 ),
90-
.BW ( 16 )
91-
) i_x_rows_by_w_cols_seqmult (
92-
.clk_i ( clk_i ),
93-
.rst_ni ( rst_ni ),
94-
.clear_i ( clear_i | setback_i ),
95-
.start_i ( start_cfg_i ),
96-
.a_i ( config_d.x_rows_iter ),
97-
.b_i ( config_d.w_cols_iter ),
98-
.invert_i ( 1'b0 ),
99-
.valid_o ( x_rows_by_w_cols_iter_valid_d ),
100-
.ready_o ( x_rows_by_w_cols_iter_ready ),
101-
.prod_o ( x_rows_by_w_cols_iter )
102-
);
103-
always_ff @(posedge clk_int or negedge rst_ni) begin
104-
if(~rst_ni) begin
105-
x_rows_by_w_cols_iter_valid_q <= '0;
106-
x_rows_by_w_cols_iter_valid <= '0;
107-
end else if(clear_i | setback_i) begin
85+
logic [31:0] x_rows_by_w_cols_iter_d, x_rows_by_w_cols_iter_q;
86+
logic x_rows_by_w_cols_iter_valid_d, x_rows_by_w_cols_iter_valid_q;
87+
88+
assign x_rows_by_w_cols_iter_d = start_cfg_i ? config_d.x_rows_iter * config_d.w_cols_iter : x_rows_by_w_cols_iter_q;
89+
90+
always_ff @(posedge clk_i or negedge rst_ni) begin // register for the x_rows_iter * w_cols_iter product
91+
if (~rst_ni) begin // asynchronous, active-low reset
92+
x_rows_by_w_cols_iter_q <= '0;
93+
end else begin // 'else' is required: without it this branch also runs during reset and overrides the reset value
94+
if (clear_i | setback_i) begin // synchronous clear
95+
x_rows_by_w_cols_iter_q <= '0;
96+
end else begin
97+
x_rows_by_w_cols_iter_q <= x_rows_by_w_cols_iter_d;
98+
end
99+
end
100+
end
101+
102+
assign x_rows_by_w_cols_iter_valid_d = start_cfg_i;
103+
104+
always_ff @(posedge clk_i or negedge rst_ni) begin // register for x_rows_by_w_cols_iter_valid_d
105+
if (~rst_ni) begin // asynchronous, active-low reset
108106
x_rows_by_w_cols_iter_valid_q <= '0;
109-
x_rows_by_w_cols_iter_valid <= '0;
110-
end else begin
111-
x_rows_by_w_cols_iter_valid_q <= x_rows_by_w_cols_iter_valid_d;
112-
x_rows_by_w_cols_iter_valid <= ~x_rows_by_w_cols_iter_valid_q & x_rows_by_w_cols_iter_valid_d;
107+
end else begin // 'else' is required: without it this branch also runs during reset and overrides the reset value
108+
if (clear_i | setback_i) begin // synchronous clear
109+
x_rows_by_w_cols_iter_valid_q <= '0;
110+
end else begin
111+
x_rows_by_w_cols_iter_valid_q <= x_rows_by_w_cols_iter_valid_d;
112+
end
113113
end
114114
end
115115

116116
// Multiplier x_rows x w_cols x x_cols (combinational product, registered)
117-
logic [47:0] x_rows_by_w_cols_by_x_cols_iter;
118-
logic x_rows_by_w_cols_by_x_cols_iter_valid;
119-
logic x_rows_by_w_cols_by_x_cols_iter_ready;
120-
hwpe_ctrl_seq_mult #(
121-
.AW ( 16 ),
122-
.BW ( 32 )
123-
) i_x_rows_by_w_cols_by_x_cols_seqmult (
124-
.clk_i ( clk_int ),
125-
.rst_ni ( rst_ni ),
126-
.clear_i ( clear_i | setback_i ),
127-
.start_i ( x_rows_by_w_cols_iter_valid ),
128-
.a_i ( config_d.x_cols_iter ),
129-
.b_i ( x_rows_by_w_cols_iter ),
130-
.invert_i ( 1'b0 ),
131-
.valid_o ( x_rows_by_w_cols_by_x_cols_iter_valid ),
132-
.ready_o ( x_rows_by_w_cols_by_x_cols_iter_ready ),
133-
.prod_o ( x_rows_by_w_cols_by_x_cols_iter )
134-
);
117+
logic [47:0] x_rows_by_w_cols_by_x_cols_iter_d, x_rows_by_w_cols_by_x_cols_iter_q;
118+
logic x_rows_by_w_cols_by_x_cols_iter_valid_d, x_rows_by_w_cols_by_x_cols_iter_valid_q;
119+
120+
assign x_rows_by_w_cols_by_x_cols_iter_d = x_rows_by_w_cols_iter_valid_q ? config_d.x_cols_iter * x_rows_by_w_cols_iter_q : x_rows_by_w_cols_by_x_cols_iter_q;
121+
122+
always_ff @(posedge clk_i or negedge rst_ni) begin // register for the x_cols_iter * (x_rows * w_cols) product
123+
if (~rst_ni) begin // asynchronous, active-low reset
124+
x_rows_by_w_cols_by_x_cols_iter_q <= '0;
125+
end else begin // 'else' is required: without it this branch also runs during reset and overrides the reset value
126+
if (clear_i | setback_i) begin // synchronous clear
127+
x_rows_by_w_cols_by_x_cols_iter_q <= '0;
128+
end else begin
129+
x_rows_by_w_cols_by_x_cols_iter_q <= x_rows_by_w_cols_by_x_cols_iter_d;
130+
end
131+
end
132+
end
133+
134+
assign x_rows_by_w_cols_by_x_cols_iter_valid_d = x_rows_by_w_cols_iter_valid_q;
135+
136+
always_ff @(posedge clk_i or negedge rst_ni) begin // register for x_rows_by_w_cols_by_x_cols_iter_valid_d
137+
if (~rst_ni) begin // asynchronous, active-low reset
138+
x_rows_by_w_cols_by_x_cols_iter_valid_q <= '0;
139+
end else begin // 'else' is required: without it this branch also runs during reset and overrides the reset value
140+
if (clear_i | setback_i) begin // synchronous clear
141+
x_rows_by_w_cols_by_x_cols_iter_valid_q <= '0;
142+
end else begin
143+
x_rows_by_w_cols_by_x_cols_iter_valid_q <= x_rows_by_w_cols_by_x_cols_iter_valid_d;
144+
end
145+
end
146+
end
135147

136148
// Multiplier x_rows x w_cols x w_rows (combinational product, registered)
137-
logic [47:0] x_rows_by_w_cols_by_w_rows_iter;
138-
logic x_rows_by_w_cols_by_w_rows_iter_valid;
139-
logic x_rows_by_w_cols_by_w_rows_iter_ready;
140-
hwpe_ctrl_seq_mult #(
141-
.AW ( 16 ),
142-
.BW ( 32 )
143-
) i_x_rows_by_w_cols_by_w_rows_seqmult (
144-
.clk_i ( clk_int ),
145-
.rst_ni ( rst_ni ),
146-
.clear_i ( clear_i | setback_i ),
147-
.start_i ( x_rows_by_w_cols_iter_valid ),
148-
.a_i ( config_d.w_rows_iter ),
149-
.b_i ( x_rows_by_w_cols_iter ),
150-
.invert_i ( 1'b0 ),
151-
.valid_o ( x_rows_by_w_cols_by_w_rows_iter_valid ),
152-
.ready_o ( x_rows_by_w_cols_by_w_rows_iter_ready ),
153-
.prod_o ( x_rows_by_w_cols_by_w_rows_iter )
154-
);
149+
logic [47:0] x_rows_by_w_cols_by_w_rows_iter_d, x_rows_by_w_cols_by_w_rows_iter_q;
150+
logic x_rows_by_w_cols_by_w_rows_iter_valid_d, x_rows_by_w_cols_by_w_rows_iter_valid_q;
151+
152+
assign x_rows_by_w_cols_by_w_rows_iter_d = x_rows_by_w_cols_iter_valid_q ? config_d.w_rows_iter * x_rows_by_w_cols_iter_q : x_rows_by_w_cols_by_w_rows_iter_q;
153+
154+
always_ff @(posedge clk_i or negedge rst_ni) begin // register for the w_rows_iter * (x_rows * w_cols) product
155+
if (~rst_ni) begin // asynchronous, active-low reset
156+
x_rows_by_w_cols_by_w_rows_iter_q <= '0;
157+
end else begin // 'else' is required: without it this branch also runs during reset and overrides the reset value
158+
if (clear_i | setback_i) begin // synchronous clear
159+
x_rows_by_w_cols_by_w_rows_iter_q <= '0;
160+
end else begin
161+
x_rows_by_w_cols_by_w_rows_iter_q <= x_rows_by_w_cols_by_w_rows_iter_d;
162+
end
163+
end
164+
end
165+
166+
assign x_rows_by_w_cols_by_w_rows_iter_valid_d = x_rows_by_w_cols_iter_valid_q;
167+
168+
always_ff @(posedge clk_i or negedge rst_ni) begin // register for x_rows_by_w_cols_by_w_rows_iter_valid_d
169+
if (~rst_ni) begin // asynchronous, active-low reset
170+
x_rows_by_w_cols_by_w_rows_iter_valid_q <= '0;
171+
end else begin // 'else' is required: without it this branch also runs during reset and overrides the reset value
172+
if (clear_i | setback_i) begin // synchronous clear
173+
x_rows_by_w_cols_by_w_rows_iter_valid_q <= '0;
174+
end else begin
175+
x_rows_by_w_cols_by_w_rows_iter_valid_q <= x_rows_by_w_cols_by_w_rows_iter_valid_d;
176+
end
177+
end
178+
end
155179

156180
// Calculate x_buffer_slots
157181
logic [31:0] buffer_slots;
@@ -164,7 +188,7 @@ assign config_d.x_buffer_slots = ((config_d.x_cols_lftovr % ARRAY_HEIGHT != '0)
164188
buffer_slots) * ARRAY_HEIGHT;
165189

166190
// Calculating the number of total stores
167-
assign config_d.tot_stores = x_rows_by_w_cols_iter[15:0];
191+
assign config_d.tot_stores = x_rows_by_w_cols_iter_q[15:0];
168192

169193
assign config_d.stage_1_rnd_mode = config_d.gemm_ops == MATMUL ? RNE :
170194
config_d.gemm_ops == GEMM ? RNE :
@@ -203,12 +227,12 @@ assign config_d.gemm_selection = config_d.gemm_ops == MATMUL ? 1'b0 : 1'b1;
203227

204228
assign config_d.x_d1_stride = ((NumByte*BITW)/ADDR_W)*(((DATAW/BITW)*x_cols_iter_nolftovr) + config_d.x_cols_lftovr);
205229
assign config_d.x_rows_offs = ARRAY_WIDTH*config_d.x_d1_stride;
206-
assign config_d.w_tot_len = x_rows_by_w_cols_by_w_rows_iter[31:0];
230+
assign config_d.w_tot_len = x_rows_by_w_cols_by_w_rows_iter_q[31:0];
207231
assign config_d.w_d0_stride = ((NumByte*BITW)/ADDR_W)*(((DATAW/BITW)*w_cols_iter_nolftovr) + config_d.w_cols_lftovr);
208-
assign config_d.yz_tot_len = ARRAY_WIDTH*x_rows_by_w_cols_iter[15:0];
232+
assign config_d.yz_tot_len = ARRAY_WIDTH*x_rows_by_w_cols_iter_q[15:0];
209233
assign config_d.yz_d0_stride = config_d.w_d0_stride;
210234
assign config_d.yz_d2_stride = ARRAY_WIDTH*config_d.w_d0_stride;
211-
assign config_d.tot_x_read = x_rows_by_w_cols_by_x_cols_iter[31:0];
235+
assign config_d.tot_x_read = x_rows_by_w_cols_by_x_cols_iter_q[31:0];
212236
assign config_d.x_tot_len = '0; // not used
213237

214238
// register configuration to avoid critical paths (maybe removable!)
@@ -217,7 +241,7 @@ always_ff @(posedge clk_int or negedge rst_ni) begin
217241
config_q <= '0;
218242
else if (clear_i)
219243
config_q <= '0;
220-
else if(x_rows_by_w_cols_by_w_rows_iter_valid & x_rows_by_w_cols_by_w_rows_iter_ready)
244+
else if(x_rows_by_w_cols_by_w_rows_iter_valid_q && x_rows_by_w_cols_by_x_cols_iter_valid_q)
221245
config_q <= config_d;
222246
end
223247

@@ -227,8 +251,8 @@ always_ff @(posedge clk_int or negedge rst_ni) begin
227251
valid_o <= '0;
228252
else if (clear_i | setback_i)
229253
valid_o <= '0;
230-
else if(x_rows_by_w_cols_by_w_rows_iter_ready)
231-
valid_o <= x_rows_by_w_cols_by_w_rows_iter_valid;
254+
else if(x_rows_by_w_cols_by_w_rows_iter_valid_q && x_rows_by_w_cols_by_x_cols_iter_valid_q)
255+
valid_o <= x_rows_by_w_cols_by_w_rows_iter_valid_q;
232256
end
233257

234258
// re-encode in older RedMulE regfile map

0 commit comments

Comments
 (0)