@@ -82,76 +82,100 @@ assign config_d.x_cols_iter = config_d.x_cols_lftovr != '0 ? x_cols_iter_nolftov
8282assign config_d.x_rows_iter = config_d.x_rows_lftovr != '0 ? x_rows_iter_nolftovr + 1 : x_rows_iter_nolftovr;
8383
8484// Sequential multiplier x_rows x w_cols
85- logic [31 : 0 ] x_rows_by_w_cols_iter;
86- logic x_rows_by_w_cols_iter_valid, x_rows_by_w_cols_iter_valid_d, x_rows_by_w_cols_iter_valid_q;
87- logic x_rows_by_w_cols_iter_ready;
88- hwpe_ctrl_seq_mult # (
89- .AW ( 16 ),
90- .BW ( 16 )
91- ) i_x_rows_by_w_cols_seqmult (
92- .clk_i ( clk_i ),
93- .rst_ni ( rst_ni ),
94- .clear_i ( clear_i | setback_i ),
95- .start_i ( start_cfg_i ),
96- .a_i ( config_d.x_rows_iter ),
97- .b_i ( config_d.w_cols_iter ),
98- .invert_i ( 1'b0 ),
99- .valid_o ( x_rows_by_w_cols_iter_valid_d ),
100- .ready_o ( x_rows_by_w_cols_iter_ready ),
101- .prod_o ( x_rows_by_w_cols_iter )
102- );
103- always_ff @ (posedge clk_int or negedge rst_ni) begin
104- if (~ rst_ni) begin
105- x_rows_by_w_cols_iter_valid_q <= '0 ;
106- x_rows_by_w_cols_iter_valid <= '0 ;
107- end else if (clear_i | setback_i) begin
85+ logic [31 : 0 ] x_rows_by_w_cols_iter_d, x_rows_by_w_cols_iter_q;
86+ logic x_rows_by_w_cols_iter_valid_d, x_rows_by_w_cols_iter_valid_q;
87+
88+ assign x_rows_by_w_cols_iter_d = start_cfg_i ? config_d.x_rows_iter * config_d.w_cols_iter : x_rows_by_w_cols_iter_q;
89+
90+ always_ff @ (posedge clk_i or negedge rst_ni) begin // register holding the x_rows_iter * w_cols_iter product
91+ if (~ rst_ni) begin // asynchronous, active-low reset
92+ x_rows_by_w_cols_iter_q <= '0 ;
93+ end else begin // 'else' is required: without it the branch below also executes during reset and overrides the reset value
94+ if (clear_i | setback_i) begin // synchronous clear
95+ x_rows_by_w_cols_iter_q <= '0 ;
96+ end else begin
97+ x_rows_by_w_cols_iter_q <= x_rows_by_w_cols_iter_d; // new product when start_cfg_i is high, otherwise the held value
98+ end
99+ end
100+ end
101+
102+ assign x_rows_by_w_cols_iter_valid_d = start_cfg_i;
103+
104+ always_ff @ (posedge clk_i or negedge rst_ni) begin // valid flag for the x_rows*w_cols product: start_cfg_i delayed by one clk_i cycle
105+ if (~ rst_ni) begin // asynchronous, active-low reset
108106 x_rows_by_w_cols_iter_valid_q <= '0 ;
109- x_rows_by_w_cols_iter_valid <= '0 ;
110- end else begin
111- x_rows_by_w_cols_iter_valid_q <= x_rows_by_w_cols_iter_valid_d;
112- x_rows_by_w_cols_iter_valid <= ~ x_rows_by_w_cols_iter_valid_q & x_rows_by_w_cols_iter_valid_d;
107+ end else begin // 'else' is required: without it the branch below also executes during reset and overrides the reset value
108+ if (clear_i | setback_i) begin // synchronous clear
109+ x_rows_by_w_cols_iter_valid_q <= '0 ;
110+ end else begin
111+ x_rows_by_w_cols_iter_valid_q <= x_rows_by_w_cols_iter_valid_d;
112+ end
113113 end
114114end
115115
116116// Sequential multiplier x_rows x w_cols x x_cols
117- logic [47 : 0 ] x_rows_by_w_cols_by_x_cols_iter;
118- logic x_rows_by_w_cols_by_x_cols_iter_valid;
119- logic x_rows_by_w_cols_by_x_cols_iter_ready;
120- hwpe_ctrl_seq_mult # (
121- .AW ( 16 ),
122- .BW ( 32 )
123- ) i_x_rows_by_w_cols_by_x_cols_seqmult (
124- .clk_i ( clk_int ),
125- .rst_ni ( rst_ni ),
126- .clear_i ( clear_i | setback_i ),
127- .start_i ( x_rows_by_w_cols_iter_valid ),
128- .a_i ( config_d.x_cols_iter ),
129- .b_i ( x_rows_by_w_cols_iter ),
130- .invert_i ( 1'b0 ),
131- .valid_o ( x_rows_by_w_cols_by_x_cols_iter_valid ),
132- .ready_o ( x_rows_by_w_cols_by_x_cols_iter_ready ),
133- .prod_o ( x_rows_by_w_cols_by_x_cols_iter )
134- );
117+ logic [47 : 0 ] x_rows_by_w_cols_by_x_cols_iter_d, x_rows_by_w_cols_by_x_cols_iter_q;
118+ logic x_rows_by_w_cols_by_x_cols_iter_valid_d, x_rows_by_w_cols_by_x_cols_iter_valid_q;
119+
120+ assign x_rows_by_w_cols_by_x_cols_iter_d = x_rows_by_w_cols_iter_valid_q ? config_d.x_cols_iter * x_rows_by_w_cols_iter_q : x_rows_by_w_cols_by_x_cols_iter_q;
121+
122+ always_ff @ (posedge clk_i or negedge rst_ni) begin // register holding the x_cols_iter * (x_rows*w_cols) product
123+ if (~ rst_ni) begin // asynchronous, active-low reset
124+ x_rows_by_w_cols_by_x_cols_iter_q <= '0 ;
125+ end else begin // 'else' is required: without it the branch below also executes during reset and overrides the reset value
126+ if (clear_i | setback_i) begin // synchronous clear
127+ x_rows_by_w_cols_by_x_cols_iter_q <= '0 ;
128+ end else begin
129+ x_rows_by_w_cols_by_x_cols_iter_q <= x_rows_by_w_cols_by_x_cols_iter_d; // new product when x_rows_by_w_cols_iter_valid_q is high, otherwise the held value
130+ end
131+ end
132+ end
133+
134+ assign x_rows_by_w_cols_by_x_cols_iter_valid_d = x_rows_by_w_cols_iter_valid_q;
135+
136+ always_ff @ (posedge clk_i or negedge rst_ni) begin // valid flag for the x_cols product: x_rows_by_w_cols_iter_valid_q delayed by one clk_i cycle
137+ if (~ rst_ni) begin // asynchronous, active-low reset
138+ x_rows_by_w_cols_by_x_cols_iter_valid_q <= '0 ;
139+ end else begin // 'else' is required: without it the branch below also executes during reset and overrides the reset value
140+ if (clear_i | setback_i) begin // synchronous clear
141+ x_rows_by_w_cols_by_x_cols_iter_valid_q <= '0 ;
142+ end else begin
143+ x_rows_by_w_cols_by_x_cols_iter_valid_q <= x_rows_by_w_cols_by_x_cols_iter_valid_d;
144+ end
145+ end
146+ end
135147
136148// Sequential multiplier x_rows x w_cols x w_rows
137- logic [47 : 0 ] x_rows_by_w_cols_by_w_rows_iter;
138- logic x_rows_by_w_cols_by_w_rows_iter_valid;
139- logic x_rows_by_w_cols_by_w_rows_iter_ready;
140- hwpe_ctrl_seq_mult # (
141- .AW ( 16 ),
142- .BW ( 32 )
143- ) i_x_rows_by_w_cols_by_w_rows_seqmult (
144- .clk_i ( clk_int ),
145- .rst_ni ( rst_ni ),
146- .clear_i ( clear_i | setback_i ),
147- .start_i ( x_rows_by_w_cols_iter_valid ),
148- .a_i ( config_d.w_rows_iter ),
149- .b_i ( x_rows_by_w_cols_iter ),
150- .invert_i ( 1'b0 ),
151- .valid_o ( x_rows_by_w_cols_by_w_rows_iter_valid ),
152- .ready_o ( x_rows_by_w_cols_by_w_rows_iter_ready ),
153- .prod_o ( x_rows_by_w_cols_by_w_rows_iter )
154- );
149+ logic [47 : 0 ] x_rows_by_w_cols_by_w_rows_iter_d, x_rows_by_w_cols_by_w_rows_iter_q;
150+ logic x_rows_by_w_cols_by_w_rows_iter_valid_d, x_rows_by_w_cols_by_w_rows_iter_valid_q;
151+
152+ assign x_rows_by_w_cols_by_w_rows_iter_d = x_rows_by_w_cols_iter_valid_q ? config_d.w_rows_iter * x_rows_by_w_cols_iter_q : x_rows_by_w_cols_by_w_rows_iter_q;
153+
154+ always_ff @ (posedge clk_i or negedge rst_ni) begin // register holding the w_rows_iter * (x_rows*w_cols) product
155+ if (~ rst_ni) begin // asynchronous, active-low reset
156+ x_rows_by_w_cols_by_w_rows_iter_q <= '0 ;
157+ end else begin // 'else' is required: without it the branch below also executes during reset and overrides the reset value
158+ if (clear_i | setback_i) begin // synchronous clear
159+ x_rows_by_w_cols_by_w_rows_iter_q <= '0 ;
160+ end else begin
161+ x_rows_by_w_cols_by_w_rows_iter_q <= x_rows_by_w_cols_by_w_rows_iter_d; // new product when x_rows_by_w_cols_iter_valid_q is high, otherwise the held value
162+ end
163+ end
164+ end
165+
166+ assign x_rows_by_w_cols_by_w_rows_iter_valid_d = x_rows_by_w_cols_iter_valid_q;
167+
168+ always_ff @ (posedge clk_i or negedge rst_ni) begin // valid flag for the w_rows product: x_rows_by_w_cols_iter_valid_q delayed by one clk_i cycle
169+ if (~ rst_ni) begin // asynchronous, active-low reset
170+ x_rows_by_w_cols_by_w_rows_iter_valid_q <= '0 ;
171+ end else begin // 'else' is required: without it the branch below also executes during reset and overrides the reset value
172+ if (clear_i | setback_i) begin // synchronous clear
173+ x_rows_by_w_cols_by_w_rows_iter_valid_q <= '0 ;
174+ end else begin
175+ x_rows_by_w_cols_by_w_rows_iter_valid_q <= x_rows_by_w_cols_by_w_rows_iter_valid_d;
176+ end
177+ end
178+ end
155179
156180// Calculate x_buffer_slots
157181logic [31 : 0 ] buffer_slots;
@@ -164,7 +188,7 @@ assign config_d.x_buffer_slots = ((config_d.x_cols_lftovr % ARRAY_HEIGHT != '0)
164188 buffer_slots) * ARRAY_HEIGHT ;
165189
166190// Calculating the number of total stores
167- assign config_d.tot_stores = x_rows_by_w_cols_iter [15 : 0 ];
191+ assign config_d.tot_stores = x_rows_by_w_cols_iter_q [15 : 0 ];
168192
169193assign config_d.stage_1_rnd_mode = config_d.gemm_ops == MATMUL ? RNE :
170194 config_d.gemm_ops == GEMM ? RNE :
@@ -203,12 +227,12 @@ assign config_d.gemm_selection = config_d.gemm_ops == MATMUL ? 1'b0 : 1'b1;
203227
204228assign config_d.x_d1_stride = ((NumByte* BITW )/ ADDR_W )* (((DATAW / BITW )* x_cols_iter_nolftovr) + config_d.x_cols_lftovr);
205229assign config_d.x_rows_offs = ARRAY_WIDTH * config_d.x_d1_stride;
206- assign config_d.w_tot_len = x_rows_by_w_cols_by_w_rows_iter [31 : 0 ];
230+ assign config_d.w_tot_len = x_rows_by_w_cols_by_w_rows_iter_q [31 : 0 ];
207231assign config_d.w_d0_stride = ((NumByte* BITW )/ ADDR_W )* (((DATAW / BITW )* w_cols_iter_nolftovr) + config_d.w_cols_lftovr);
208- assign config_d.yz_tot_len = ARRAY_WIDTH * x_rows_by_w_cols_iter [15 : 0 ];
232+ assign config_d.yz_tot_len = ARRAY_WIDTH * x_rows_by_w_cols_iter_q [15 : 0 ];
209233assign config_d.yz_d0_stride = config_d.w_d0_stride;
210234assign config_d.yz_d2_stride = ARRAY_WIDTH * config_d.w_d0_stride;
211- assign config_d.tot_x_read = x_rows_by_w_cols_by_x_cols_iter [31 : 0 ];
235+ assign config_d.tot_x_read = x_rows_by_w_cols_by_x_cols_iter_q [31 : 0 ];
212236assign config_d.x_tot_len = '0 ; // not used
213237
214238// register configuration to avoid critical paths (maybe removable!)
@@ -217,7 +241,7 @@ always_ff @(posedge clk_int or negedge rst_ni) begin
217241 config_q <= '0 ;
218242 else if (clear_i)
219243 config_q <= '0 ;
220- else if (x_rows_by_w_cols_by_w_rows_iter_valid & x_rows_by_w_cols_by_w_rows_iter_ready )
244+ else if (x_rows_by_w_cols_by_w_rows_iter_valid_q && x_rows_by_w_cols_by_x_cols_iter_valid_q )
221245 config_q <= config_d;
222246end
223247
@@ -227,8 +251,8 @@ always_ff @(posedge clk_int or negedge rst_ni) begin
227251 valid_o <= '0 ;
228252 else if (clear_i | setback_i)
229253 valid_o <= '0 ;
230- else if (x_rows_by_w_cols_by_w_rows_iter_ready )
231- valid_o <= x_rows_by_w_cols_by_w_rows_iter_valid ;
254+ else if (x_rows_by_w_cols_by_w_rows_iter_valid_q && x_rows_by_w_cols_by_x_cols_iter_valid_q )
255+ valid_o <= x_rows_by_w_cols_by_w_rows_iter_valid_q ;
232256end
233257
234258// re-encode in older RedMulE regfile map
0 commit comments