@@ -165,12 +165,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
165165 // If the reg was not written, the content is unknown. No need to reshuffle
166166 // when writing with != EEW
167167 logic [31 : 0 ] eew_valid_d, eew_valid_q;
168+ // Helpers for reshuffling: EMUL of vs1 and vs2, saved for the later reshuffle steps
169+ rvv_pkg :: vlmul_e emul_vs1_d, emul_vs1_q, emul_vs2_d, emul_vs2_q;
168170 // Save eew information before reshuffling
169171 rvv_pkg :: vew_e eew_old_buffer_d, eew_old_buffer_q, eew_new_buffer_d, eew_new_buffer_q;
170172 // Helpers to handle reshuffling with LMUL > 1
171- logic [2 : 0 ] rs_lmul_cnt_d, rs_lmul_cnt_q ;
172- logic [2 : 0 ] rs_lmul_cnt_limit_d, rs_lmul_cnt_limit_q ;
173- logic rs_mask_request_d, rs_mask_request_q ;
173+ logic [2 : 0 ] reg_lmul_cnt_d, reg_lmul_cnt_q ;
174+ logic [2 : 0 ] reg_lmul_cnt_limit_d, reg_lmul_cnt_limit_q ;
175+ logic reg_mask_request_d, reg_mask_request_q ;
174176 // Save vreg to be reshuffled before reshuffling
175177 logic [4 : 0 ] vs_buffer_d, vs_buffer_q;
176178 // Keep track of the registers to be reshuffled |vs1|vs2|vd|
@@ -191,13 +193,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
191193 eew_new_buffer_q <= rvv_pkg :: EW8 ;
192194 vs_buffer_q <= '0 ;
193195 reshuffle_req_q <= '0 ;
194- rs_lmul_cnt_q <= '0 ;
195- rs_lmul_cnt_limit_q <= '0 ;
196- rs_mask_request_q <= 1'b0 ;
196+ reg_lmul_cnt_q <= '0 ;
197+ reg_lmul_cnt_limit_q <= '0 ;
198+ reg_mask_request_q <= 1'b0 ;
197199 reshuffle_eew_vs1_q <= rvv_pkg :: EW8 ;
198200 reshuffle_eew_vs2_q <= rvv_pkg :: EW8 ;
199201 reshuffle_eew_vd_q <= rvv_pkg :: EW8 ;
200202 pending_seg_mem_op_q <= 1'b0 ;
203+ emul_vs1_q <= LMUL_1 ;
204+ emul_vs2_q <= LMUL_1 ;
201205 end else begin
202206 state_q <= state_d;
203207 state_qq <= state_q;
@@ -207,13 +211,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
207211 eew_new_buffer_q <= eew_new_buffer_d;
208212 vs_buffer_q <= vs_buffer_d;
209213 reshuffle_req_q <= reshuffle_req_d;
210- rs_lmul_cnt_q <= rs_lmul_cnt_d ;
211- rs_lmul_cnt_limit_q <= rs_lmul_cnt_limit_d ;
212- rs_mask_request_q <= rs_mask_request_d ;
214+ reg_lmul_cnt_q <= reg_lmul_cnt_d ;
215+ reg_lmul_cnt_limit_q <= reg_lmul_cnt_limit_d ;
216+ reg_mask_request_q <= reg_mask_request_d ;
213217 reshuffle_eew_vs1_q <= reshuffle_eew_vs1_d;
214218 reshuffle_eew_vs2_q <= reshuffle_eew_vs2_d;
215219 reshuffle_eew_vd_q <= reshuffle_eew_vd_d;
216220 pending_seg_mem_op_q <= pending_seg_mem_op_d;
221+ emul_vs1_q <= emul_vs1_d;
222+ emul_vs2_q <= emul_vs2_d;
217223 end
218224 end
219225
@@ -385,9 +391,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
385391
386392 pending_seg_mem_op_d = pending_seg_mem_op_q;
387393
388- rs_lmul_cnt_d = '0 ;
389- rs_lmul_cnt_limit_d = '0 ;
390- rs_mask_request_d = 1'b0 ;
394+ reg_lmul_cnt_d = '0 ;
395+ reg_lmul_cnt_limit_d = '0 ;
396+ reg_mask_request_d = 1'b0 ;
391397
392398 illegal_insn = 1'b0 ;
393399 illegal_insn_load = 1'b0 ;
@@ -452,6 +458,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
452458
453459 emul_vs1 = ara_req.emul;
454460 emul_vs2 = ara_req.emul;
461+ emul_vs1_d = emul_vs1_q;
462+ emul_vs2_d = emul_vs2_q;
455463
456464 // Saturation in any lane will raise vxsat flag
457465 csr_vxsat_d | = | vxsat_flag_i;
@@ -489,9 +497,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
489497 acc_resp_o.resp_valid = 1'b0 ;
490498
491499 // Handle LMUL > 1
492- rs_lmul_cnt_d = rs_lmul_cnt_q ;
493- rs_lmul_cnt_limit_d = rs_lmul_cnt_limit_q ;
494- rs_mask_request_d = 1'b0 ;
500+ reg_lmul_cnt_d = reg_lmul_cnt_q ;
501+ reg_lmul_cnt_limit_d = reg_lmul_cnt_limit_q ;
502+ reg_mask_request_d = 1'b0 ;
495503
496504 // Every single reshuffle request refers to LMUL == 1
497505 ara_req.emul = LMUL_1 ;
@@ -502,7 +510,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
502510 // These generate a reshuffle request to Ara's backend
503511 // When LMUL > 1, not all the regs that compose a large
504512 // register should always be reshuffled
505- ara_req_valid = ~ rs_mask_request_q ;
513+ ara_req_valid = ~ reg_mask_request_q ;
506514 ara_req.use_scalar_op = 1'b1 ;
507515 ara_req.vs2 = vs_buffer_q;
508516 ara_req.eew_vs2 = eew_old_buffer_q;
@@ -525,8 +533,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
525533 // Backend ready - Decide what to do next
526534 if (ara_req_ready_i) begin
527535 // Register completely reshuffled
528- if (rs_lmul_cnt_q == rs_lmul_cnt_limit_q ) begin
529- rs_lmul_cnt_d = 0 ;
536+ if (reg_lmul_cnt_q == reg_lmul_cnt_limit_q ) begin
537+ reg_lmul_cnt_d = 0 ;
530538
531539 // Delete the already processed vector register from the notebook -> |vs1|vs2|vd|
532540 unique casez (reshuffle_req_q)
@@ -544,11 +552,13 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
544552 eew_old_buffer_d = eew_q[insn.vmem_type.rs2]; // EEW currently recorded for the vs2 register
545553 eew_new_buffer_d = reshuffle_eew_vs2_q; // EEW saved for vs2 when the reshuffle was started
546554 vs_buffer_d = insn.varith_type.rs2;
555+ reg_lmul_cnt_limit_d = emul_vs2_q; // NOTE(review): raw vlmul_e value is used as the counter limit here, whereas the initial limit in this module is mapped LMUL_2->1, LMUL_4->3, LMUL_8->7 - confirm both agree for LMUL_4/LMUL_8
547556 end
548557 3'b100 : begin // Only the vs1 bit of |vs1|vs2|vd| is still pending
549558 eew_old_buffer_d = eew_q[insn.vmem_type.rs1]; // EEW currently recorded for the vs1 register
550559 eew_new_buffer_d = reshuffle_eew_vs1_q; // EEW saved for vs1 when the reshuffle was started
551560 vs_buffer_d = insn.varith_type.rs1;
561+ reg_lmul_cnt_limit_d = emul_vs1_q; // NOTE(review): same raw vlmul_e -> counter-limit assignment as for vs2 above - confirm against the LMUL_2->1 / LMUL_4->3 / LMUL_8->7 mapping
552562 end
553563 default : ;
554564 endcase
@@ -570,7 +580,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
570580 // The register is not completely reshuffled (LMUL > 1)
571581 end else begin
572582 // Count up
573- rs_lmul_cnt_d = rs_lmul_cnt_q + 1 ;
583+ reg_lmul_cnt_d = reg_lmul_cnt_q + 1 ;
574584
575585 // Prepare the information to reshuffle the vector registers during the next cycles
576586 // Since LMUL > 1, we should go on and check if the next register needs a reshuffle
@@ -595,7 +605,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
595605 endcase
596606
597607 // Mask the next request if we don't need to reshuffle the next reg
598- if (eew_new_buffer_d == eew_old_buffer_d) rs_mask_request_d = 1'b1 ;
608+ if (eew_new_buffer_d == eew_old_buffer_d) reg_mask_request_d = 1'b1 ;
599609 end
600610 end
601611 end
@@ -1534,16 +1544,20 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
15341544 case (insn.varith_type.rs1)
15351545 5'b00000 : begin
15361546 ara_req.op = ara_pkg :: VMVXS ;
1547+ ara_req.use_vs1 = 1'b0 ;
15371548 ara_req.vl = 1 ;
15381549 // Scalar source
15391550 emul_vs2 = LMUL_1 ;
1551+ ara_req.emul = LMUL_1 ;
15401552 end
15411553 5'b10000 : begin
15421554 ara_req.op = ara_pkg :: VCPOP ;
1555+ ara_req.use_vs1 = 1'b0 ;
15431556 ara_req.eew_vs2 = eew_q[ara_req.vs2];
15441557 end
15451558 5'b10001 : begin
15461559 ara_req.op = ara_pkg :: VFIRST ;
1560+ ara_req.use_vs1 = 1'b0 ;
15471561 ara_req.eew_vs2 = eew_q[ara_req.vs2];
15481562 end
15491563 default : ;
@@ -2030,6 +2044,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
20302044 ara_req.vl = | csr_vl_q ? 1 : '0 ;
20312045 // This instruction ignores LMUL checks
20322046 skip_lmul_checks = 1'b1 ;
2047+ ara_req.emul = LMUL_1 ;
20332048 end
20342049 // Divide instructions
20352050 6'b100000 : ara_req.op = ara_pkg :: VDIVU ;
@@ -2288,6 +2303,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
22882303 ara_req.vstart = '0 ;
22892304 skip_lmul_checks = 1'b1 ;
22902305 ignore_zero_vl_check = 1'b1 ;
2306+ emul_vs2 = LMUL_1 ;
2307+ ara_req.emul = LMUL_1 ;
22912308
22922309 // Zero-extend operands
22932310 unique case (csr_vtype_q.vsew)
@@ -2730,6 +2747,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
27302747 ara_req.vl = | csr_vl_q ? 1 : '0 ;
27312748 // This instruction ignores LMUL checks
27322749 skip_lmul_checks = 1'b1 ;
2750+ ara_req.emul = LMUL_1 ;
27332751 end
27342752 6'b010111 : ara_req.op = ara_pkg :: VMERGE ;
27352753 6'b011000 : begin
@@ -3753,27 +3771,38 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
37533771 endcase
37543772
37553773 // Mask the next request if we don't need to reshuffle the next reg
3756- if (eew_new_buffer_d == eew_old_buffer_d) rs_mask_request_d = 1'b1 ;
3774+ if (eew_new_buffer_d == eew_old_buffer_d) reg_mask_request_d = 1'b1 ;
37573775 end
37583776
37593777 // Reshuffle if at least one of the three registers needs a reshuffle
37603778 if (| reshuffle_req_d) begin
37613779 // Instruction is of one of the RVV types
37623780 automatic rvv_instruction_t insn = rvv_instruction_t ' (instr.instr);
3781+ automatic rvv_pkg :: vlmul_e reshuffle_emul;
37633782
37643783 // Stall the interface, and inject a reshuffling instruction
37653784 acc_resp_o.req_ready = 1'b0 ;
37663785 acc_resp_o.resp_valid = 1'b0 ;
37673786 ara_req_valid = 1'b0 ;
37683787
37693788 // Initialize the reshuffle counter limit to handle LMUL > 1
3770- unique case (ara_req.emul)
3771- LMUL_2 : rs_lmul_cnt_limit_d = 1 ;
3772- LMUL_4 : rs_lmul_cnt_limit_d = 3 ;
3773- LMUL_8 : rs_lmul_cnt_limit_d = 7 ;
3774- default : rs_lmul_cnt_limit_d = 0 ;
3789+ unique casez (reshuffle_req_d) // Pick the EMUL of the first pending register in |vs1|vs2|vd|, lowest bit first; no default item, the enclosing if guarantees a non-zero request
3790+ 3'b??1 : reshuffle_emul = ara_req.emul; // vd bit set: use the EMUL of the request
3791+ 3'b?10 : reshuffle_emul = emul_vs2; // vd clear, vs2 bit set
3792+ 3'b100 : reshuffle_emul = emul_vs1; // only the vs1 bit set
37753793 endcase
37763794
3795+ unique case (reshuffle_emul) // Translate the selected EMUL into the reshuffle counter limit
3796+ LMUL_2 : reg_lmul_cnt_limit_d = 1 ;
3797+ LMUL_4 : reg_lmul_cnt_limit_d = 3 ;
3798+ LMUL_8 : reg_lmul_cnt_limit_d = 7 ;
3799+ default : reg_lmul_cnt_limit_d = 0 ; // every other EMUL value: limit 0
3800+ endcase
3801+
3802+ // Save vs1 and vs2 emul for reshuffling. vd, if needed, has been saved already.
3803+ emul_vs1_d = emul_vs1;
3804+ emul_vs2_d = emul_vs2;
3805+
37773806 // Save info for next reshuffles
37783807 reshuffle_eew_vs1_d = ara_req.eew_vs1;
37793808 reshuffle_eew_vs2_d = ara_req.eew_vs2;
0 commit comments