Skip to content

Commit 24c06ce

Browse files
committed
[hardware] 🐛 Fix EMUL handling when reshuffling more regs
1 parent 2325449 commit 24c06ce

File tree

1 file changed

+55
-26
lines changed

1 file changed

+55
-26
lines changed

hardware/src/ara_dispatcher.sv

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
165165
// If the reg was not written, the content is unknown. No need to reshuffle
166166
// when writing with != EEW
167167
logic [31:0] eew_valid_d, eew_valid_q;
168+
// Helpers for reshuffling: saved EMUL of vs1 and vs2
169+
rvv_pkg::vlmul_e emul_vs1_d, emul_vs1_q, emul_vs2_d, emul_vs2_q;
168170
// Save eew information before reshuffling
169171
rvv_pkg::vew_e eew_old_buffer_d, eew_old_buffer_q, eew_new_buffer_d, eew_new_buffer_q;
170172
// Helpers to handle reshuffling with LMUL > 1
171-
logic [2:0] rs_lmul_cnt_d, rs_lmul_cnt_q;
172-
logic [2:0] rs_lmul_cnt_limit_d, rs_lmul_cnt_limit_q;
173-
logic rs_mask_request_d, rs_mask_request_q;
173+
logic [2:0] reg_lmul_cnt_d, reg_lmul_cnt_q;
174+
logic [2:0] reg_lmul_cnt_limit_d, reg_lmul_cnt_limit_q;
175+
logic reg_mask_request_d, reg_mask_request_q;
174176
// Save vreg to be reshuffled before reshuffling
175177
logic [4:0] vs_buffer_d, vs_buffer_q;
176178
// Keep track of the registers to be reshuffled |vs1|vs2|vd|
@@ -191,13 +193,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
191193
eew_new_buffer_q <= rvv_pkg::EW8;
192194
vs_buffer_q <= '0;
193195
reshuffle_req_q <= '0;
194-
rs_lmul_cnt_q <= '0;
195-
rs_lmul_cnt_limit_q <= '0;
196-
rs_mask_request_q <= 1'b0;
196+
reg_lmul_cnt_q <= '0;
197+
reg_lmul_cnt_limit_q <= '0;
198+
reg_mask_request_q <= 1'b0;
197199
reshuffle_eew_vs1_q <= rvv_pkg::EW8;
198200
reshuffle_eew_vs2_q <= rvv_pkg::EW8;
199201
reshuffle_eew_vd_q <= rvv_pkg::EW8;
200202
pending_seg_mem_op_q <= 1'b0;
203+
emul_vs1_q <= LMUL_1;
204+
emul_vs2_q <= LMUL_1;
201205
end else begin
202206
state_q <= state_d;
203207
state_qq <= state_q;
@@ -207,13 +211,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
207211
eew_new_buffer_q <= eew_new_buffer_d;
208212
vs_buffer_q <= vs_buffer_d;
209213
reshuffle_req_q <= reshuffle_req_d;
210-
rs_lmul_cnt_q <= rs_lmul_cnt_d;
211-
rs_lmul_cnt_limit_q <= rs_lmul_cnt_limit_d;
212-
rs_mask_request_q <= rs_mask_request_d;
214+
reg_lmul_cnt_q <= reg_lmul_cnt_d;
215+
reg_lmul_cnt_limit_q <= reg_lmul_cnt_limit_d;
216+
reg_mask_request_q <= reg_mask_request_d;
213217
reshuffle_eew_vs1_q <= reshuffle_eew_vs1_d;
214218
reshuffle_eew_vs2_q <= reshuffle_eew_vs2_d;
215219
reshuffle_eew_vd_q <= reshuffle_eew_vd_d;
216220
pending_seg_mem_op_q <= pending_seg_mem_op_d;
221+
emul_vs1_q <= emul_vs1_d;
222+
emul_vs2_q <= emul_vs2_d;
217223
end
218224
end
219225

@@ -385,9 +391,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
385391

386392
pending_seg_mem_op_d = pending_seg_mem_op_q;
387393

388-
rs_lmul_cnt_d = '0;
389-
rs_lmul_cnt_limit_d = '0;
390-
rs_mask_request_d = 1'b0;
394+
reg_lmul_cnt_d = '0;
395+
reg_lmul_cnt_limit_d = '0;
396+
reg_mask_request_d = 1'b0;
391397

392398
illegal_insn = 1'b0;
393399
illegal_insn_load = 1'b0;
@@ -452,6 +458,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
452458

453459
emul_vs1 = ara_req.emul;
454460
emul_vs2 = ara_req.emul;
461+
emul_vs1_d = emul_vs1_q;
462+
emul_vs2_d = emul_vs2_q;
455463

456464
// Saturation in any lane will raise vxsat flag
457465
csr_vxsat_d |= |vxsat_flag_i;
@@ -489,9 +497,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
489497
acc_resp_o.resp_valid = 1'b0;
490498

491499
// Handle LMUL > 1
492-
rs_lmul_cnt_d = rs_lmul_cnt_q;
493-
rs_lmul_cnt_limit_d = rs_lmul_cnt_limit_q;
494-
rs_mask_request_d = 1'b0;
500+
reg_lmul_cnt_d = reg_lmul_cnt_q;
501+
reg_lmul_cnt_limit_d = reg_lmul_cnt_limit_q;
502+
reg_mask_request_d = 1'b0;
495503

496504
// Every single reshuffle request refers to LMUL == 1
497505
ara_req.emul = LMUL_1;
@@ -502,7 +510,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
502510
// These generate a reshuffle request to Ara's backend
503511
// When LMUL > 1, not all the regs that compose a large
504512
// register should always be reshuffled
505-
ara_req_valid = ~rs_mask_request_q;
513+
ara_req_valid = ~reg_mask_request_q;
506514
ara_req.use_scalar_op = 1'b1;
507515
ara_req.vs2 = vs_buffer_q;
508516
ara_req.eew_vs2 = eew_old_buffer_q;
@@ -525,8 +533,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
525533
// Backend ready - Decide what to do next
526534
if (ara_req_ready_i) begin
527535
// Register completely reshuffled
528-
if (rs_lmul_cnt_q == rs_lmul_cnt_limit_q) begin
529-
rs_lmul_cnt_d = 0;
536+
if (reg_lmul_cnt_q == reg_lmul_cnt_limit_q) begin
537+
reg_lmul_cnt_d = 0;
530538

531539
// Delete the already processed vector register from the notebook -> |vs1|vs2|vd|
532540
unique casez (reshuffle_req_q)
@@ -544,11 +552,13 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
544552
eew_old_buffer_d = eew_q[insn.vmem_type.rs2];
545553
eew_new_buffer_d = reshuffle_eew_vs2_q;
546554
vs_buffer_d = insn.varith_type.rs2;
555+
reg_lmul_cnt_limit_d = emul_vs2_q;
547556
end
548557
3'b100: begin
549558
eew_old_buffer_d = eew_q[insn.vmem_type.rs1];
550559
eew_new_buffer_d = reshuffle_eew_vs1_q;
551560
vs_buffer_d = insn.varith_type.rs1;
561+
reg_lmul_cnt_limit_d = emul_vs1_q;
552562
end
553563
default:;
554564
endcase
@@ -570,7 +580,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
570580
// The register is not completely reshuffled (LMUL > 1)
571581
end else begin
572582
// Count up
573-
rs_lmul_cnt_d = rs_lmul_cnt_q + 1;
583+
reg_lmul_cnt_d = reg_lmul_cnt_q + 1;
574584

575585
// Prepare the information to reshuffle the vector registers during the next cycles
576586
// Since LMUL > 1, we should go on and check if the next register needs a reshuffle
@@ -595,7 +605,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
595605
endcase
596606

597607
// Mask the next request if we don't need to reshuffle the next reg
598-
if (eew_new_buffer_d == eew_old_buffer_d) rs_mask_request_d = 1'b1;
608+
if (eew_new_buffer_d == eew_old_buffer_d) reg_mask_request_d = 1'b1;
599609
end
600610
end
601611
end
@@ -1534,16 +1544,20 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
15341544
case (insn.varith_type.rs1)
15351545
5'b00000: begin
15361546
ara_req.op = ara_pkg::VMVXS;
1547+
ara_req.use_vs1 = 1'b0;
15371548
ara_req.vl = 1;
15381549
// Scalar source
15391550
emul_vs2 = LMUL_1;
1551+
ara_req.emul = LMUL_1;
15401552
end
15411553
5'b10000: begin
15421554
ara_req.op = ara_pkg::VCPOP;
1555+
ara_req.use_vs1 = 1'b0;
15431556
ara_req.eew_vs2 = eew_q[ara_req.vs2];
15441557
end
15451558
5'b10001: begin
15461559
ara_req.op = ara_pkg::VFIRST;
1560+
ara_req.use_vs1 = 1'b0;
15471561
ara_req.eew_vs2 = eew_q[ara_req.vs2];
15481562
end
15491563
default :;
@@ -2030,6 +2044,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
20302044
ara_req.vl = |csr_vl_q ? 1 : '0;
20312045
// This instruction ignores LMUL checks
20322046
skip_lmul_checks = 1'b1;
2047+
ara_req.emul = LMUL_1;
20332048
end
20342049
// Divide instructions
20352050
6'b100000: ara_req.op = ara_pkg::VDIVU;
@@ -2288,6 +2303,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
22882303
ara_req.vstart = '0;
22892304
skip_lmul_checks = 1'b1;
22902305
ignore_zero_vl_check = 1'b1;
2306+
emul_vs2 = LMUL_1;
2307+
ara_req.emul = LMUL_1;
22912308

22922309
// Zero-extend operands
22932310
unique case (csr_vtype_q.vsew)
@@ -2730,6 +2747,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
27302747
ara_req.vl = |csr_vl_q ? 1 : '0;
27312748
// This instruction ignores LMUL checks
27322749
skip_lmul_checks = 1'b1;
2750+
ara_req.emul = LMUL_1;
27332751
end
27342752
6'b010111: ara_req.op = ara_pkg::VMERGE;
27352753
6'b011000: begin
@@ -3753,27 +3771,38 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
37533771
endcase
37543772

37553773
// Mask the next request if we don't need to reshuffle the next reg
3756-
if (eew_new_buffer_d == eew_old_buffer_d) rs_mask_request_d = 1'b1;
3774+
if (eew_new_buffer_d == eew_old_buffer_d) reg_mask_request_d = 1'b1;
37573775
end
37583776

37593777
// Reshuffle if at least one of the three registers needs a reshuffle
37603778
if (|reshuffle_req_d) begin
37613779
// Instruction is of one of the RVV types
37623780
automatic rvv_instruction_t insn = rvv_instruction_t'(instr.instr);
3781+
automatic rvv_pkg::vlmul_e reshuffle_emul;
37633782

37643783
// Stall the interface, and inject a reshuffling instruction
37653784
acc_resp_o.req_ready = 1'b0;
37663785
acc_resp_o.resp_valid = 1'b0;
37673786
ara_req_valid = 1'b0;
37683787

37693788
// Initialize the reshuffle counter limit to handle LMUL > 1
3770-
unique case (ara_req.emul)
3771-
LMUL_2: rs_lmul_cnt_limit_d = 1;
3772-
LMUL_4: rs_lmul_cnt_limit_d = 3;
3773-
LMUL_8: rs_lmul_cnt_limit_d = 7;
3774-
default: rs_lmul_cnt_limit_d = 0;
3789+
unique casez (reshuffle_req_d)
3790+
3'b??1: reshuffle_emul = ara_req.emul;
3791+
3'b?10: reshuffle_emul = emul_vs2;
3792+
3'b100: reshuffle_emul = emul_vs1;
37753793
endcase
37763794

3795+
unique case (reshuffle_emul)
3796+
LMUL_2: reg_lmul_cnt_limit_d = 1;
3797+
LMUL_4: reg_lmul_cnt_limit_d = 3;
3798+
LMUL_8: reg_lmul_cnt_limit_d = 7;
3799+
default: reg_lmul_cnt_limit_d = 0;
3800+
endcase
3801+
3802+
// Save vs1 and vs2 emul for reshuffling. vd, if needed, has been saved already.
3803+
emul_vs1_d = emul_vs1;
3804+
emul_vs2_d = emul_vs2;
3805+
37773806
// Save info for next reshuffles
37783807
reshuffle_eew_vs1_d = ara_req.eew_vs1;
37793808
reshuffle_eew_vs2_d = ara_req.eew_vs2;

0 commit comments

Comments
 (0)