From fc3ef2bb74ce901b3a66c498809f41405fbb0018 Mon Sep 17 00:00:00 2001 From: wayne111 Date: Tue, 26 Nov 2024 23:56:47 +0800 Subject: [PATCH 1/5] fix the mask operand access wrong address #243 --- hardware/src/lane/lane_sequencer.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hardware/src/lane/lane_sequencer.sv b/hardware/src/lane/lane_sequencer.sv index 3ddcfa6eb..90d781d1d 100644 --- a/hardware/src/lane/lane_sequencer.sv +++ b/hardware/src/lane/lane_sequencer.sv @@ -342,7 +342,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // Since this request goes outside of the lane, we might need to request an // extra operand regardless of whether it is valid in this lane or not. vl : (pe_req.vl / NrLanes / 8) >> unsigned'(pe_req.vtype.vsew), - vstart : vfu_operation_d.vstart, + vl : (pe_req.vl / NrLanes / 8) >> int'(pe_req.vtype.vsew), hazard : pe_req.hazard_vm | pe_req.hazard_vd, default: '0 }; @@ -425,7 +425,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // Since this request goes outside of the lane, we might need to request an // extra operand regardless of whether it is valid in this lane or not. vl : (pe_req.vl / NrLanes / 8) >> unsigned'(pe_req.vtype.vsew), - vstart : vfu_operation_d.vstart, + vstart : (pe_req.vstart / NrLanes / 8) >> int'(pe_req.vtype.vsew), hazard : pe_req.hazard_vm | pe_req.hazard_vd, default: '0 }; @@ -443,7 +443,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // Since this request goes outside of the lane, we might need to request an // extra operand regardless of whether it is valid in this lane or not. vl : (pe_req.vl / NrLanes / 8) >> unsigned'(pe_req.vtype.vsew), - vstart : vfu_operation_d.vstart, + vstart : (pe_req.vstart / NrLanes / 8) >> int'(pe_req.vtype.vsew), hazard : pe_req.hazard_vm | pe_req.hazard_vd, default: '0 }; @@ -504,7 +504,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // Since this request goes outside of the lane, we might need to request an // extra operand regardless of whether it is valid in this lane or not. vl : (pe_req.vl / NrLanes / 8) >> unsigned'(pe_req.vtype.vsew), - vstart : vfu_operation_d.vstart, + vstart : (pe_req.vstart / NrLanes / 8) >> int'(pe_req.vtype.vsew), hazard : pe_req.hazard_vm | pe_req.hazard_vd, default: '0 }; @@ -754,7 +754,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // Since this request goes outside of the lane, we might need to request an // extra operand regardless of whether it is valid in this lane or not. vl : (pe_req.vl / NrLanes / ELEN), - vstart : vfu_operation_d.vstart, + vstart : (pe_req.vstart / NrLanes / ELEN), hazard : pe_req.hazard_vm, default: '0 }; From 2fa69f118e6d2174b611f932eb7f572808ab79ed Mon Sep 17 00:00:00 2001 From: wayne111 Date: Wed, 27 Nov 2024 00:17:37 +0800 Subject: [PATCH 2/5] Prevent extra lane operations caused by unbalanced workloads --- hardware/src/lane/operand_requester.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hardware/src/lane/operand_requester.sv b/hardware/src/lane/operand_requester.sv index 1baec0780..0ff8be3a7 100644 --- a/hardware/src/lane/operand_requester.sv +++ b/hardware/src/lane/operand_requester.sv @@ -385,7 +385,7 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( // Mute the requisition if the vl is zero - if (operand_request_i[requester_index].vl == '0) begin : zero_vl + if (operand_request_i[requester_index].vl == '0 || | (operand_request_i[requester].vl == operand_request_i[requester].vstart)) begin : zero_vl state_d = IDLE; operand_queue_cmd_valid_o[requester_index] = 1'b0; end : zero_vl From 900abfdcb4094f20171d78328c257fb78c76392e Mon Sep 17 00:00:00 2001 From: wayne-win <145100853+wayne-win@users.noreply.github.com> Date: Wed, 27 Nov 2024 00:25:11 +0800 Subject: [PATCH 3/5] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bace86a6..af83566f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Bump upload and delete artifact actions - Fix synthesis-unfriendly constructs - Fix vector slicing bug in operand requesters + - Fix the mask_operand_requester access wrong address considergin vstart ### Added From 3a5bad0ca1fc3af43f12a6ba9d451c2757f0518f Mon Sep 17 00:00:00 2001 From: wayne111 Date: Wed, 27 Nov 2024 00:38:07 +0800 Subject: [PATCH 4/5] Illegal instructions should not trigger unnecessary reshuffles #276 --- hardware/src/ara_dispatcher.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index b7abef276..d75b6bfa3 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -3272,7 +3272,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Check if we need to reshuffle our vector registers involved in the operation // This operation is costly when occurs, so avoid it if possible - if ( ara_req_valid && !acc_resp_o.exception.valid ) begin + if ( ara_req_valid && !acc_resp_o.exception.valid && !illegal_insn) begin automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // Is the instruction an in-lane one and could it be subject to reshuffling? From f78c3a75e8b5f34ba4eee0fae1811940b9dc078d Mon Sep 17 00:00:00 2001 From: wayne111 Date: Wed, 27 Nov 2024 08:23:04 +0800 Subject: [PATCH 5/5] Push operand request only when vl not zeor, #259 --- hardware/src/lane/lane_sequencer.sv | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/hardware/src/lane/lane_sequencer.sv b/hardware/src/lane/lane_sequencer.sv index 90d781d1d..4f9e77286 100644 --- a/hardware/src/lane/lane_sequencer.sv +++ b/hardware/src/lane/lane_sequencer.sv @@ -310,7 +310,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: target_fu : ALU_SLDU, default : '0 }; - operand_request_push[AluA] = pe_req.use_vs1; + operand_request_push[AluA] = pe_req.use_vs1 && vfu_operation_valid_d; operand_request[AluB] = '{ id : pe_req.id, @@ -331,7 +331,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: target_fu : ALU_SLDU, default : '0 }; - operand_request_push[AluB] = pe_req.use_vs2; + operand_request_push[AluB] = pe_req.use_vs2 && vfu_operation_valid_d; // This vector instruction uses masks operand_request[MaskM] = '{ @@ -348,7 +348,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: }; if ((operand_request[MaskM].vl << unsigned'(pe_req.vtype.vsew)) * NrLanes * 8 != pe_req.vl) operand_request[MaskM].vl += 1; - operand_request_push[MaskM] = !pe_req.vm; + operand_request_push[MaskM] = !pe_req.vm && vfu_operation_valid_d; end VFU_MFpu: begin operand_request[MulFPUA] = '{ @@ -369,7 +369,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: target_fu : MFPU_ADDRGEN, default : '0 }; - operand_request_push[MulFPUA] = pe_req.use_vs1; + operand_request_push[MulFPUA] = pe_req.use_vs1 && vfu_operation_valid_d; operand_request[MulFPUB] = '{ id : pe_req.id, @@ -391,7 +391,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: target_fu : MFPU_ADDRGEN, default: '0 }; - operand_request_push[MulFPUB] = pe_req.swap_vs2_vd_op ? + operand_request_push[MulFPUB] = !vfu_operation_valid_d ? 1'b0 : pe_req.swap_vs2_vd_op ? pe_req.use_vd_op : pe_req.use_vs2; operand_request[MulFPUC] = '{ @@ -413,7 +413,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: target_fu : MFPU_ADDRGEN, default : '0 }; - operand_request_push[MulFPUC] = pe_req.swap_vs2_vd_op ? + operand_request_push[MulFPUC] = !vfu_operation_valid_d ? 1'b0 : pe_req.swap_vs2_vd_op ? pe_req.use_vs2 : pe_req.use_vd_op; // This vector instruction uses masks @@ -431,7 +431,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: }; if ((operand_request[MaskM].vl << unsigned'(pe_req.vtype.vsew)) * NrLanes * 8 != pe_req.vl) operand_request[MaskM].vl += 1; - operand_request_push[MaskM] = !pe_req.vm; + operand_request_push[MaskM] = !pe_req.vm && vfu_operation_valid_d; end VFU_LoadUnit : begin // This vector instruction uses masks @@ -449,7 +449,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: }; if ((operand_request[MaskM].vl << unsigned'(pe_req.vtype.vsew)) * NrLanes * 8 != pe_req.vl) operand_request[MaskM].vl += 1; - operand_request_push[MaskM] = !pe_req.vm; + operand_request_push[MaskM] = !pe_req.vm && vfu_operation_valid_d; // Load indexed operand_request[SlideAddrGenA] = '{ @@ -469,7 +469,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // extra operand regardless of whether it is valid in this lane or not. if (operand_request[SlideAddrGenA].vl * NrLanes != pe_req_i.vl) operand_request[SlideAddrGenA].vl += 1; - operand_request_push[SlideAddrGenA] = pe_req_i.op == VLXE; + operand_request_push[SlideAddrGenA] = pe_req_i.op == VLXE && vfu_operation_valid_d; end VFU_StoreUnit : begin @@ -510,7 +510,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: }; if ((operand_request[MaskM].vl << unsigned'(pe_req.vtype.vsew)) * NrLanes * 8 != pe_req.vl) operand_request[MaskM].vl += 1; - operand_request_push[MaskM] = !pe_req.vm; + operand_request_push[MaskM] = !pe_req.vm && vfu_operation_valid_d; // Store indexed // TODO: add vstart support here @@ -532,7 +532,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: if (operand_request[SlideAddrGenA].vl * NrLanes != pe_req_i.vl) begin operand_request[SlideAddrGenA].vl += 1; end - operand_request_push[SlideAddrGenA] = pe_req_i.op == VSXE; + operand_request_push[SlideAddrGenA] = pe_req_i.op == VSXE && vfu_operation_valid_d; end VFU_SlideUnit: begin @@ -549,7 +549,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: hazard : pe_req.hazard_vs2 | pe_req.hazard_vd, default : '0 }; - operand_request_push[SlideAddrGenA] = pe_req.use_vs2; + operand_request_push[SlideAddrGenA] = pe_req.use_vs2 && vfu_operation_valid_d; unique case (pe_req.op) VSLIDEUP: begin @@ -608,7 +608,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: hazard : pe_req.hazard_vm | pe_req.hazard_vd, default : '0 }; - operand_request_push[MaskM] = !pe_req.vm; + operand_request_push[MaskM] = !pe_req.vm && vfu_operation_valid_d; case (pe_req.op) VSLIDEUP: begin @@ -667,7 +667,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: if ((operand_request[AluA].vl << (unsigned'(EW64) - unsigned'(pe_req.eew_vs1))) * NrLanes != pe_req.vl) operand_request[AluA].vl += 1; end - operand_request_push[AluA] = pe_req.use_vs1 && !(pe_req.op inside {[VMFEQ:VMFGE], VCPOP, VMSIF, VMSOF, VMSBF}); + operand_request_push[AluA] = pe_req.use_vs1 && !(pe_req.op inside {[VMFEQ:VMFGE], VCPOP, VMSIF, VMSOF, VMSBF}) && vfu_operation_valid_d; operand_request[AluB] = '{ id : pe_req.id, @@ -694,7 +694,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: if ((operand_request[AluB].vl << (unsigned'(EW64) - unsigned'(pe_req.eew_vs2))) * NrLanes != pe_req.vl) operand_request[AluB].vl += 1; end - operand_request_push[AluB] = pe_req.use_vs2 && !(pe_req.op inside {[VMFEQ:VMFGE], VCPOP, VMSIF, VMSOF, VMSBF, VFIRST}); + operand_request_push[AluB] = pe_req.use_vs2 && !(pe_req.op inside {[VMFEQ:VMFGE], VCPOP, VMSIF, VMSOF, VMSBF, VFIRST}) && vfu_operation_valid_d; operand_request[MulFPUA] = '{ id : pe_req.id, @@ -710,7 +710,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // This is an operation that runs normally on the ALU, and then gets *condensed* and // reshuffled at the Mask Unit. operand_request[MulFPUA].vl = vfu_operation_d.vl; - operand_request_push[MulFPUA] = pe_req.use_vs1 && pe_req.op inside {[VMFEQ:VMFGE]} && !(pe_req.op inside {VCPOP, VMSIF, VMSOF, VMSBF}); + operand_request_push[MulFPUA] = pe_req.use_vs1 && pe_req.op inside {[VMFEQ:VMFGE]} && !(pe_req.op inside {VCPOP, VMSIF, VMSOF, VMSBF}) && vfu_operation_valid_d; operand_request[MulFPUB] = '{ id : pe_req.id, @@ -744,7 +744,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: if ((pe_req.vl % (NrLanes*ELEN)) != 0) begin operand_request[MaskB].vl += 1'b1; end - operand_request_push[MaskB] = pe_req.use_vs2 && pe_req.op inside {VCPOP, VFIRST, VMSIF, VMSOF, VMSBF}; + operand_request_push[MaskB] = pe_req.use_vs2 && pe_req.op inside {VCPOP, VFIRST, VMSIF, VMSOF, VMSBF} && vfu_operation_valid_d; operand_request[MaskM] = '{ id : pe_req.id, @@ -761,7 +761,7 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: if ((operand_request[MaskM].vl * NrLanes * ELEN) != pe_req.vl) begin operand_request[MaskM].vl += 1; end - operand_request_push[MaskM] = !pe_req.vm; + operand_request_push[MaskM] = !pe_req.vm && vfu_operation_valid_d; end VFU_None: begin operand_request[MaskB] = '{