-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[RISCV] Update SpacemiT-X60 vector fixed-point arithmetic latencies #150517
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Signed-off-by: Mikhail R. Gadelha <[email protected]>
@llvm/pr-subscribers-backend-risc-v Author: Mikhail R. Gadelha (mikhailramalho) Changes: This PR adds hardware-measured latencies for all instructions defined in Section 12 of the RVV specification, "Vector Fixed-Point Arithmetic Instructions", to the SpacemiT-X60 scheduling model. Patch is 436.21 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/150517.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index bf23812c992ba..e000b4a71810c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -501,16 +501,27 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ // Pattern of vsmul: e8/e16 = 4/4/5/8, e32 = 5,5,5,8, e64 = 7,8,16,32
+ // We use the worst-case until we can split the SEW.
+ // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites
+ let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in {
+ defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
// 13. Vector Floating-Point Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index 7990dfc0880a5..4c84304405cbc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -366,8 +366,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: # => This Inner Loop Header: Depth=2
; RV64X60-NEXT: vl2r.v v8, (s2)
; RV64X60-NEXT: vl2r.v v10, (s3)
-; RV64X60-NEXT: sub s1, s1, t3
; RV64X60-NEXT: vaaddu.vv v8, v8, v10
+; RV64X60-NEXT: sub s1, s1, t3
; RV64X60-NEXT: vs2r.v v8, (s4)
; RV64X60-NEXT: add s4, s4, t3
; RV64X60-NEXT: add s3, s3, t3
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s
index 5cf5ed575a3e2..36917d4d14e8f 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s
@@ -3002,357 +3002,357 @@ vwsub.wx v8, v16, x30
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VWSUB_VX vwsub.vx v8, v16, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 5 4.00 5 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 5 4.00 5 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 5 4.00 5 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 5 4.00 5 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VV vaaddu.vv v8, v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VAADDU_VV vaaddu.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 5 4.00 5 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 5 4.00 5 SMX60_VIEU[4] VAADDU_VX vaaddu.vx v8, v8, t5
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VAADDU_VX vaaddu.vx v8, v8, t5
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
Signed-off-by: Mikhail R. Gadelha <[email protected]>
@mshockwave I have updated the ReleaseAtCycles values with data from the camel-cdr page. May I ask for another review?
This PR adds hardware-measured latencies for all instructions defined in Section 12 of the RVV specification, "Vector Fixed-Point Arithmetic Instructions", to the SpacemiT-X60 scheduling model.