-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[RISCV] Update SpacemiT-X60 vector permutation instructions latencies #152738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Signed-off-by: Mikhail R. Gadelha <[email protected]>
Signed-off-by: Mikhail R. Gadelha <[email protected]>
Signed-off-by: Mikhail R. Gadelha <[email protected]>
|
@llvm/pr-subscribers-backend-risc-v Author: Mikhail R. Gadelha (mikhailramalho) Changes: This PR adds hardware-measured latencies for all instructions defined in Section 16 of the RVV specification, "Vector Permutation Instructions", to the SpacemiT-X60 scheduling model. Patch is 265.12 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/152738.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 5541506ceb1e3..7064459c22ab4 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -683,39 +683,82 @@ foreach mx = SchedMxList in {
}
// 16. Vector Permutation Instructions
+// Slide
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
+ // Latency for slide up: 4/4/8/16, ReleaseAtCycles is 2/4/8/16
+ defvar VSlideUpLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VSlideUpOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VSlideUpLat, ReleaseAtCycles =[VSlideUpOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
- defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
+ // Latency for slide down: 4/5/9/17, ReleaseAtCycles is 3/5/9/17
+ defvar VSlideDownLat = GetLMULValue<[4, 4, 4, 4, 5, 9, 17], mx>.c;
+ defvar VSlideDownOcc = GetLMULValue<[1, 1, 1, 3, 5, 9, 17], mx>.c;
+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+ // The following group slide up and down together, so we use the worst-case
+ // (slide down) for all.
+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
-def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
-def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
-
-def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
-def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
+// ReleaseAtCycles is 2/2/2/2/2/3/6, but we can't set based on MX for now
+// TODO: Split this into separate WriteRes for each MX
+let Latency = 6, ReleaseAtCycles = [6] in {
+ def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
+}
-// Gather and Compress
-foreach mx = SchedMxList in {
- foreach sew = SchedSEWSet<mx>.val in {
- defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- }
+// ReleaseAtCycles is 1/1/1/1/1/2/4, but we can't set based on MX for now
+// TODO: Split this into separate WriteRes for each MX
+let Latency = 4, ReleaseAtCycles = [4] in {
+ def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
+ def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
+ def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
}
+// Integer LMUL Gather and Compress
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
+ defvar VRGatherLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ let Latency = VRGatherLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble<mx>.c] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar IsWorstCaseSEW = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+
+ defvar VRGatherVVLat = GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c;
+ defvar VRGatherVVOcc = GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c;
+ let Latency = VRGatherVVLat, ReleaseAtCycles = [VRGatherVVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+ // For sew == 8, latency is double that of the other SEWs for M1 through M4; fractional LMULs are a constant 4 cycles
+ defvar VRGatherEI16Lat = !if(!eq(sew, 8),
+ GetLMULValue<[4, 4, 4, 8, 32, 128, 256], mx>.c,
+ GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c);
+ defvar VRGatherEI16Occ = !if(!eq(sew, 8),
+ GetLMULValue<[1, 1, 2, 8, 32, 128, 256], mx>.c,
+ GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c);
+ let Latency = VRGatherEI16Lat, ReleaseAtCycles = [VRGatherEI16Occ] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+
+ defvar VCompressVLat = GetLMULValue<[4, 4, 4, 4, 10, 36, 136], mx>.c;
+ defvar VCompressVOcc = GetLMULValue<[1, 1, 1, 3, 10, 36, 136], mx>.c;
+ let Latency = VCompressVLat, ReleaseAtCycles = [VCompressVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+ }
}
// Others
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s
index f59c7987b615b..311a13c9427b1 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s
@@ -2911,65 +2911,65 @@ vfwsub.wv v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFMADD_VV vfmadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFMV_V_F vfmv.v.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
@@ -3763,7 +3763,7 @@ vfwsub.wv v8, v16, v24
#...
[truncated]
|
mshockwave
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| def : WriteRes<WriteVMovFS, [SMX60_VIEU]>; | ||
| def : WriteRes<WriteVMovSF, [SMX60_VIEU]>; | ||
| // ReleaseAtCycles is 2/2/2/2/2/3/6, but we can't set based on MX for now | ||
| // TODO: Split this into separate WriteRes for each MX |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It wasn't split by MX because vmv.x.s and friends should be independent of LMUL. I think the SpacemiT X60 might be an outlier here, and future hardware is more likely to have LMUL-independent latency and occupancy.
…llvm#152738) This PR adds hardware-measured latencies for all instructions defined in Section 16 of the RVV specification: "Vector Permutation Instructions" to the SpacemiT-X60 scheduling model. --------- Signed-off-by: Mikhail R. Gadelha <[email protected]>
This PR adds hardware-measured latencies for all instructions defined in Section 16 of the RVV specification: "Vector Permutation Instructions" to the SpacemiT-X60 scheduling model.