diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 9059d5a4e497b..11c8b66e5705f 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -13,6 +13,156 @@ // //===----------------------------------------------------------------------===// +class SMX60IsWorstCaseMX MxList> { + string LLMUL = LargestLMUL.r; + bit c = !eq(mx, LLMUL); +} + +class SMX60IsWorstCaseMXSEW MxList, bit isF = 0> { + string LLMUL = LargestLMUL.r; + int SSEW = SmallestSEW.r; + bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); +} + +// 1 Micro-Op per cycle. +class Get1248Latency { + int c = !cond( + !eq(mx, "M1") : 1, + !eq(mx, "M2") : 2, + !eq(mx, "M4") : 4, + !eq(mx, "M8") : 8, + !eq(mx, "MF2") : 1, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +// Basic scaling pattern (4,4,4,4,4,8,16): doubles at higher LMULs +// Used for: logical ops, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides +class Get44816Latency { + int c = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 16, + !eq(mx, "MF2") : 4, + !eq(mx, "MF4") : 4, + !eq(mx, "MF8") : 4 + ); +} + +// Arithmetic scaling pattern (4,4,4,4,4,5,8): minimal increase at M4 +// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max +class Get4458Latency { + int c = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 5, + !eq(mx, "M8") : 8, + !eq(mx, "MF2") : 4, + !eq(mx, "MF4") : 4, + !eq(mx, "MF8") : 4 + ); +} + +// Progressive scaling pattern (4,4,4,4,6,10,18): gradual increase from M2 +// Used for: mask-producing comparisons, carry ops with mask, FP comparisons +class Get461018Latency { + int c = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 6, + !eq(mx, "M4") : 10, + !eq(mx, "M8") : 18, + !eq(mx, "MF2") : 4, + !eq(mx, "MF4") : 4, + !eq(mx, "MF8") : 4 + ); +} + +// Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher 
LMULs +// Used for: widening operations +class Get4588Latency { + int c = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 5, + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback + !eq(mx, "MF2") : 4, + !eq(mx, "MF4") : 4, + !eq(mx, "MF8") : 4 + ); +} + +// Complex FP scaling pattern (6,6,6,6,6,7,8): minimal increase +// Used for: FP FMA operations, complex FP ops +class Get6678Latency { + int c = !cond( + !eq(mx, "M1") : 6, + !eq(mx, "M2") : 6, + !eq(mx, "M4") : 7, + !eq(mx, "M8") : 8, + !eq(mx, "MF2") : 6, + !eq(mx, "MF4") : 6, + !eq(mx, "MF8") : 6 + ); +} + +// FP reduction scaling pattern (12,12,12,15,21,33,57): progressive from M1 +// Used for: FP reductions +class Get15213357Latency { + int c = !cond( + !eq(mx, "M1") : 15, + !eq(mx, "M2") : 21, + !eq(mx, "M4") : 33, + !eq(mx, "M8") : 57, + !eq(mx, "MF2") : 12, + !eq(mx, "MF4") : 12, + !eq(mx, "MF8") : 12 + ); +} + +// Reduction scaling pattern (5,5,5,7,11,19,35): progressive from M1 +// Used for: integer reductions (fractional=5) +class Get7111935Latency { + int c = !cond( + !eq(mx, "M1") : 7, + !eq(mx, "M2") : 11, + !eq(mx, "M4") : 19, + !eq(mx, "M8") : 35, + !eq(mx, "MF2") : 5, + !eq(mx, "MF4") : 5, + !eq(mx, "MF8") : 5 + ); +} + +// Flat scaling pattern (5,5,5,5,5,5,8): constant until final jump +// Used for: e32 multiply pattern, some FP ops +class Get5558Latency { + int c = !cond( + !eq(mx, "M1") : 5, + !eq(mx, "M2") : 5, + !eq(mx, "M4") : 5, + !eq(mx, "M8") : 8, + !eq(mx, "MF2") : 5, + !eq(mx, "MF4") : 5, + !eq(mx, "MF8") : 5 + ); +} + +// Exponential scaling pattern (7,7,7,7,8,16,32): exponential at M4/M8 +// Used for: e64 multiply pattern, complex ops +class Get781632Latency { + int c = !cond( + !eq(mx, "M1") : 7, + !eq(mx, "M2") : 8, + !eq(mx, "M4") : 16, + !eq(mx, "M8") : 32, + !eq(mx, "MF2") : 7, + !eq(mx, "MF4") : 7, + !eq(mx, "MF8") : 7 + ); +} + def SpacemitX60Model : SchedMachineModel { let IssueWidth = 2; // dual-issue let MicroOpBufferSize = 0; // in-order @@ 
-44,6 +194,16 @@ let BufferSize = 0 in { // floating point instructions, this model assumes single issue as // increasing it reduces the gains we saw in performance def SMX60_FP : ProcResource<1>; + + // Vector pipeline + // Single issue for vector store/load instructions + def SMX60_VLS : ProcResource<1>; + + def SMX60_VIEU : ProcResource<1>; + + // The C908 user manual says: "The vector execution unit is developed by + // extending the floating-point unit", so let's assume single issue for now + def SMX60_VFP : ProcResource<1>; } //===----------------------------------------------------------------------===// @@ -232,9 +392,660 @@ let Latency = 4 in { def : WriteRes; } +// 6. Configuration-Setting Instructions +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// 7. Vector Loads and Stores +// TODO: These latencies are estimations and are not confirmed experimentally +foreach mx = SchedMxList in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + // Unit-stride loads and stores + let Latency = Get44816Latency.c in { + defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>; + } + + // Mask loads and stores + let Latency = 3 in { + defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>; + defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>; + } + + // Strided and indexed loads and stores: scale with both LMUL and EEW + foreach eew = [8, 16, 32, 64] in { + defvar EEWMultiplier = !div(eew, 8); + defvar StridedLatency = !mul(Get4458Latency.c, EEWMultiplier); + let Latency = StridedLatency in { + defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>; + + defm "" : 
LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>; + } + } +} + +// Segmented loads and stores: base latency multiplied by number of fields +// TODO: These latencies are estimations and are not confirmed experimentally +foreach mx = SchedMxList in { + foreach nf=2-8 in { + foreach eew = [8, 16, 32, 64] in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + defvar EEWMultiplier = !div(eew, 8); + defvar StridedLatency = !mul(Get4458Latency.c, EEWMultiplier); + + // Unit-stride segmented + let Latency = !mul(Get44816Latency.c, nf) in { + defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + } + + // Strided/indexed segmented + let Latency = !mul(StridedLatency, nf) in { + defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + + defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>; + } + } + } +} + +// Whole register move/load/store +// TODO: These latencies are estimations and are not confirmed experimentally +foreach LMul = [1, 2, 4, 8] in { + let Latency = Get461018Latency(LMul))>.c in { + def : WriteRes("WriteVLD" # LMul # "R"), [SMX60_VLS]>; + def : WriteRes("WriteVST" # LMul # "R"), [SMX60_VLS]>; + } +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : 
WriteRes; + def : WriteRes; + def : WriteRes; +} +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 16; } + +// 11. Vector Integer Arithmetic Instructions +foreach mx = SchedMxList in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = Get4458Latency.c, ReleaseAtCycles = [Get1248Latency.c] in { + defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = Get44816Latency.c in { + // Pattern of vadd, vsub, vrsub: 4/4/5/8 + // Pattern of vand, vor, vxor: 4/4/8/16 + // They are grouped together, so we used the worst case 4/4/5/16 + // TODO: use InstRW to override individual instructions' scheduling data + defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>; + + defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>; + + // Pattern of vadc, vsbc: 4/4/4/4/4/8/16, except for e8m8 = 9. 
We set e8m8 to 16 + defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = Get461018Latency.c in { + // Pattern of vmadc, vmsbc, vmseq, etc: 4/4/4/4/6/10/18, except for e8m8 = 17 + // We set e8m8 to 18 + defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>; + } + + // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8, + // e64 = 7,8,16,32. We use the worst-case until we can split the SEW. + // TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites + let Latency = Get781632Latency.c in { + defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// Widening +// Pattern of vwmul, vwmacc, etc: e8/e16 = 4/4/5/8, e32 = 5,5,5,8 +// We use the worst-case for all. 
+foreach mx = SchedMxListW in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + defvar WideningLat = Get4588Latency.c; + let Latency = WideningLat, ReleaseAtCycles = [Get1248Latency.c] in { + defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>; + } + let Latency = WideningLat in { + defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// Division and remainder operations +// Pattern of vdivu: 11/11/11/20/40/80/160 +// Pattern of vdiv: 12/12/12/22/44/88/176 +// Pattern of vremu: 12/12/12/22/44/88/176 +// Pattern of vrem: 13/13/13/24/48/96/192 +// We use the worst-case for all: 24/24/24/24/48/96/192 +// TODO: Create separate WriteVIRem to more closely match the latencies +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + // NumDLEN = 2 * LMUL (since DLEN = VLEN/2) + defvar NumDLEN = !mul(2, Get1248Latency.c); + + let Latency = !mul(NumDLEN, 12) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>; + } + } +} + +// Simple Narrowing Shift and Clips +foreach mx = ["MF8", "MF4", "MF2", "M1"] in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = Get44816Latency.c in { + defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], 
mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// Complex Narrowing Shift and Clips +foreach mx = ["M2", "M4"] in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = !mul(Get44816Latency.c, 2) in { + defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// 12. Vector Fixed-Point Arithmetic Instructions +foreach mx = SchedMxList in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = Get4458Latency.c, ReleaseAtCycles = [Get1248Latency.c] in { + defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>; + } + + // Pattern of vsmul: e8/e16 = 4/4/5/8, e32 = 5,5,5,8, e64 = 7,8,16,32 + // We use the worst-case until we can split the SEW. 
+ // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites + let Latency = Get781632Latency.c in { + defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = Get44816Latency.c in { + defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// 13. Vector Floating-Point Instructions + +// Simple Vector Floating-Point Instructions +foreach mx = ["MF4", "MF2"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + let Latency = 4 in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + let Latency = 5 in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Complex Vector Floating-Point Instructions +foreach mx = ["M1", "M2", "M4", "M8"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = 
SMX60IsWorstCaseMXSEW.c; + + defvar ArithLat = Get4458Latency.c; + let Latency = ArithLat, ReleaseAtCycles = [Get1248Latency.c] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + defvar FlatLat = Get5558Latency.c; + let Latency = !if(!eq(sew, 64), FlatLat, ArithLat) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + // TODO: for some reason, the following cond is not working, and always use FlatLat + let Latency = !if(!eq(sew, 64), Get6678Latency.c, FlatLat) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + let Latency = Get44816Latency.c in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxList in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = Get461018Latency.c in { + defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>; + } + + let Latency = Get44816Latency.c in { + defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, 
IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>; + } +} + +// Widening conversion operations use 4 * LMUL cycles +foreach mx = SchedMxListW in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + let Latency = !mul(Get1248Latency.c, 4) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxListFW in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = !mul(Get1248Latency.c, 4) in { + defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>; + } +} + +foreach mx = SchedMxListFW in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + // Pattern for vfwsub/vfwadd.vv, vfwsub/vfwadd.vf: e16mf4=4, e16mf2=4, e16m1=4, e16m2=5, + // e16m4=8, e32mf2=4, e32m1=4, e32m2=5, e32m4=8 + // Pattern for vfwsub/vfwadd.wv, vfwsub/vfwadd.wf: e16mf4=5, e16mf2=5, e16m1=5, e16m2=9, + // e16m4=17, e32mf2=5, e32m1=5, e32m2=9, e32m4=17 + // TODO: Split .wf/.wv variants into separate scheduling classes to use 5/5/9/17 + defvar LMulLat = Get1248Latency.c; + let Latency = !mul(LMulLat, 4) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + // Pattern for vfwmul.vv, vfwmul.vf: e16 = 4/4/4/6/8. 
We use 4/4/5/8 as approximation + // TODO: e32m4 = 8, but it's set to 5 here + let Latency = !if(!eq(sew, 32), Get5558Latency.c, Get4588Latency.c) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + // Pattern for vfwmacc, vfwnmacc, etc: e16 = 5/5/5/8; e32 = 6/6/7/8 + // Use existing 6,6,7,8 as close approximation + let Latency = Get6678Latency.c in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + let Latency = !mul(LMulLat, 4) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Narrowing conversion operations use 4 * LMUL cycles +foreach mx = SchedMxListW in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = !mul(Get1248Latency.c, 4) in { + defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>; + } +} + +foreach mx = SchedMxListFW in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + let Latency = !mul(Get1248Latency.c, 4) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Simple floating-point division operations +foreach mx = ["MF4", "MF2"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + let Latency = 7 in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Complex floating-point division operations with measured latencies +foreach mx = ["M1", "M2", "M4", "M8"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = 
SMX60IsWorstCaseMXSEW.c; + defvar LMulLat = Get1248Latency.c; + + // Pattern for vfdiv.vf: e16 = 12/24/48/96; e32 = 12/24/48/96; e64 = 18/36/72/144 + // Pattern for vfrdiv.vf: e16 = 12/24/48/96; e32 = 12/24/48/96; e64 = 40/80/160/320 + // We use the worst-case, vfdiv.vf is penalized in e64 + // TODO: split vfdiv.vf and vfrdiv.vf into separate scheduling classes + let Latency = !if(!eq(sew, 64), !mul(LMulLat, 40), !mul(LMulLat, 12)) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>; + } + + // Compute latency based on SEW and LMUL combination + defvar SEWLatencyFactor = !cond( + !eq(sew, 16) : 12, // e16: 12*LMUL + !eq(sew, 32) : 38, // e32: 38*LMUL + !eq(sew, 64) : 40 // e64: 40*LMUL + ); + + let Latency = !mul(LMulLat, SEWLatencyFactor) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Pattern for vfsqrt.v: e16 = 18/36/72/144; e32 = 38/76/152/304; e64 = 40/80/160/320 +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + // Compute latency based on SEW and LMUL combination + defvar SEWLatencyFactor = !cond( + !eq(sew, 16) : 18, // e16: 18*LMUL + !eq(sew, 32) : 38, // e32: 38*LMUL + !eq(sew, 64) : 40 // e64: 40*LMUL + ); + + let Latency = !mul(Get1248Latency.c, SEWLatencyFactor) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// 14. Vector Reduction Operations +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + let Latency = Get7111935Latency.c in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + + // Pattern for vredsum: 5/5/5/7/11/19/35 + // Pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34 + // They are grouped together, so we use the worst-case vredsum latency. 
+ // TODO: split vredand, vredor, vredxor into separate scheduling classes. + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxListWRed in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + let Latency = Get7111935Latency.c in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + // Pattern for vfredmax.vs, vfredmin.vs: 12,12,15,21,33,57 + // Pattern for vfredusum.vs is slightly lower for e16/e32 + // We use the worst-case for simplicity + let Latency = Get15213357Latency.c in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Simple vfredosum operations +foreach mx = ["MF4", "MF2"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + // Slightly increased latencies for e32mf2=24 (should be 12) + defvar SimpleLatency = !cond( + !eq(mx, "MF4") : 12, + !eq(mx, "MF2") : 24 + ); + + let Latency = SimpleLatency in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Complex vfredosum operations +foreach mx = ["M1", "M2", "M4", "M8"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + // Compute latency based on SEW and LMUL combination + defvar SEWLatencyFactor = !cond( + !eq(sew, 16) : 48, // e16: 48*LMUL + !eq(sew, 32) : 24, // e32: 24*LMUL + !eq(sew, 64) : 12 // e64: 12*LMUL + ); + + let Latency = !mul(Get1248Latency.c, SEWLatencyFactor) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Simple vfwredosum.vs and vfwredusum.vs operations 
+foreach mx = ["MF4", "MF2"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + // Slightly increased latencies for e32mf2=32 (should be 16) + defvar SimpleLatency = !cond( + !eq(mx, "MF4") : 16, + !eq(mx, "MF2") : 32 + ); + + let Latency = SimpleLatency in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// Complex vfwredosum.vs and vfwredusum.vs operations (integer LMULs) +foreach mx = ["M1", "M2", "M4", "M8"] in { + foreach sew = SchedSEWSet.val in { + defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; + + // Compute latency based on SEW and LMUL combination + defvar SEWLatencyFactor = !cond( + !eq(sew, 16) : 64, // e16: 64*LMUL + !eq(sew, 32) : 32 // e32: 32*LMUL + ); + + let Latency = !mul(Get1248Latency.c, SEWLatencyFactor) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>; + } + } +} + +// 15. Vector Mask Instructions +foreach mx = SchedMxList in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = 4 in { + defm "" : LMULWriteResMX<"WriteVMALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMSFSV", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = 6 in { + defm "" : LMULWriteResMX<"WriteVMPopV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMFFSV", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = Get44816Latency.c in { + defm "" : LMULWriteResMX<"WriteVIotaV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIdxV", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// 16. 
Vector Permutation Instructions +// Slide +foreach mx = SchedMxList in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + // Pattern for vslide1down.vx, vslidedown.vi/vx: 4/5/9/17 + // Pattern for vslide1up.vx: 4/4/8/16 + // We use 4/4/8/16 for simplicity + let Latency = Get44816Latency.c in { + defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>; + + defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>; + + defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>; + } +} + +// Simple Gather and Compress +foreach mx = ["MF8", "MF4", "MF2", "M1"] in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = 4 in { + defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>; + + foreach sew = SchedSEWSet.val in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCase>; + + // Slightly reduced latencies for e8mf1=4 (should be 8) + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCase>; + } + } +} + +// Complex Gather and Compress +foreach mx = ["M2", "M4", "M8"] in { + defvar IsWorstCase = SMX60IsWorstCaseMX.c; + + let Latency = !mul(Get1248Latency.c, 2) in { + defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>; + } + + foreach sew = SchedSEWSet.val in { + defvar IsWorstCaseSEW = SMX60IsWorstCaseMXSEW.c; + + defvar BasicLat = Get44816Latency.c; + defvar ExpLat = !mul(BasicLat, BasicLat); + let Latency = ExpLat in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>; + 
} + + let Latency = !if(!eq(sew, 8), !mul(ExpLat, 2), ExpLat) in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>; + } + + defvar CompressLat = !cond( + !eq(mx, "M2") : 10, + !eq(mx, "M4") : 36, + true : 136 // M8 + ); + let Latency = CompressLat in { + defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>; + } + } +} + // Others def : WriteRes; def : WriteRes; +def : WriteRes; //===----------------------------------------------------------------------===// // Bypass and advance @@ -341,10 +1152,184 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +// 6. Configuration-Setting Instructions +def : ReadAdvance; +def : ReadAdvance; + +// 7. Vector Loads and Stores +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVSTEV", 0>; +defm "" : LMULReadAdvance<"ReadVSTM", 0>; +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVSTS8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS64V", 0>; +defm "" : LMULReadAdvance<"ReadVLDUXV", 0>; +defm "" : LMULReadAdvance<"ReadVLDOXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX8", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX16", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX32", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX64", 0>; +defm "" : LMULReadAdvance<"ReadVSTUXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX8", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX16", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX32", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX64", 0>; +defm "" : LMULReadAdvance<"ReadVSTOXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>; +defm "" : 
LMULReadAdvance<"ReadVSTOX64V", 0>; +// LMUL Aware +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// 12. Vector Integer Arithmetic Instructions +defm : LMULReadAdvance<"ReadVIALUV", 0>; +defm : LMULReadAdvance<"ReadVIALUX", 0>; +defm : LMULReadAdvanceW<"ReadVIWALUV", 0>; +defm : LMULReadAdvanceW<"ReadVIWALUX", 0>; +defm : LMULReadAdvance<"ReadVExtV", 0>; +defm : LMULReadAdvance<"ReadVICALUV", 0>; +defm : LMULReadAdvance<"ReadVICALUX", 0>; +defm : LMULReadAdvance<"ReadVShiftV", 0>; +defm : LMULReadAdvance<"ReadVShiftX", 0>; +defm : LMULReadAdvanceW<"ReadVNShiftV", 0>; +defm : LMULReadAdvanceW<"ReadVNShiftX", 0>; +defm : LMULReadAdvance<"ReadVICmpV", 0>; +defm : LMULReadAdvance<"ReadVICmpX", 0>; +defm : LMULReadAdvance<"ReadVIMinMaxV", 0>; +defm : LMULReadAdvance<"ReadVIMinMaxX", 0>; +defm : LMULReadAdvance<"ReadVIMulV", 0>; +defm : LMULReadAdvance<"ReadVIMulX", 0>; +defm : LMULSEWReadAdvance<"ReadVIDivV", 0>; +defm : LMULSEWReadAdvance<"ReadVIDivX", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulV", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulX", 0>; +defm : LMULReadAdvance<"ReadVIMulAddV", 0>; +defm : LMULReadAdvance<"ReadVIMulAddX", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>; +defm : LMULReadAdvance<"ReadVIMergeV", 0>; +defm : LMULReadAdvance<"ReadVIMergeX", 0>; +defm : LMULReadAdvance<"ReadVIMovV", 0>; +defm : LMULReadAdvance<"ReadVIMovX", 0>; + +// 13. Vector Fixed-Point Arithmetic Instructions +defm "" : LMULReadAdvance<"ReadVSALUV", 0>; +defm "" : LMULReadAdvance<"ReadVSALUX", 0>; +defm "" : LMULReadAdvance<"ReadVAALUV", 0>; +defm "" : LMULReadAdvance<"ReadVAALUX", 0>; +defm "" : LMULReadAdvance<"ReadVSMulV", 0>; +defm "" : LMULReadAdvance<"ReadVSMulX", 0>; +defm "" : LMULReadAdvance<"ReadVSShiftV", 0>; +defm "" : LMULReadAdvance<"ReadVSShiftX", 0>; +defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>; +defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>; + +// 14. 
Vector Floating-Point Instructions +defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>; +defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; +defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>; +defm "" : LMULReadAdvance<"ReadVFClassV", 0>; +defm "" : LMULReadAdvance<"ReadVFMergeV", 0>; +defm "" : LMULReadAdvance<"ReadVFMergeF", 0>; +defm "" : LMULReadAdvance<"ReadVFMovF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>; +defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>; +defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>; +defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>; + +// 15. 
Vector Reduction Operations +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// 16. Vector Mask Instructions +defm "" : LMULReadAdvance<"ReadVMALUV", 0>; +defm "" : LMULReadAdvance<"ReadVMPopV", 0>; +defm "" : LMULReadAdvance<"ReadVMFFSV", 0>; +defm "" : LMULReadAdvance<"ReadVMSFSV", 0>; +defm "" : LMULReadAdvance<"ReadVIotaV", 0>; + +// 17. Vector Permutation Instructions +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVISlideV", 0>; +defm "" : LMULReadAdvance<"ReadVISlideX", 0>; +defm "" : LMULReadAdvance<"ReadVFSlideV", 0>; +defm "" : LMULReadAdvance<"ReadVFSlideF", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>; +// LMUL Aware +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Others +def : ReadAdvance; +def : ReadAdvance; +foreach mx = SchedMxList in { + def : ReadAdvance("ReadVPassthru_" # mx), 0>; + foreach sew = SchedSEWSet.val in + def : ReadAdvance("ReadVPassthru_" # mx # "_E" # sew), 0>; +} + //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedQ; -defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbkb; diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll 
b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index 08cab7cd359b9..26578ce57a123 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -304,27 +304,27 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: li t1, 0 ; RV64X60-NEXT: addi s1, a7, -1 ; RV64X60-NEXT: zext.w s1, s1 -; RV64X60-NEXT: mul t2, a1, s1 -; RV64X60-NEXT: mul t3, a3, s1 -; RV64X60-NEXT: mul t4, a5, s1 +; RV64X60-NEXT: mul t3, a1, s1 +; RV64X60-NEXT: mul t4, a3, s1 +; RV64X60-NEXT: mul t5, a5, s1 ; RV64X60-NEXT: add s0, a0, a6 -; RV64X60-NEXT: add s1, a2, a6 -; RV64X60-NEXT: add t5, a4, a6 -; RV64X60-NEXT: add s0, s0, t2 ; RV64X60-NEXT: csrr t2, vlenb -; RV64X60-NEXT: add t3, t3, s1 +; RV64X60-NEXT: add s1, a2, a6 +; RV64X60-NEXT: add t3, t3, s0 +; RV64X60-NEXT: add s0, a4, a6 +; RV64X60-NEXT: add t4, t4, s1 ; RV64X60-NEXT: li t6, 32 -; RV64X60-NEXT: add t4, t4, t5 -; RV64X60-NEXT: sltu t3, a0, t3 -; RV64X60-NEXT: sltu s1, a2, s0 -; RV64X60-NEXT: and t3, t3, s1 -; RV64X60-NEXT: or t5, a1, a3 -; RV64X60-NEXT: sltu s1, a0, t4 -; RV64X60-NEXT: sltu s0, a4, s0 -; RV64X60-NEXT: slti t4, t5, 0 +; RV64X60-NEXT: add t5, t5, s0 +; RV64X60-NEXT: sltu s0, a0, t4 +; RV64X60-NEXT: sltu s1, a2, t3 +; RV64X60-NEXT: and t4, s0, s1 +; RV64X60-NEXT: or s2, a1, a3 +; RV64X60-NEXT: sltu s0, a0, t5 +; RV64X60-NEXT: sltu s1, a4, t3 +; RV64X60-NEXT: slti t3, s2, 0 ; RV64X60-NEXT: and s0, s0, s1 ; RV64X60-NEXT: or s1, a1, a5 -; RV64X60-NEXT: or t4, t3, t4 +; RV64X60-NEXT: or t4, t4, t3 ; RV64X60-NEXT: slli t3, t2, 1 ; RV64X60-NEXT: slti s1, s1, 0 ; RV64X60-NEXT: or s0, s0, s1 @@ -366,8 +366,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: # => This Inner Loop Header: Depth=2 ; RV64X60-NEXT: vl2r.v v8, (s2) ; RV64X60-NEXT: vl2r.v v10, (s3) -; RV64X60-NEXT: sub s1, s1, t3 ; RV64X60-NEXT: vaaddu.vv v8, v8, v10 +; RV64X60-NEXT: sub s1, s1, 
t3 ; RV64X60-NEXT: vs2r.v v8, (s4) ; RV64X60-NEXT: add s4, s4, t3 ; RV64X60-NEXT: add s3, s3, t3 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s index bc9229471b20e..8838c862e6b75 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s @@ -107,6 +107,9 @@ amomaxu.d.aqrl s5, s4, (s3) # CHECK-NEXT: [2] - SMX60_IEUA:1 # CHECK-NEXT: [3] - SMX60_IEUB:1 # CHECK-NEXT: [4] - SMX60_LS:2 +# CHECK-NEXT: [5] - SMX60_VFP:1 +# CHECK-NEXT: [6] - SMX60_VIEU:1 +# CHECK-NEXT: [7] - SMX60_VLS:1 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -215,98 +218,101 @@ amomaxu.d.aqrl s5, s4, (s3) # CHECK-NEXT: [2] - SMX60_IEUB # CHECK-NEXT: [3.0] - SMX60_LS # CHECK-NEXT: [3.1] - SMX60_LS +# CHECK-NEXT: [4] - SMX60_VFP +# CHECK-NEXT: [5] - SMX60_VIEU +# CHECK-NEXT: [6] - SMX60_VLS # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: - - - 44.00 44.00 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] +# CHECK-NEXT: - - - 44.00 44.00 - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: -# CHECK-NEXT: - - - 0.50 0.50 lr.w t0, (t1) -# CHECK-NEXT: - - - 0.50 0.50 lr.w.aq t1, (t2) -# CHECK-NEXT: - - - 0.50 0.50 lr.w.rl t2, (t3) -# CHECK-NEXT: - - - 0.50 0.50 lr.w.aqrl t3, (t4) -# CHECK-NEXT: - - - 0.50 0.50 sc.w t6, t5, (t4) -# CHECK-NEXT: - - - 0.50 0.50 sc.w.aq t5, t4, (t3) -# CHECK-NEXT: - - - 0.50 0.50 sc.w.rl t4, t3, (t2) -# CHECK-NEXT: - - - 0.50 0.50 sc.w.aqrl t3, t2, (t1) -# CHECK-NEXT: - - - 0.50 0.50 lr.d t0, (t1) -# CHECK-NEXT: - - - 0.50 0.50 lr.d.aq t1, (t2) -# CHECK-NEXT: - - - 0.50 0.50 lr.d.rl t2, (t3) -# CHECK-NEXT: - - - 0.50 0.50 lr.d.aqrl t3, (t4) -# CHECK-NEXT: - - - 0.50 0.50 sc.d t6, t5, (t4) -# CHECK-NEXT: - - - 0.50 0.50 sc.d.aq t5, t4, (t3) -# CHECK-NEXT: - - - 0.50 0.50 sc.d.rl t4, t3, (t2) -# CHECK-NEXT: - - - 0.50 0.50 sc.d.aqrl t3, t2, 
(t1) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.w a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.w a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.w a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.w a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.w a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.w a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.w s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.w s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 amomaxu.w s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.w.aq a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.w.aq a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.w.aq a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.w.aq a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.w.aq a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.w.aq a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.w.aq s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.w.aq s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 amomaxu.w.aq s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.w.rl a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.w.rl a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.w.rl a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.w.rl a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.w.rl a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.w.rl a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.w.rl s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.w.rl s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 amomaxu.w.rl s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.w.aqrl a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.w.aqrl a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.w.aqrl a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.w.aqrl a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.w.aqrl a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.w.aqrl a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.w.aqrl s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.w.aqrl s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 
amomaxu.w.aqrl s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.d a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.d a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.d a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.d a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.d a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.d a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.d s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.d s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 amomaxu.d s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.d.aq a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.d.aq a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.d.aq a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.d.aq a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.d.aq a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.d.aq a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.d.aq s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.d.aq s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 amomaxu.d.aq s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.d.rl a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.d.rl a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.d.rl a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.d.rl a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.d.rl a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.d.rl a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.d.rl s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.d.rl s6, s5, (s4) -# CHECK-NEXT: - - - 0.50 0.50 amomaxu.d.rl s5, s4, (s3) -# CHECK-NEXT: - - - 0.50 0.50 amoswap.d.aqrl a4, ra, (s0) -# CHECK-NEXT: - - - 0.50 0.50 amoadd.d.aqrl a1, a2, (a3) -# CHECK-NEXT: - - - 0.50 0.50 amoxor.d.aqrl a2, a3, (a4) -# CHECK-NEXT: - - - 0.50 0.50 amoand.d.aqrl a3, a4, (a5) -# CHECK-NEXT: - - - 0.50 0.50 amoor.d.aqrl a4, a5, (a6) -# CHECK-NEXT: - - - 0.50 0.50 amomin.d.aqrl a5, a6, (a7) -# CHECK-NEXT: - - - 0.50 0.50 amomax.d.aqrl s7, s6, (s5) -# CHECK-NEXT: - - - 0.50 0.50 amominu.d.aqrl s6, s5, (s4) -# CHECK-NEXT: 
- - - 0.50 0.50 amomaxu.d.aqrl s5, s4, (s3) +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.w t0, (t1) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.w.aq t1, (t2) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.w.rl t2, (t3) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.w.aqrl t3, (t4) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.w t6, t5, (t4) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.w.aq t5, t4, (t3) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.w.rl t4, t3, (t2) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.w.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.d t0, (t1) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.d.aq t1, (t2) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.d.rl t2, (t3) +# CHECK-NEXT: - - - 0.50 0.50 - - - lr.d.aqrl t3, (t4) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.d t6, t5, (t4) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.d.aq t5, t4, (t3) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.d.rl t4, t3, (t2) +# CHECK-NEXT: - - - 0.50 0.50 - - - sc.d.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.w a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.w a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.w a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.w a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.w a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.w a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.w s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.w s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.w s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.w.aq a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.w.aq a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.w.aq a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.w.aq a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.w.aq a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.w.aq a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.w.aq s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 
- - - amominu.w.aq s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.w.aq s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.w.rl a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.w.rl a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.w.rl a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.w.rl a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.w.rl a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.w.rl a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.w.rl s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.w.rl s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.w.rl s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.w.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.w.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.w.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.w.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.w.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.w.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.w.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.w.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.w.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.d a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.d a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.d a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.d a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.d a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.d a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.d s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.d s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.d s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.d.aq a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.d.aq a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.d.aq a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - 
amoand.d.aq a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.d.aq a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.d.aq a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.d.aq s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.d.aq s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.d.aq s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.d.rl a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.d.rl a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.d.rl a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.d.rl a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.d.rl a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.d.rl a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.d.rl s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.d.rl s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.d.rl s5, s4, (s3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoswap.d.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoadd.d.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoxor.d.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoand.d.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amoor.d.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomin.d.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomax.d.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - 0.50 0.50 - - - amominu.d.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - 0.50 0.50 - - - amomaxu.d.aqrl s5, s4, (s3) diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s index b86fcbccbeabb..78f4e7f50c745 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/floating-point.s @@ -135,6 +135,9 @@ fclass.d a3, ft10 # CHECK-NEXT: [2] - SMX60_IEUA:1 # CHECK-NEXT: [3] - SMX60_IEUB:1 # CHECK-NEXT: [4] - SMX60_LS:2 +# CHECK-NEXT: [5] - SMX60_VFP:1 +# CHECK-NEXT: [6] - SMX60_VIEU:1 +# CHECK-NEXT: 
[7] - SMX60_VLS:1 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -240,95 +243,98 @@ fclass.d a3, ft10 # CHECK-NEXT: [2] - SMX60_IEUB # CHECK-NEXT: [3.0] - SMX60_LS # CHECK-NEXT: [3.1] - SMX60_LS +# CHECK-NEXT: [4] - SMX60_VFP +# CHECK-NEXT: [5] - SMX60_VIEU +# CHECK-NEXT: [6] - SMX60_VLS # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: 149.00 11.00 11.00 3.00 3.00 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] +# CHECK-NEXT: 149.00 11.00 11.00 3.00 3.00 - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: -# CHECK-NEXT: - - - 0.50 0.50 flh ft0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 fsh ft0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 flw ft0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 fsw ft0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 fld ft0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 fsd ft0, 0(a0) -# CHECK-NEXT: 1.00 - - - - fadd.h fs10, fs11, ft8 -# CHECK-NEXT: 1.00 - - - - fsub.h ft9, ft10, ft11 -# CHECK-NEXT: 1.00 - - - - fmul.h ft0, ft1, ft2 -# CHECK-NEXT: 12.00 - - - - fdiv.h ft3, ft4, ft5 -# CHECK-NEXT: 12.00 - - - - fsqrt.h ft6, ft7 -# CHECK-NEXT: 1.00 - - - - fmin.h fa5, fa6, fa7 -# CHECK-NEXT: 1.00 - - - - fmax.h fs2, fs3, fs4 -# CHECK-NEXT: 1.00 - - - - fmadd.h fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1.00 - - - - fmsub.h fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1.00 - - - - fnmsub.h fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1.00 - - - - fnmadd.h fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - fadd.s fs10, fs11, ft8 -# CHECK-NEXT: 1.00 - - - - fsub.s ft9, ft10, ft11 -# CHECK-NEXT: 1.00 - - - - fmul.s ft0, ft1, ft2 -# CHECK-NEXT: 15.00 - - - - fdiv.s ft3, ft4, ft5 -# CHECK-NEXT: 15.00 - - - - fsqrt.s ft6, ft7 -# CHECK-NEXT: 1.00 - - - - fmin.s fa5, fa6, fa7 -# CHECK-NEXT: 1.00 - - - - fmax.s fs2, fs3, fs4 -# CHECK-NEXT: 1.00 - - - - fmadd.s fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1.00 - - - - fmsub.s fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1.00 - - - - fnmsub.s fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1.00 - - 
- - fnmadd.s fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - fadd.d fs10, fs11, ft8 -# CHECK-NEXT: 1.00 - - - - fsub.d ft9, ft10, ft11 -# CHECK-NEXT: 1.00 - - - - fmul.d ft0, ft1, ft2 -# CHECK-NEXT: 22.00 - - - - fdiv.d ft3, ft4, ft5 -# CHECK-NEXT: 22.00 - - - - fsqrt.d ft6, ft7 -# CHECK-NEXT: 1.00 - - - - fmin.d fa5, fa6, fa7 -# CHECK-NEXT: 1.00 - - - - fmax.d fs2, fs3, fs4 -# CHECK-NEXT: 1.00 - - - - fmadd.d fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1.00 - - - - fmsub.d fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1.00 - - - - fnmsub.d fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1.00 - - - - fnmadd.d fs6, fs7, fs8, fs9 -# CHECK-NEXT: - 0.50 0.50 - - fmv.x.h a2, fs7 -# CHECK-NEXT: - 0.50 0.50 - - fmv.h.x ft1, a6 -# CHECK-NEXT: 1.00 - - - - fcvt.s.h fa0, ft0 -# CHECK-NEXT: 1.00 - - - - fcvt.s.h fa0, ft0, rup -# CHECK-NEXT: 1.00 - - - - fcvt.h.s ft2, fa2 -# CHECK-NEXT: 1.00 - - - - fcvt.d.h fa0, ft0 -# CHECK-NEXT: 1.00 - - - - fcvt.d.h fa0, ft0, rup -# CHECK-NEXT: 1.00 - - - - fcvt.h.d ft2, fa2 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.w.s a0, fs5 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.wu.s a1, fs6 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.s.w ft11, a4 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.s.wu ft0, a5 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.l.s a0, ft0 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.lu.s a1, ft1 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.s.l ft2, a2 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.s.lu ft3, a3 -# CHECK-NEXT: - 0.50 0.50 - - fmv.x.w a2, fs7 -# CHECK-NEXT: - 0.50 0.50 - - fmv.w.x ft1, a6 -# CHECK-NEXT: 1.00 - - - - fsgnj.s fs1, fa0, fa1 -# CHECK-NEXT: 1.00 - - - - fsgnjn.s fa1, fa3, fa4 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.wu.d a4, ft11 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.w.d a4, ft11 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.d.w ft0, a5 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.d.wu ft1, a6 -# CHECK-NEXT: 1.00 - - - - fcvt.s.d fs5, fs6 -# CHECK-NEXT: 1.00 - - - - fcvt.d.s fs7, fs8 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.l.d a0, ft0 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.lu.d a1, ft1 -# CHECK-NEXT: - 0.50 0.50 - - fcvt.d.l ft3, a3 -# 
CHECK-NEXT: - 0.50 0.50 - - fcvt.d.lu ft4, a4 -# CHECK-NEXT: - 0.50 0.50 - - fmv.x.d a2, ft2 -# CHECK-NEXT: - 0.50 0.50 - - fmv.d.x ft5, a5 -# CHECK-NEXT: 1.00 - - - - fsgnj.d fs1, fa0, fa1 -# CHECK-NEXT: 1.00 - - - - fsgnjn.d fa1, fa3, fa4 -# CHECK-NEXT: 1.00 - - - - feq.h a1, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - flt.h a2, fs10, fs11 -# CHECK-NEXT: 1.00 - - - - fle.h a3, ft8, ft9 -# CHECK-NEXT: 1.00 - - - - feq.s a1, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - flt.s a2, fs10, fs11 -# CHECK-NEXT: 1.00 - - - - fle.s a3, ft8, ft9 -# CHECK-NEXT: 1.00 - - - - feq.d a1, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - flt.d a2, fs10, fs11 -# CHECK-NEXT: 1.00 - - - - fle.d a3, ft8, ft9 -# CHECK-NEXT: 1.00 - - - - fclass.s a3, ft10 -# CHECK-NEXT: 1.00 - - - - fclass.s a3, ft10 -# CHECK-NEXT: 1.00 - - - - fclass.d a3, ft10 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: +# CHECK-NEXT: - - - 0.50 0.50 - - - flh ft0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - fsh ft0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - flw ft0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - fsw ft0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - fld ft0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - fsd ft0, 0(a0) +# CHECK-NEXT: 1.00 - - - - - - - fadd.h fs10, fs11, ft8 +# CHECK-NEXT: 1.00 - - - - - - - fsub.h ft9, ft10, ft11 +# CHECK-NEXT: 1.00 - - - - - - - fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 12.00 - - - - - - - fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 12.00 - - - - - - - fsqrt.h ft6, ft7 +# CHECK-NEXT: 1.00 - - - - - - - fmin.h fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - - - - fmax.h fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1.00 - - - - - - - fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - - - - fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1.00 - - - - - - - fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - fadd.s fs10, fs11, ft8 +# CHECK-NEXT: 1.00 - - - - - - - fsub.s ft9, ft10, ft11 +# CHECK-NEXT: 1.00 - - - - - - - fmul.s ft0, ft1, ft2 +# 
CHECK-NEXT: 15.00 - - - - - - - fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 15.00 - - - - - - - fsqrt.s ft6, ft7 +# CHECK-NEXT: 1.00 - - - - - - - fmin.s fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - - - - fmax.s fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1.00 - - - - - - - fmsub.s fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - - - - fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1.00 - - - - - - - fnmadd.s fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - fadd.d fs10, fs11, ft8 +# CHECK-NEXT: 1.00 - - - - - - - fsub.d ft9, ft10, ft11 +# CHECK-NEXT: 1.00 - - - - - - - fmul.d ft0, ft1, ft2 +# CHECK-NEXT: 22.00 - - - - - - - fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 22.00 - - - - - - - fsqrt.d ft6, ft7 +# CHECK-NEXT: 1.00 - - - - - - - fmin.d fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - - - - fmax.d fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1.00 - - - - - - - fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1.00 - - - - - - - fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1.00 - - - - - - - fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: - 0.50 0.50 - - - - - fmv.x.h a2, fs7 +# CHECK-NEXT: - 0.50 0.50 - - - - - fmv.h.x ft1, a6 +# CHECK-NEXT: 1.00 - - - - - - - fcvt.s.h fa0, ft0 +# CHECK-NEXT: 1.00 - - - - - - - fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: 1.00 - - - - - - - fcvt.h.s ft2, fa2 +# CHECK-NEXT: 1.00 - - - - - - - fcvt.d.h fa0, ft0 +# CHECK-NEXT: 1.00 - - - - - - - fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: 1.00 - - - - - - - fcvt.h.d ft2, fa2 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.w.s a0, fs5 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.wu.s a1, fs6 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.s.w ft11, a4 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.s.wu ft0, a5 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.l.s a0, ft0 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.lu.s a1, ft1 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.s.l ft2, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.s.lu ft3, a3 +# CHECK-NEXT: - 0.50 0.50 - - - - - 
fmv.x.w a2, fs7 +# CHECK-NEXT: - 0.50 0.50 - - - - - fmv.w.x ft1, a6 +# CHECK-NEXT: 1.00 - - - - - - - fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - - - - fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.wu.d a4, ft11 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.w.d a4, ft11 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.d.w ft0, a5 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.d.wu ft1, a6 +# CHECK-NEXT: 1.00 - - - - - - - fcvt.s.d fs5, fs6 +# CHECK-NEXT: 1.00 - - - - - - - fcvt.d.s fs7, fs8 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.l.d a0, ft0 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.lu.d a1, ft1 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.d.l ft3, a3 +# CHECK-NEXT: - 0.50 0.50 - - - - - fcvt.d.lu ft4, a4 +# CHECK-NEXT: - 0.50 0.50 - - - - - fmv.x.d a2, ft2 +# CHECK-NEXT: - 0.50 0.50 - - - - - fmv.d.x ft5, a5 +# CHECK-NEXT: 1.00 - - - - - - - fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - - - - fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: 1.00 - - - - - - - feq.h a1, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - flt.h a2, fs10, fs11 +# CHECK-NEXT: 1.00 - - - - - - - fle.h a3, ft8, ft9 +# CHECK-NEXT: 1.00 - - - - - - - feq.s a1, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - flt.s a2, fs10, fs11 +# CHECK-NEXT: 1.00 - - - - - - - fle.s a3, ft8, ft9 +# CHECK-NEXT: 1.00 - - - - - - - feq.d a1, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - flt.d a2, fs10, fs11 +# CHECK-NEXT: 1.00 - - - - - - - fle.d a3, ft8, ft9 +# CHECK-NEXT: 1.00 - - - - - - - fclass.s a3, ft10 +# CHECK-NEXT: 1.00 - - - - - - - fclass.s a3, ft10 +# CHECK-NEXT: 1.00 - - - - - - - fclass.d a3, ft10 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s index b72540f29f487..51a036aaae784 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/integer.s @@ -170,6 +170,9 @@ bseti a0, a1, 1 # CHECK-NEXT: [2] - SMX60_IEUA:1 # CHECK-NEXT: [3] - SMX60_IEUB:1 # CHECK-NEXT: [4] - SMX60_LS:2 +# 
CHECK-NEXT: [5] - SMX60_VFP:1 +# CHECK-NEXT: [6] - SMX60_VIEU:1 +# CHECK-NEXT: [7] - SMX60_VLS:1 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -306,126 +309,129 @@ bseti a0, a1, 1 # CHECK-NEXT: [2] - SMX60_IEUB # CHECK-NEXT: [3.0] - SMX60_LS # CHECK-NEXT: [3.1] - SMX60_LS +# CHECK-NEXT: [4] - SMX60_VFP +# CHECK-NEXT: [5] - SMX60_VIEU +# CHECK-NEXT: [6] - SMX60_VLS # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] -# CHECK-NEXT: - 180.50 44.50 5.50 5.50 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] +# CHECK-NEXT: - 180.50 44.50 5.50 5.50 - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] Instructions: -# CHECK-NEXT: - 0.50 0.50 - - addi a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - addiw a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - slti a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - seqz a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - andi a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - ori a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - xori a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - slli a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - srli a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - srai a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - slliw a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - srliw a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - sraiw a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - lui a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - auipc a1, 1 -# CHECK-NEXT: - 0.50 0.50 - - add a0, a0, a1 -# CHECK-NEXT: - 0.50 0.50 - - addw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - slt a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sltu a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - and a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - or a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - xor a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sll a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - srl a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sra a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sllw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - srlw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sraw a0, a0, a0 -# CHECK-NEXT: - 0.50 
0.50 - - sub a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - subw a0, a0, a0 -# CHECK-NEXT: - 1.00 - - - jal a0, .Ltmp0 -# CHECK-NEXT: - 1.00 - - - jalr a0 -# CHECK-NEXT: - 1.00 - - - beq a0, a0, .Ltmp1 -# CHECK-NEXT: - 1.00 - - - bne a0, a0, .Ltmp2 -# CHECK-NEXT: - 1.00 - - - blt a0, a0, .Ltmp3 -# CHECK-NEXT: - 1.00 - - - bltu a0, a0, .Ltmp4 -# CHECK-NEXT: - 1.00 - - - bge a0, a0, .Ltmp5 -# CHECK-NEXT: - 1.00 - - - bgeu a0, a0, .Ltmp6 -# CHECK-NEXT: - 0.50 0.50 - - add a0, a0, a0 -# CHECK-NEXT: - - - 0.50 0.50 lb t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 lbu t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 lh t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 lhu t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 lw t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 lwu t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 ld t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 sb t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 sh t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 sw t0, 0(a0) -# CHECK-NEXT: - - - 0.50 0.50 sd t0, 0(a0) -# CHECK-NEXT: - 0.50 0.50 - - mul a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - mulh a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - mulhu a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - mulhsu a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - mulw a0, a0, a0 -# CHECK-NEXT: - 20.00 - - - div a0, a1, a2 -# CHECK-NEXT: - 20.00 - - - divu a0, a1, a2 -# CHECK-NEXT: - 20.00 - - - rem a0, a1, a2 -# CHECK-NEXT: - 20.00 - - - remu a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - divw a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - divuw a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - remw a0, a1, a2 -# CHECK-NEXT: - 12.00 - - - remuw a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - csrrw t0, 4095, t1 -# CHECK-NEXT: - 0.50 0.50 - - csrrs s3, fflags, s5 -# CHECK-NEXT: - 0.50 0.50 - - csrrc sp, 0, ra -# CHECK-NEXT: - 0.50 0.50 - - csrrwi a5, 0, 0 -# CHECK-NEXT: - 0.50 0.50 - - csrrsi t2, 4095, 31 -# CHECK-NEXT: - 0.50 0.50 - - csrrci t1, sscratch, 5 -# CHECK-NEXT: - 0.50 0.50 - - czero.eqz a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - czero.nez a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - 
czero.eqz a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - czero.nez a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - add.uw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - slli.uw a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - sh1add.uw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sh2add.uw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sh3add.uw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sh1add a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sh2add a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sh3add a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - andn a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - orn a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - xnor a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - clz a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - clzw a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - ctz a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - ctzw a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - cpop a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - cpopw a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - min a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - minu a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - max a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - maxu a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sext.b a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - sext.h a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - zext.h a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - rol a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - rolw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - ror a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - rorw a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - rori a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - roriw a0, a0, 1 -# CHECK-NEXT: - 0.50 0.50 - - orc.b a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - rev8 a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - clmul a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - clmulr a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - clmulh a0, a0, a0 -# CHECK-NEXT: - 0.50 0.50 - - bclr a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - bclri a0, a1, 1 -# CHECK-NEXT: - 0.50 0.50 - - bext a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - bexti a0, a1, 1 -# CHECK-NEXT: - 0.50 0.50 - - binv a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - binvi a0, a1, 1 -# 
CHECK-NEXT: - 0.50 0.50 - - bset a0, a1, a2 -# CHECK-NEXT: - 0.50 0.50 - - bseti a0, a1, 1 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4] [5] [6] Instructions: +# CHECK-NEXT: - 0.50 0.50 - - - - - addi a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - addiw a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - slti a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - seqz a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - andi a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - ori a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - xori a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - slli a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - srli a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - srai a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - slliw a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - srliw a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - sraiw a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - lui a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - auipc a1, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - add a0, a0, a1 +# CHECK-NEXT: - 0.50 0.50 - - - - - addw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - slt a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sltu a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - and a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - or a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - xor a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sll a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - srl a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sra a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sllw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - srlw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sraw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sub a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - subw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - jal a0, .Ltmp0 +# CHECK-NEXT: - 1.00 - - - - - - jalr a0 +# CHECK-NEXT: - 1.00 - - - - - - beq a0, a0, .Ltmp1 +# CHECK-NEXT: - 1.00 - - - - - - bne a0, a0, .Ltmp2 +# CHECK-NEXT: - 1.00 - - - - - - blt a0, a0, .Ltmp3 +# CHECK-NEXT: - 1.00 - - - - - - 
bltu a0, a0, .Ltmp4 +# CHECK-NEXT: - 1.00 - - - - - - bge a0, a0, .Ltmp5 +# CHECK-NEXT: - 1.00 - - - - - - bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: - 0.50 0.50 - - - - - add a0, a0, a0 +# CHECK-NEXT: - - - 0.50 0.50 - - - lb t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - lbu t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - lh t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - lhu t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - lw t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - lwu t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - ld t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - sb t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - sh t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - sw t0, 0(a0) +# CHECK-NEXT: - - - 0.50 0.50 - - - sd t0, 0(a0) +# CHECK-NEXT: - 0.50 0.50 - - - - - mul a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - mulh a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - mulhu a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - mulhsu a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - mulw a0, a0, a0 +# CHECK-NEXT: - 20.00 - - - - - - div a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - - - - divu a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - - - - rem a0, a1, a2 +# CHECK-NEXT: - 20.00 - - - - - - remu a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - - - - divw a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - - - - divuw a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - - - - remw a0, a1, a2 +# CHECK-NEXT: - 12.00 - - - - - - remuw a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - csrrw t0, 4095, t1 +# CHECK-NEXT: - 0.50 0.50 - - - - - csrrs s3, fflags, s5 +# CHECK-NEXT: - 0.50 0.50 - - - - - csrrc sp, 0, ra +# CHECK-NEXT: - 0.50 0.50 - - - - - csrrwi a5, 0, 0 +# CHECK-NEXT: - 0.50 0.50 - - - - - csrrsi t2, 4095, 31 +# CHECK-NEXT: - 0.50 0.50 - - - - - csrrci t1, sscratch, 5 +# CHECK-NEXT: - 0.50 0.50 - - - - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - czero.nez a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - czero.nez a0, a1, a2 +# CHECK-NEXT: - 0.50 
0.50 - - - - - add.uw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - slli.uw a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sh2add.uw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sh3add.uw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sh1add a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sh2add a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sh3add a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - andn a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - orn a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - xnor a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - clz a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - clzw a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - ctz a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - ctzw a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - cpop a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - cpopw a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - min a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - minu a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - max a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - maxu a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sext.b a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - sext.h a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - zext.h a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - rol a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - rolw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - ror a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - rorw a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - rori a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - roriw a0, a0, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - orc.b a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - rev8 a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - clmul a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - clmulr a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - clmulh a0, a0, a0 +# CHECK-NEXT: - 0.50 0.50 - - - - - bclr a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - bclri a0, a1, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - bext a0, a1, a2 +# CHECK-NEXT: - 0.50 
0.50 - - - - - bexti a0, a1, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - binv a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - binvi a0, a1, 1 +# CHECK-NEXT: - 0.50 0.50 - - - - - bset a0, a1, a2 +# CHECK-NEXT: - 0.50 0.50 - - - - - bseti a0, a1, 1 diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s new file mode 100644 index 0000000000000..14c93cc9d3a2d --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s @@ -0,0 +1,6820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 -instruction-tables=full < %s | FileCheck %s + +# Basic arithmetic operations + +vsetvli x28, x0, e8, mf2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vadd.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vadd.vv v8, v8, 
v8 +vsetvli x28, x0, e8, mf4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vadd.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu 
+vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vadd.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vsub.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, 
e16, mf4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vsub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vadc.vvm v8, v8, v8, v0 
+vsetvli x28, x0, e64, m8, tu, mu +vadc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vadc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m2, 
tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m8, tu, mu +vadc.vim v8, v8, 12, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 + +vsetvli x28, 
x0, e8, mf2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.vv v8, v16, 
v24 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu 
+vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, 
x0, e16, mf2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.vx v8, v16, x30 
+vsetvli x28, x0, e32, mf2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vaaddu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, 
m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vaaddu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vaadd.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli 
x28, x0, e8, mf4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vaadd.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vasubu.vv v8, 
v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vasubu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vasubu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, 
x0, e8, m8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vasub.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli 
x28, x0, e64, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vasub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, 
m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vim v8, v8, 12, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, 
tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu 
+vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, 
m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli 
x28, x0, e32, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vxm v8, 
v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, 
mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vrsub.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vrsub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, 
x0, e16, m8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vsaddu.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vsaddu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vsaddu.vx 
v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vsaddu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, 
m8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vsadd.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vsadd.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, 
mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vsadd.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vssubu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli 
x28, x0, e8, mf4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vssubu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu 
+vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vssub.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vssub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, 
x0, e16, mf2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.wx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.wv 
v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.wx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, 
tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.wx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, 
x0, e8, m4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.wx v8, v16, x30 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SMX60_FP:1 +# CHECK-NEXT: [1] - SMX60_IEU:2 SMX60_IEUA, SMX60_IEUB +# CHECK-NEXT: [2] - SMX60_IEUA:1 +# CHECK-NEXT: [3] - SMX60_IEUB:1 +# CHECK-NEXT: [4] - SMX60_LS:2 +# CHECK-NEXT: [5] - SMX60_VFP:1 +# CHECK-NEXT: [6] - SMX60_VIEU:1 +# CHECK-NEXT: [7] - SMX60_VLS:1 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) +# CHECK-NEXT: [7]: Bypass Latency +# CHECK-NEXT: [8]: Resources ( | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [, | [] | [,