@@ -15265,6 +15265,259 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
   ret <4 x i32> %x
 }
 
+define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7 x i8> %passthru) {
+; RV32-LABEL: mgather_baseidx_v7i8:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 127
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vmand.mm v0, v0, v10
+; RV32-NEXT: vsext.vf4 v10, v8
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_v7i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: li a1, 127
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vmv.s.x v10, a1
+; RV64V-NEXT: vmand.mm v0, v0, v10
+; RV64V-NEXT: vsext.vf8 v12, v8
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_v7i8:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: addi sp, sp, -16
+; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16
+; RV64ZVE32F-NEXT: .cfi_remember_state
+; RV64ZVE32F-NEXT: li a1, 64
+; RV64ZVE32F-NEXT: addi a2, sp, 8
+; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; RV64ZVE32F-NEXT: vsm.v v0, (a2)
+; RV64ZVE32F-NEXT: ld a1, 8(sp)
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_2
+; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_2: # %else
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_4
+; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.v.x v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_4: # %else2
+; RV64ZVE32F-NEXT: andi a2, a1, 4
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_6
+; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 3
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 4
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v12
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v11, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_6: # %else5
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_8
+; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 5
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: .LBB132_8: # %else8
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT: bnez a2, .LBB132_13
+; RV64ZVE32F-NEXT: # %bb.9: # %else11
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB132_14
+; RV64ZVE32F-NEXT: .LBB132_10: # %else14
+; RV64ZVE32F-NEXT: andi a1, a1, 64
+; RV64ZVE32F-NEXT: beqz a1, .LBB132_12
+; RV64ZVE32F-NEXT: .LBB132_11: # %cond.load16
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: add a0, a0, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 5
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: lbu a0, 0(a0)
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-NEXT: .LBB132_12: # %else17
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-NEXT: addi sp, sp, 16
+; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
+; RV64ZVE32F-NEXT: ret
+; RV64ZVE32F-NEXT: .LBB132_13: # %cond.load10
+; RV64ZVE32F-NEXT: .cfi_restore_state
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.v.x v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 5
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB132_10
+; RV64ZVE32F-NEXT: .LBB132_14: # %cond.load13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2
+; RV64ZVE32F-NEXT: vmv.x.s a4, v11
+; RV64ZVE32F-NEXT: vmv.v.x v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 3
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a4
+; RV64ZVE32F-NEXT: vmv.x.s a4, v10
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 6
+; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
+; RV64ZVE32F-NEXT: vslide1down.vx v11, v11, a2
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v11, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v10, v10, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v10, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: andi a1, a1, 64
+; RV64ZVE32F-NEXT: bnez a1, .LBB132_11
+; RV64ZVE32F-NEXT: j .LBB132_12
+  %ptrs = getelementptr inbounds i8, ptr %base, <7 x i8> %idxs
+  %v = call <7 x i8> @llvm.masked.gather.v7i8.v7p0(<7 x ptr> %ptrs, i32 1, <7 x i1> %m, <7 x i8> %passthru)
+  ret <7 x i8> %v
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32V-ZVFH: {{.*}}
 ; RV32V-ZVFHMIN: {{.*}}