You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[RISCV] Don't lose elements from False in vmerge -> vmv peephole (#149720)
In the vmerge peephole, we currently allow different AVLs for the vmerge
and its true operand.
If vmerge's VL > true's VL, vmerge can "preserve" elements from false
that would otherwise be clobbered with a tail agnostic policy on true.
mask    1 1 1 1 0 0 0 0
true    x x x x|. . . .   AVL=4
vmerge  x x x x f f|. .   AVL=6
If we convert this to vmv.v.v we will lose those false elements:
mask    1 1 1 1 0 0 0 0
true    x x x x|. . . .   AVL=4
vmv.v.v x x x x . .|. .   AVL=6
Fix this by checking that vmerge's AVL is <= true's AVL.
Should fix #149335
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
+28-7Lines changed: 28 additions & 7 deletions
Original file line number
Diff line number
Diff line change
@@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
71
71
ret <vscale x 8 x i64> %1
72
72
}
73
73
74
-
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
75
-
declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
76
-
declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
77
-
declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
78
-
declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
79
-
declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
80
-
declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
74
+
; Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
75
+
define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
76
+
; CHECK-LABEL: preserve_false:
77
+
; CHECK: # %bb.0:
78
+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
79
+
; CHECK-NEXT: vmv1r.v v10, v9
80
+
; CHECK-NEXT: vle32.v v10, (a0), v0.t
81
+
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
82
+
; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
83
+
; CHECK-NEXT: ret
84
+
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
85
+
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
86
+
ret <vscale x 2 x i32> %res
87
+
}
88
+
89
+
; Can fold this because the vmerge's avl is known to be <= true's avl, so no elements from false need to be introduced past avl.
90
+
define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
91
+
; CHECK-LABEL: preserve_false_avl_known_le:
92
+
; CHECK: # %bb.0:
93
+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
94
+
; CHECK-NEXT: vle32.v v9, (a0), v0.t
95
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
96
+
; CHECK-NEXT: vmv.v.v v8, v9
97
+
; CHECK-NEXT: ret
98
+
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
99
+
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
0 commit comments