Skip to content

Commit fd5e493

Browse files
committed
[PhaseOrdering] add tests for vector select; NFC
The 1st test corresponds to a minimally optimized (mem2reg) version of the example in issue llvm#52631. The 2nd test copies an existing instcombine test with the same pattern. If we canonicalize differently, we can miss reducing to minimal form in a single invocation of -instcombine, but that should not escape the normal opt pipeline.
1 parent efefc4e commit fd5e493

File tree

2 files changed

+185
-0
lines changed

2 files changed

+185
-0
lines changed

llvm/test/Transforms/InstCombine/and.ll

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,3 +1460,87 @@ define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
14601460
%r = and i8 %sign, %y
14611461
ret i8 %r
14621462
}
1463+
1464+
; Negative test: the ashr-by-(bitwidth-1) sign mask is inverted (xor -1)
; before feeding the 'and'. The autogenerated CHECK lines match the input
; IR unchanged, i.e. instcombine performs no fold on this inverted form.
; NOTE(review): the bare "14xx+" lines below are diff-view line-number
; artifacts from the page scrape, not IR.
define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) {
1465+
; CHECK-LABEL: @not_ashr_bitwidth_mask(
1466+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
1467+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1468+
; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1469+
; CHECK-NEXT: ret i8 [[POS_OR_ZERO]]
1470+
;
1471+
%sign = ashr i8 %x, 7
1472+
%not = xor i8 %sign, -1
1473+
%pos_or_zero = and i8 %not, %y
1474+
ret i8 %pos_or_zero
1475+
}
1476+
1477+
; Vector + commuted variant of the inverted sign-mask pattern: the 'and'
; operands are swapped relative to the scalar test, and the leading 'mul'
; keeps %y "complex" so complexity-based operand ordering cannot undo the
; commute (see the inline comment). CHECK lines expect the IR unchanged.
; NOTE(review): bare "14xx+" lines are diff-scrape artifacts, not IR.
define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
1478+
; CHECK-LABEL: @not_ashr_bitwidth_mask_vec_commute(
1479+
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
1480+
; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
1481+
; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[SIGN]], <i8 -1, i8 -1>
1482+
; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[NOT]]
1483+
; CHECK-NEXT: ret <2 x i8> [[POS_OR_ZERO]]
1484+
;
1485+
%y = mul <2 x i8> %py, <i8 42, i8 2> ; thwart complexity-based ordering
1486+
%sign = ashr <2 x i8> %x, <i8 7, i8 7>
1487+
%not = xor <2 x i8> %sign, <i8 -1, i8 -1>
1488+
%pos_or_zero = and <2 x i8> %y, %not
1489+
ret <2 x i8> %pos_or_zero
1490+
}
1491+
1492+
; Extra-use variant: the 'ashr' sign value has a second user via @use8
; (declared elsewhere in this file), so it must stay live. CHECK lines
; expect the IR unchanged (no fold).
; NOTE(review): bare "14xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) {
1493+
; CHECK-LABEL: @not_ashr_bitwidth_mask_use1(
1494+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
1495+
; CHECK-NEXT: call void @use8(i8 [[SIGN]])
1496+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1497+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1498+
; CHECK-NEXT: ret i8 [[R]]
1499+
;
1500+
%sign = ashr i8 %x, 7
1501+
call void @use8(i8 %sign)
1502+
%not = xor i8 %sign, -1
1503+
%r = and i8 %not, %y
1504+
ret i8 %r
1505+
}
1506+
1507+
; Extra-use variant: here the 'xor' (the inverted mask) has a second user
; via @use8 (declared elsewhere in this file). CHECK lines expect the IR
; unchanged (no fold).
; NOTE(review): bare "15xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) {
1508+
; CHECK-LABEL: @not_ashr_bitwidth_mask_use2(
1509+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
1510+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1511+
; CHECK-NEXT: call void @use8(i8 [[NOT]])
1512+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1513+
; CHECK-NEXT: ret i8 [[R]]
1514+
;
1515+
%sign = ashr i8 %x, 7
1516+
%not = xor i8 %sign, -1
1517+
call void @use8(i8 %not)
1518+
%r = and i8 %not, %y
1519+
ret i8 %r
1520+
}
1521+
1522+
; Negative test: shift amount is 6, not bitwidth-1 (7), so the 'ashr' does
; not produce an all-ones/all-zeros sign mask. CHECK lines expect the IR
; unchanged (no fold).
; NOTE(review): bare "15xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) {
1523+
; CHECK-LABEL: @not_ashr_not_bitwidth_mask(
1524+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
1525+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1526+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1527+
; CHECK-NEXT: ret i8 [[R]]
1528+
;
1529+
%sign = ashr i8 %x, 6
1530+
%not = xor i8 %sign, -1
1531+
%r = and i8 %not, %y
1532+
ret i8 %r
1533+
}
1534+
1535+
; Negative test: 'lshr' by bitwidth-1 yields 0 or 1, not a full sign mask
; like 'ashr' would, so the sign-mask pattern does not apply. CHECK lines
; expect the IR unchanged (no fold).
; NOTE(review): bare "15xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {
1536+
; CHECK-LABEL: @not_lshr_bitwidth_mask(
1537+
; CHECK-NEXT: [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7
1538+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1539+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1540+
; CHECK-NEXT: ret i8 [[R]]
1541+
;
1542+
%sign = lshr i8 %x, 7
1543+
%not = xor i8 %sign, -1
1544+
%r = and i8 %not, %y
1545+
ret i8 %r
1546+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -O2 -S < %s | FileCheck %s
3+
4+
; Minimally optimized (mem2reg-level) reproducer from issue llvm#52631:
; a <3 x float> bitwise select built from alloca round-trips, <3>-to-<4>
; shufflevector padding, sign-mask and/or logic, and bitcasts.
; The CHECK lines record the current -O2 output: one arm is folded to a
; 'select' while the other remains an ashr/xor/and mask form, and both are
; 'or'ed — i.e. the pipeline does not yet collapse this to a single select
; (the miss this commit documents; see commit message).
; NOTE(review): the bare "NN+" lines below are diff-view line-number
; artifacts from the page scrape, not IR.
define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) {
5+
; CHECK-LABEL: @PR52631(
6+
; CHECK-NEXT: [[ASTYPE:%.*]] = bitcast <3 x float> [[B:%.*]] to <3 x i32>
7+
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer
8+
; CHECK-NEXT: [[AND:%.*]] = select <3 x i1> [[ISNEG]], <3 x i32> [[ASTYPE]], <3 x i32> zeroinitializer
9+
; CHECK-NEXT: [[C_LOBIT2:%.*]] = ashr <3 x i32> [[C]], <i32 31, i32 31, i32 31>
10+
; CHECK-NEXT: [[C_LOBIT2_NOT:%.*]] = xor <3 x i32> [[C_LOBIT2]], <i32 -1, i32 -1, i32 -1>
11+
; CHECK-NEXT: [[ASTYPE28:%.*]] = bitcast <3 x float> [[A:%.*]] to <3 x i32>
12+
; CHECK-NEXT: [[AND29:%.*]] = and <3 x i32> [[C_LOBIT2_NOT]], [[ASTYPE28]]
13+
; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[AND29]], [[AND]]
14+
; CHECK-NEXT: [[ASTYPE33:%.*]] = bitcast <3 x i32> [[OR]] to <3 x float>
15+
; CHECK-NEXT: ret <3 x float> [[ASTYPE33]]
16+
;
17+
%a.addr = alloca <3 x float>, align 16
18+
%b.addr = alloca <3 x float>, align 16
19+
%c.addr = alloca <3 x i32>, align 16
20+
%zero = alloca <3 x i32>, align 16
21+
%mask = alloca <3 x i32>, align 16
22+
%res = alloca <3 x i32>, align 16
23+
%extractVec = shufflevector <3 x float> %a, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
24+
%storetmp = bitcast <3 x float>* %a.addr to <4 x float>*
25+
store <4 x float> %extractVec, <4 x float>* %storetmp, align 16
26+
%extractVec1 = shufflevector <3 x float> %b, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
27+
%storetmp2 = bitcast <3 x float>* %b.addr to <4 x float>*
28+
store <4 x float> %extractVec1, <4 x float>* %storetmp2, align 16
29+
%extractVec3 = shufflevector <3 x i32> %c, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
30+
%storetmp4 = bitcast <3 x i32>* %c.addr to <4 x i32>*
31+
store <4 x i32> %extractVec3, <4 x i32>* %storetmp4, align 16
32+
%t0 = bitcast <3 x i32>* %zero to i8*
33+
call void @llvm.lifetime.start.p0i8(i64 16, i8* %t0) #2
34+
%storetmp5 = bitcast <3 x i32>* %zero to <4 x i32>*
35+
store <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32>* %storetmp5, align 16
36+
%t1 = bitcast <3 x i32>* %mask to i8*
37+
call void @llvm.lifetime.start.p0i8(i64 16, i8* %t1) #2
38+
%castToVec4 = bitcast <3 x i32>* %zero to <4 x i32>*
39+
%loadVec4 = load <4 x i32>, <4 x i32>* %castToVec4, align 16
40+
%extractVec6 = shufflevector <4 x i32> %loadVec4, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
41+
%castToVec47 = bitcast <3 x i32>* %c.addr to <4 x i32>*
42+
%loadVec48 = load <4 x i32>, <4 x i32>* %castToVec47, align 16
43+
%extractVec9 = shufflevector <4 x i32> %loadVec48, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
44+
%cmp = icmp sgt <3 x i32> %extractVec6, %extractVec9
45+
%sext = sext <3 x i1> %cmp to <3 x i32>
46+
%extractVec10 = shufflevector <3 x i32> %sext, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
47+
%storetmp11 = bitcast <3 x i32>* %mask to <4 x i32>*
48+
store <4 x i32> %extractVec10, <4 x i32>* %storetmp11, align 16
49+
%t2 = bitcast <3 x i32>* %res to i8*
50+
call void @llvm.lifetime.start.p0i8(i64 16, i8* %t2) #2
51+
%castToVec412 = bitcast <3 x i32>* %mask to <4 x i32>*
52+
%loadVec413 = load <4 x i32>, <4 x i32>* %castToVec412, align 16
53+
%extractVec14 = shufflevector <4 x i32> %loadVec413, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
54+
%castToVec415 = bitcast <3 x float>* %b.addr to <4 x float>*
55+
%loadVec416 = load <4 x float>, <4 x float>* %castToVec415, align 16
56+
%extractVec17 = shufflevector <4 x float> %loadVec416, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
57+
%astype = bitcast <3 x float> %extractVec17 to <3 x i32>
58+
%and = and <3 x i32> %extractVec14, %astype
59+
%extractVec18 = shufflevector <3 x i32> %and, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
60+
%storetmp19 = bitcast <3 x i32>* %res to <4 x i32>*
61+
store <4 x i32> %extractVec18, <4 x i32>* %storetmp19, align 16
62+
%castToVec420 = bitcast <3 x i32>* %mask to <4 x i32>*
63+
%loadVec421 = load <4 x i32>, <4 x i32>* %castToVec420, align 16
64+
%extractVec22 = shufflevector <4 x i32> %loadVec421, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
65+
%cmp23 = icmp eq <3 x i32> %extractVec22, zeroinitializer
66+
%sext24 = sext <3 x i1> %cmp23 to <3 x i32>
67+
%castToVec425 = bitcast <3 x float>* %a.addr to <4 x float>*
68+
%loadVec426 = load <4 x float>, <4 x float>* %castToVec425, align 16
69+
%extractVec27 = shufflevector <4 x float> %loadVec426, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
70+
%astype28 = bitcast <3 x float> %extractVec27 to <3 x i32>
71+
%and29 = and <3 x i32> %sext24, %astype28
72+
%castToVec430 = bitcast <3 x i32>* %res to <4 x i32>*
73+
%loadVec431 = load <4 x i32>, <4 x i32>* %castToVec430, align 16
74+
%extractVec32 = shufflevector <4 x i32> %loadVec431, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
75+
%or = or <3 x i32> %and29, %extractVec32
76+
%astype33 = bitcast <3 x i32> %or to <3 x float>
77+
%t3 = bitcast <3 x i32>* %res to i8*
78+
call void @llvm.lifetime.end.p0i8(i64 16, i8* %t3) #2
79+
%t4 = bitcast <3 x i32>* %mask to i8*
80+
call void @llvm.lifetime.end.p0i8(i64 16, i8* %t4) #2
81+
%t5 = bitcast <3 x i32>* %zero to i8*
82+
call void @llvm.lifetime.end.p0i8(i64 16, i8* %t5) #2
83+
ret <3 x float> %astype33
84+
}
85+
86+
; Copied instcombine test with the same pattern (per commit message):
; a vector bitwise select built from an all-sign-bits mask
; (ashr 7 / xor -1 / two ands / or). The CHECK lines show the full -O2
; pipeline reduces it to a single icmp + select, even if one instcombine
; invocation alone might miss it.
; NOTE(review): the bare "NN+" lines are diff-scrape artifacts, not IR.
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
87+
; CHECK-LABEL: @allSignBits_vec(
88+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
89+
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
90+
; CHECK-NEXT: ret <4 x i8> [[TMP1]]
91+
;
92+
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
93+
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
94+
%a1 = and <4 x i8> %tval, %bitmask
95+
%a2 = and <4 x i8> %fval, %not_bitmask
96+
%sel = or <4 x i8> %a2, %a1
97+
ret <4 x i8> %sel
98+
}
99+
100+
; Declarations for the lifetime-marker intrinsics used by @PR52631 above.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
101+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1

0 commit comments

Comments
 (0)