Skip to content

Commit fd5e493

Browse files
committed
[PhaseOrdering] add tests for vector select; NFC
The 1st test corresponds to a minimally optimized (mem2reg) version of the example in issue llvm#52631. The 2nd test copies an existing instcombine test with the same pattern. If we canonicalize differently, we can miss reducing to minimal form in a single invocation of -instcombine, but that should not escape the normal opt pipeline.
1 parent efefc4e commit fd5e493

File tree

2 files changed

+185
-0
lines changed

2 files changed

+185
-0
lines changed

llvm/test/Transforms/InstCombine/and.ll

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,3 +1460,87 @@ define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
14601460
%r = and i8 %sign, %y
14611461
ret i8 %r
14621462
}
1463+
1464+
; Negative test: the ashr-by-(bitwidth-1) sign mask is inverted (xor -1)
; before feeding the 'and'. The autogenerated CHECK lines match the input
; IR unchanged, i.e. instcombine performs no fold on this inverted form.
; NOTE(review): the bare "14xx+" lines below are diff-view line-number
; artifacts from the page scrape, not IR.
define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) {
1465+
; CHECK-LABEL: @not_ashr_bitwidth_mask(
1466+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
1467+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1468+
; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1469+
; CHECK-NEXT: ret i8 [[POS_OR_ZERO]]
1470+
;
1471+
%sign = ashr i8 %x, 7
1472+
%not = xor i8 %sign, -1
1473+
%pos_or_zero = and i8 %not, %y
1474+
ret i8 %pos_or_zero
1475+
}
1476+
1477+
; Vector + commuted variant of the inverted sign-mask pattern: the 'and'
; operands are swapped relative to the scalar test, and the leading 'mul'
; keeps %y "complex" so complexity-based operand ordering cannot undo the
; commute (see the inline comment). CHECK lines expect the IR unchanged.
; NOTE(review): bare "14xx+" lines are diff-scrape artifacts, not IR.
define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
1478+
; CHECK-LABEL: @not_ashr_bitwidth_mask_vec_commute(
1479+
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
1480+
; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
1481+
; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[SIGN]], <i8 -1, i8 -1>
1482+
; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[NOT]]
1483+
; CHECK-NEXT: ret <2 x i8> [[POS_OR_ZERO]]
1484+
;
1485+
%y = mul <2 x i8> %py, <i8 42, i8 2> ; thwart complexity-based ordering
1486+
%sign = ashr <2 x i8> %x, <i8 7, i8 7>
1487+
%not = xor <2 x i8> %sign, <i8 -1, i8 -1>
1488+
%pos_or_zero = and <2 x i8> %y, %not
1489+
ret <2 x i8> %pos_or_zero
1490+
}
1491+
1492+
; Extra-use variant: the 'ashr' sign value has a second user via @use8
; (declared elsewhere in this file), so it must stay live. CHECK lines
; expect the IR unchanged (no fold).
; NOTE(review): bare "14xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) {
1493+
; CHECK-LABEL: @not_ashr_bitwidth_mask_use1(
1494+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
1495+
; CHECK-NEXT: call void @use8(i8 [[SIGN]])
1496+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1497+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1498+
; CHECK-NEXT: ret i8 [[R]]
1499+
;
1500+
%sign = ashr i8 %x, 7
1501+
call void @use8(i8 %sign)
1502+
%not = xor i8 %sign, -1
1503+
%r = and i8 %not, %y
1504+
ret i8 %r
1505+
}
1506+
1507+
; Extra-use variant: here the 'xor' (the inverted mask) has a second user
; via @use8 (declared elsewhere in this file). CHECK lines expect the IR
; unchanged (no fold).
; NOTE(review): bare "15xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) {
1508+
; CHECK-LABEL: @not_ashr_bitwidth_mask_use2(
1509+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
1510+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1511+
; CHECK-NEXT: call void @use8(i8 [[NOT]])
1512+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1513+
; CHECK-NEXT: ret i8 [[R]]
1514+
;
1515+
%sign = ashr i8 %x, 7
1516+
%not = xor i8 %sign, -1
1517+
call void @use8(i8 %not)
1518+
%r = and i8 %not, %y
1519+
ret i8 %r
1520+
}
1521+
1522+
; Negative test: shift amount is 6, not bitwidth-1 (7), so the 'ashr' does
; not produce an all-ones/all-zeros sign mask. CHECK lines expect the IR
; unchanged (no fold).
; NOTE(review): bare "15xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) {
1523+
; CHECK-LABEL: @not_ashr_not_bitwidth_mask(
1524+
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
1525+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1526+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1527+
; CHECK-NEXT: ret i8 [[R]]
1528+
;
1529+
%sign = ashr i8 %x, 6
1530+
%not = xor i8 %sign, -1
1531+
%r = and i8 %not, %y
1532+
ret i8 %r
1533+
}
1534+
1535+
; Negative test: 'lshr' by bitwidth-1 yields 0 or 1, not a full sign mask
; like 'ashr' would, so the sign-mask pattern does not apply. CHECK lines
; expect the IR unchanged (no fold).
; NOTE(review): bare "15xx+" lines are diff-scrape artifacts, not IR.
define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {
1536+
; CHECK-LABEL: @not_lshr_bitwidth_mask(
1537+
; CHECK-NEXT: [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7
1538+
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1
1539+
; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]]
1540+
; CHECK-NEXT: ret i8 [[R]]
1541+
;
1542+
%sign = lshr i8 %x, 7
1543+
%not = xor i8 %sign, -1
1544+
%r = and i8 %not, %y
1545+
ret i8 %r
1546+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -O2 -S < %s | FileCheck %s
3+
4+
; Minimally optimized (mem2reg-level) reproducer from issue llvm#52631:
; a <3 x float> bitwise select built from alloca round-trips, <3>-to-<4>
; shufflevector padding, sign-mask and/or logic, and bitcasts.
; The CHECK lines record the current -O2 output: one arm is folded to a
; 'select' while the other remains an ashr/xor/and mask form, and both are
; 'or'ed — i.e. the pipeline does not yet collapse this to a single select
; (the miss this commit documents; see commit message).
; NOTE(review): the bare "NN+" lines below are diff-view line-number
; artifacts from the page scrape, not IR.
define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) {
5+
; CHECK-LABEL: @PR52631(
6+
; CHECK-NEXT: [[ASTYPE:%.*]] = bitcast <3 x float> [[B:%.*]] to <3 x i32>
7+
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer
8+
; CHECK-NEXT: [[AND:%.*]] = select <3 x i1> [[ISNEG]], <3 x i32> [[ASTYPE]], <3 x i32> zeroinitializer
9+
; CHECK-NEXT: [[C_LOBIT2:%.*]] = ashr <3 x i32> [[C]], <i32 31, i32 31, i32 31>
10+
; CHECK-NEXT: [[C_LOBIT2_NOT:%.*]] = xor <3 x i32> [[C_LOBIT2]], <i32 -1, i32 -1, i32 -1>
11+
; CHECK-NEXT: [[ASTYPE28:%.*]] = bitcast <3 x float> [[A:%.*]] to <3 x i32>
12+
; CHECK-NEXT: [[AND29:%.*]] = and <3 x i32> [[C_LOBIT2_NOT]], [[ASTYPE28]]
13+
; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[AND29]], [[AND]]
14+
; CHECK-NEXT: [[ASTYPE33:%.*]] = bitcast <3 x i32> [[OR]] to <3 x float>
15+
; CHECK-NEXT: ret <3 x float> [[ASTYPE33]]
16+
;
17+
%a.addr = alloca <3 x float>, align 16
18+
%b.addr = alloca <3 x float>, align 16
19+
%c.addr = alloca <3 x i32>, align 16
20+
%zero = alloca <3 x i32>, align 16
21+
%mask = alloca <3 x i32>, align 16
22+
%res = alloca <3 x i32>, align 16
23+
%extractVec = shufflevector <3 x float> %a, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
24+
%storetmp = bitcast <3 x float>* %a.addr to <4 x float>*
25+
store <4 x float> %extractVec, <4 x float>* %storetmp, align 16
26+
%extractVec1 = shufflevector <3 x float> %b, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
27+
%storetmp2 = bitcast <3 x float>* %b.addr to <4 x float>*
28+
store <4 x float> %extractVec1, <4 x float>* %storetmp2, align 16
29+
%extractVec3 = shufflevector <3 x i32> %c, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
30+
%storetmp4 = bitcast <3 x i32>* %c.addr to <4 x i32>*
31+
store <4 x i32> %extractVec3, <4 x i32>* %storetmp4, align 16
32+
%t0 = bitcast <3 x i32>* %zero to i8*
33+
call void @llvm.lifetime.start.p0i8(i64 16, i8* %t0) #2
34+
%storetmp5 = bitcast <3 x i32>* %zero to <4 x i32>*
35+
store <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32>* %storetmp5, align 16
36+
%t1 = bitcast <3 x i32>* %mask to i8*
37+
call void @llvm.lifetime.start.p0i8(i64 16, i8* %t1) #2
38+
%castToVec4 = bitcast <3 x i32>* %zero to <4 x i32>*
39+
%loadVec4 = load <4 x i32>, <4 x i32>* %castToVec4, align 16
40+
%extractVec6 = shufflevector <4 x i32> %loadVec4, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
41+
%castToVec47 = bitcast <3 x i32>* %c.addr to <4 x i32>*
42+
%loadVec48 = load <4 x i32>, <4 x i32>* %castToVec47, align 16
43+
%extractVec9 = shufflevector <4 x i32> %loadVec48, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
44+
%cmp = icmp sgt <3 x i32> %extractVec6, %extractVec9
45+
%sext = sext <3 x i1> %cmp to <3 x i32>
46+
%extractVec10 = shufflevector <3 x i32> %sext, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
47+
%storetmp11 = bitcast <3 x i32>* %mask to <4 x i32>*
48+
store <4 x i32> %extractVec10, <4 x i32>* %storetmp11, align 16
49+
%t2 = bitcast <3 x i32>* %res to i8*
50+
call void @llvm.lifetime.start.p0i8(i64 16, i8* %t2) #2
51+
%castToVec412 = bitcast <3 x i32>* %mask to <4 x i32>*
52+
%loadVec413 = load <4 x i32>, <4 x i32>* %castToVec412, align 16
53+
%extractVec14 = shufflevector <4 x i32> %loadVec413, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
54+
%castToVec415 = bitcast <3 x float>* %b.addr to <4 x float>*
55+
%loadVec416 = load <4 x float>, <4 x float>* %castToVec415, align 16
56+
%extractVec17 = shufflevector <4 x float> %loadVec416, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
57+
%astype = bitcast <3 x float> %extractVec17 to <3 x i32>
58+
%and = and <3 x i32> %extractVec14, %astype
59+
%extractVec18 = shufflevector <3 x i32> %and, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
60+
%storetmp19 = bitcast <3 x i32>* %res to <4 x i32>*
61+
store <4 x i32> %extractVec18, <4 x i32>* %storetmp19, align 16
62+
%castToVec420 = bitcast <3 x i32>* %mask to <4 x i32>*
63+
%loadVec421 = load <4 x i32>, <4 x i32>* %castToVec420, align 16
64+
%extractVec22 = shufflevector <4 x i32> %loadVec421, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
65+
%cmp23 = icmp eq <3 x i32> %extractVec22, zeroinitializer
66+
%sext24 = sext <3 x i1> %cmp23 to <3 x i32>
67+
%castToVec425 = bitcast <3 x float>* %a.addr to <4 x float>*
68+
%loadVec426 = load <4 x float>, <4 x float>* %castToVec425, align 16
69+
%extractVec27 = shufflevector <4 x float> %loadVec426, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
70+
%astype28 = bitcast <3 x float> %extractVec27 to <3 x i32>
71+
%and29 = and <3 x i32> %sext24, %astype28
72+
%castToVec430 = bitcast <3 x i32>* %res to <4 x i32>*
73+
%loadVec431 = load <4 x i32>, <4 x i32>* %castToVec430, align 16
74+
%extractVec32 = shufflevector <4 x i32> %loadVec431, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
75+
%or = or <3 x i32> %and29, %extractVec32
76+
%astype33 = bitcast <3 x i32> %or to <3 x float>
77+
%t3 = bitcast <3 x i32>* %res to i8*
78+
call void @llvm.lifetime.end.p0i8(i64 16, i8* %t3) #2
79+
%t4 = bitcast <3 x i32>* %mask to i8*
80+
call void @llvm.lifetime.end.p0i8(i64 16, i8* %t4) #2
81+
%t5 = bitcast <3 x i32>* %zero to i8*
82+
call void @llvm.lifetime.end.p0i8(i64 16, i8* %t5) #2
83+
ret <3 x float> %astype33
84+
}
85+
86+
; Copied instcombine test with the same pattern (per commit message):
; a vector bitwise select built from an all-sign-bits mask
; (ashr 7 / xor -1 / two ands / or). The CHECK lines show the full -O2
; pipeline reduces it to a single icmp + select, even if one instcombine
; invocation alone might miss it.
; NOTE(review): the bare "NN+" lines are diff-scrape artifacts, not IR.
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
87+
; CHECK-LABEL: @allSignBits_vec(
88+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
89+
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
90+
; CHECK-NEXT: ret <4 x i8> [[TMP1]]
91+
;
92+
%bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
93+
%not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
94+
%a1 = and <4 x i8> %tval, %bitmask
95+
%a2 = and <4 x i8> %fval, %not_bitmask
96+
%sel = or <4 x i8> %a2, %a1
97+
ret <4 x i8> %sel
98+
}
99+
100+
; Declarations for the lifetime-marker intrinsics used by @PR52631 above.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
101+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1

0 commit comments

Comments
 (0)