Skip to content

Commit 79bf8c0

Browse files
wenju-hedtcxzywarsenm
authored
[InstCombine] Fold select(X >s 0, 0, -X) | smax(X, 0) to abs(X) (#165200)
The IR pattern is compiled from OpenCL code:

  __builtin_astype(x > (uchar2)(0) ? x : -x, uchar2);

where smax is created by foldSelectInstWithICmp + canonicalizeSPF.

smax could also come from a direct elementwise max call:

  int c = b > (int)(0) ? (int)(0) : -b;
  int d = __builtin_elementwise_max(b, (int)(0));
  *a = c | d;

Alive2 proofs:
https://alive2.llvm.org/ce/z/2-brvr
https://alive2.llvm.org/ce/z/Dowjzk
https://alive2.llvm.org/ce/z/kathwZ

---------

Co-authored-by: Yingwei Zheng <[email protected]>
Co-authored-by: Matt Arsenault <[email protected]>
1 parent 0a27415 commit 79bf8c0

File tree

4 files changed

+124
-5
lines changed

4 files changed

+124
-5
lines changed

clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3584,13 +3584,13 @@ void test_integer(void) {
35843584
// CHECK-ASM: vsrlb
35853585

35863586
vsc = vec_abs(vsc);
3587-
// CHECK-ASM: vlcb
3587+
// CHECK-ASM: vlpb
35883588
vss = vec_abs(vss);
3589-
// CHECK-ASM: vlch
3589+
// CHECK-ASM: vlph
35903590
vsi = vec_abs(vsi);
3591-
// CHECK-ASM: vlcf
3591+
// CHECK-ASM: vlpf
35923592
vsl = vec_abs(vsl);
3593-
// CHECK-ASM: vlcg
3593+
// CHECK-ASM: vlpg
35943594

35953595
vsc = vec_max(vsc, vsc);
35963596
// CHECK-ASM: vmxb

clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ void test_integer(void) {
246246
// CHECK-ASM: vctzq
247247

248248
vslll = vec_abs(vslll);
249-
// CHECK-ASM: vlcq
249+
// CHECK-ASM: vlpq
250250

251251
vslll = vec_avg(vslll, vslll);
252252
// CHECK: call i128 @llvm.s390.vavgq(i128 %{{.*}}, i128 %{{.*}})

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
39973997
return nullptr;
39983998
}
39993999

4000+
/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X)
4001+
/// select(X <s 0, -X, 0) | smax(X, 0) --> abs(X)
4002+
static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I,
4003+
InstCombiner::BuilderTy &Builder) {
4004+
Value *X;
4005+
Value *Sel;
4006+
if (match(&I,
4007+
m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) {
4008+
auto NegX = m_Neg(m_Specific(X));
4009+
if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X),
4010+
m_ZeroInt()),
4011+
m_ZeroInt(), NegX)) ||
4012+
match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X),
4013+
m_ZeroInt()),
4014+
NegX, m_ZeroInt())))
4015+
return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X,
4016+
Builder.getFalse());
4017+
}
4018+
return nullptr;
4019+
}
4020+
40004021
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
40014022
// here. We should standardize that construct where it is needed or choose some
40024023
// other way to ensure that commutated variants of patterns are not missed.
@@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
45454566
if (Value *V = SimplifyAddWithRemainder(I))
45464567
return replaceInstUsesWith(I, V);
45474568

4569+
if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder))
4570+
return replaceInstUsesWith(I, Res);
4571+
45484572
return nullptr;
45494573
}
45504574

llvm/test/Transforms/InstCombine/or.ll

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,3 +2113,98 @@ define <4 x i32> @or_zext_nneg_minus_constant_splat(<4 x i8> %a) {
21132113
%or = or <4 x i32> %zext, splat (i32 -9)
21142114
ret <4 x i32> %or
21152115
}
2116+
2117+
define i8 @or_positive_minus_non_positive_to_abs(i8 %a){
2118+
; CHECK-LABEL: @or_positive_minus_non_positive_to_abs(
2119+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.abs.i8(i8 [[A:%.*]], i1 false)
2120+
; CHECK-NEXT: ret i8 [[TMP2]]
2121+
;
2122+
%b = icmp sgt i8 %a, 0
2123+
%mask = sext i1 %b to i8
2124+
%neg = sub i8 0, %a
2125+
%mask_inv = xor i8 %mask, -1
2126+
%c = and i8 %neg, %mask_inv
2127+
%d = and i8 %a, %mask
2128+
%or = or i8 %c, %d
2129+
ret i8 %or
2130+
}
2131+
2132+
; TODO: Fold to smax https://alive2.llvm.org/ce/z/wDiDh2
2133+
define i8 @or_select_smax_neg_to_abs(i8 %a){
2134+
; CHECK-LABEL: @or_select_smax_neg_to_abs(
2135+
; CHECK-NEXT: [[SGT0:%.*]] = icmp sgt i8 [[A:%.*]], 0
2136+
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[A]]
2137+
; CHECK-NEXT: [[OR:%.*]] = select i1 [[SGT0]], i8 0, i8 [[NEG]]
2138+
; CHECK-NEXT: ret i8 [[OR]]
2139+
;
2140+
%sgt0 = icmp sgt i8 %a, 0
2141+
%neg = sub nsw i8 0, %a
2142+
%sel = select i1 %sgt0, i8 0, i8 %neg
2143+
ret i8 %sel
2144+
}
2145+
2146+
; TODO: Fold to abs https://alive2.llvm.org/ce/z/DybfHG
2147+
define i8 @or_select_smax_smax_to_abs(i8 %a){
2148+
; CHECK-LABEL: @or_select_smax_smax_to_abs(
2149+
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[A:%.*]]
2150+
; CHECK-NEXT: [[SEL:%.*]] = call i8 @llvm.smax.i8(i8 [[NEG]], i8 0)
2151+
; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 0)
2152+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[SEL]], [[MAX]]
2153+
; CHECK-NEXT: ret i8 [[OR]]
2154+
;
2155+
%neg = sub nsw i8 0, %a
2156+
%sel = call i8 @llvm.smax.i8(i8 %neg, i8 0)
2157+
%max = call i8 @llvm.smax.i8(i8 %a, i8 0)
2158+
%or = or i8 %sel, %max
2159+
ret i8 %or
2160+
}
2161+
2162+
declare i8 @llvm.abs.i8(i8, i1)
2163+
declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
2164+
2165+
define <2 x i8> @or_sgt_select_smax_to_abs(<2 x i8> %a){
2166+
; CHECK-LABEL: @or_sgt_select_smax_to_abs(
2167+
; CHECK-NEXT: [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false)
2168+
; CHECK-NEXT: ret <2 x i8> [[OR]]
2169+
;
2170+
%sgt0 = icmp sgt <2 x i8> %a, zeroinitializer
2171+
%neg = sub <2 x i8> zeroinitializer, %a
2172+
%sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg
2173+
%max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer)
2174+
%or = or <2 x i8> %sel, %max
2175+
ret <2 x i8> %or
2176+
}
2177+
2178+
define <2 x i8> @or_slt_select_smax_to_abs(<2 x i8> %a){
2179+
; CHECK-LABEL: @or_slt_select_smax_to_abs(
2180+
; CHECK-NEXT: [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false)
2181+
; CHECK-NEXT: ret <2 x i8> [[OR]]
2182+
;
2183+
%slt0 = icmp slt <2 x i8> %a, zeroinitializer
2184+
%neg = sub <2 x i8> zeroinitializer, %a
2185+
%sel = select <2 x i1> %slt0, <2 x i8> %neg, <2 x i8> zeroinitializer
2186+
%max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer)
2187+
%or = or <2 x i8> %sel, %max
2188+
ret <2 x i8> %or
2189+
}
2190+
2191+
; negative test - %max (captured as [[D]] in the CHECK lines) has multiple uses, so %or is not folded to abs.
2192+
2193+
define <2 x i8> @or_select_smax_multi_uses(<2 x i8> %a){
2194+
; CHECK-LABEL: @or_select_smax_multi_uses(
2195+
; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i8> [[A:%.*]], zeroinitializer
2196+
; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[A]]
2197+
; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[B]], <2 x i8> zeroinitializer, <2 x i8> [[NEG]]
2198+
; CHECK-NEXT: [[D:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[A]], <2 x i8> zeroinitializer)
2199+
; CHECK-NEXT: [[OR1:%.*]] = or <2 x i8> [[C]], [[D]]
2200+
; CHECK-NEXT: [[OR:%.*]] = add <2 x i8> [[OR1]], [[D]]
2201+
; CHECK-NEXT: ret <2 x i8> [[OR]]
2202+
;
2203+
%sgt0 = icmp sgt <2 x i8> %a, zeroinitializer
2204+
%neg = sub <2 x i8> zeroinitializer, %a
2205+
%sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg
2206+
%max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer)
2207+
%or = or <2 x i8> %sel, %max
2208+
%add = add <2 x i8> %or, %max
2209+
ret <2 x i8> %add
2210+
}

0 commit comments

Comments
 (0)