Skip to content

Commit ef5bab7

Browse files
[InstCombine] Fold select(load, val) + store into llvm.masked.store
This patch adds a new InstCombine optimization that transforms a pattern of the form:

```
%load = load <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
store <8 x i32> %sel, ptr %ptr, align 32
```

into:

```
@llvm.masked.store.v8i32.p0(<8 x i32> %x, ptr %ptr, i32 32, <8 x i1> %cmp)
```
1 parent dec5765 commit ef5bab7

File tree

3 files changed

+109
-2
lines changed

3 files changed

+109
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,48 @@ static bool equivalentAddressValues(Value *A, Value *B) {
13631363
return false;
13641364
}
13651365

1366+
// Combine
1367+
// %load = load <8 x i32>, ptr %ptr, align 32
1368+
// %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
1369+
// store <8 x i32> %sel, ptr %ptr, align 32
1370+
// to
1371+
// @llvm.masked.store.v8i32.p0(<8 x i32> %x, ptr %ptr, i32 32, <8 x i1> %cmp)
1372+
static bool combineToMaskedStore(InstCombinerImpl &IC, StoreInst &Store) {
1373+
Value *StoredValue = Store.getValueOperand();
1374+
auto *Select = dyn_cast<SelectInst>(StoredValue);
1375+
if (!Select || !StoredValue->getType()->isVectorTy())
1376+
return false;
1377+
1378+
Value *Condition = Select->getCondition();
1379+
Value *TrueValue = Select->getTrueValue();
1380+
Value *FalseValue = Select->getFalseValue();
1381+
1382+
const auto *Load = dyn_cast<LoadInst>(FalseValue);
1383+
if (!Load || Load->getPointerOperand() != Store.getPointerOperand())
1384+
return false;
1385+
1386+
if (Load->isVolatile() || Store.isVolatile() || Load->isAtomic() ||
1387+
Store.isAtomic())
1388+
return false;
1389+
1390+
Value *Pointer = Store.getPointerOperand();
1391+
1392+
for (const auto *I = Load->getNextNode(); I && I != &Store;
1393+
I = I->getNextNode()) {
1394+
if (I->mayHaveSideEffects())
1395+
return false;
1396+
1397+
if (const auto *OtherStore = dyn_cast<StoreInst>(I)) {
1398+
if (OtherStore->getPointerOperand() == Pointer)
1399+
return false;
1400+
}
1401+
}
1402+
1403+
IC.Builder.CreateMaskedStore(TrueValue, Pointer, Store.getAlign(), Condition);
1404+
1405+
return true;
1406+
}
1407+
13661408
Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
13671409
Value *Val = SI.getOperand(0);
13681410
Value *Ptr = SI.getOperand(1);
@@ -1375,6 +1417,9 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
13751417
if (unpackStoreToAggregate(*this, SI))
13761418
return eraseInstFromFunction(SI);
13771419

1420+
if (combineToMaskedStore(*this, SI))
1421+
return eraseInstFromFunction(SI);
1422+
13781423
// Replace GEP indices if possible.
13791424
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
13801425
return replaceOperand(SI, 1, NewGEPI);
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
3+
4+
define void @test_masked_store_success(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
5+
; CHECK-LABEL: define void @test_masked_store_success(
6+
; CHECK-SAME: <8 x i32> [[X:%.*]], ptr [[PTR:%.*]], <8 x i1> [[CMP:%.*]]) {
7+
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[X]], ptr [[PTR]], i32 32, <8 x i1> [[CMP]])
8+
; CHECK-NEXT: ret void
9+
;
10+
%load = load <8 x i32>, ptr %ptr, align 32
11+
%sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
12+
store <8 x i32> %sel, ptr %ptr, align 32
13+
ret void
14+
}
15+
16+
define void @test_masked_store_volatile_load(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
17+
; CHECK-LABEL: define void @test_masked_store_volatile_load(
18+
; CHECK-SAME: <8 x i32> [[X:%.*]], ptr [[PTR:%.*]], <8 x i1> [[CMP:%.*]]) {
19+
; CHECK-NEXT: [[LOAD:%.*]] = load volatile <8 x i32>, ptr [[PTR]], align 32
20+
; CHECK-NEXT: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[LOAD]]
21+
; CHECK-NEXT: store <8 x i32> [[SEL]], ptr [[PTR]], align 32
22+
; CHECK-NEXT: ret void
23+
;
24+
%load = load volatile <8 x i32>, ptr %ptr, align 32
25+
%sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
26+
store <8 x i32> %sel, ptr %ptr, align 32
27+
ret void
28+
}
29+
30+
define void @test_masked_store_volatile_store(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
31+
; CHECK-LABEL: define void @test_masked_store_volatile_store(
32+
; CHECK-SAME: <8 x i32> [[X:%.*]], ptr [[PTR:%.*]], <8 x i1> [[CMP:%.*]]) {
33+
; CHECK-NEXT: [[LOAD:%.*]] = load <8 x i32>, ptr [[PTR]], align 32
34+
; CHECK-NEXT: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[LOAD]]
35+
; CHECK-NEXT: store volatile <8 x i32> [[SEL]], ptr [[PTR]], align 32
36+
; CHECK-NEXT: ret void
37+
;
38+
%load = load <8 x i32>, ptr %ptr, align 32
39+
%sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
40+
store volatile <8 x i32> %sel, ptr %ptr, align 32
41+
ret void
42+
}
43+
44+
declare void @use_vec(<8 x i32>)
45+
46+
define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
47+
; CHECK-LABEL: define void @test_masked_store_intervening(
48+
; CHECK-SAME: <8 x i32> [[X:%.*]], ptr [[PTR:%.*]], <8 x i1> [[CMP:%.*]]) {
49+
; CHECK-NEXT: [[LOAD:%.*]] = load <8 x i32>, ptr [[PTR]], align 32
50+
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr [[PTR]], align 32
51+
; CHECK-NEXT: call void @use_vec(<8 x i32> zeroinitializer)
52+
; CHECK-NEXT: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[LOAD]]
53+
; CHECK-NEXT: store <8 x i32> [[SEL]], ptr [[PTR]], align 32
54+
; CHECK-NEXT: ret void
55+
;
56+
%load = load <8 x i32>, ptr %ptr, align 32
57+
store <8 x i32> zeroinitializer, ptr %ptr, align 32
58+
%tmp = load <8 x i32>, ptr %ptr
59+
call void @use_vec(<8 x i32> %tmp)
60+
%sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
61+
store <8 x i32> %sel, ptr %ptr, align 32
62+
ret void
63+
}

llvm/test/Transforms/LoopVectorize/if-conversion.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ define i32 @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end)
6161
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD4]]
6262
; CHECK-NEXT: [[TMP15:%.*]] = mul <4 x i32> [[WIDE_LOAD]], splat (i32 5)
6363
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], splat (i32 3)
64-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> [[TMP16]], <4 x i32> [[WIDE_LOAD]]
65-
; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP13]], align 4, !alias.scope [[META0]], !noalias [[META3]]
64+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP16]], ptr [[TMP13]], i32 4, <4 x i1> [[DOTNOT]])
6665
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6766
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6867
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]

0 commit comments

Comments
 (0)