Skip to content

Commit 1eea617

Browse files
phoebewangkrishna2803
authored and committed
[X86][APX] Do optimizeMemoryInst for v1X masked load/store (llvm#151331)
Fix redundant LEA: https://godbolt.org/z/34xEYE818
1 parent dc8d8fd commit 1eea617

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2769,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
27692769
return optimizeGatherScatterInst(II, II->getArgOperand(0));
27702770
case Intrinsic::masked_scatter:
27712771
return optimizeGatherScatterInst(II, II->getArgOperand(1));
2772+
case Intrinsic::masked_load:
2773+
// Treat v1X masked load as load X type.
2774+
if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2775+
if (VT->getNumElements() == 1) {
2776+
Value *PtrVal = II->getArgOperand(0);
2777+
unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2778+
if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2779+
return true;
2780+
}
2781+
}
2782+
return false;
2783+
case Intrinsic::masked_store:
2784+
// Treat v1X masked store as store X type.
2785+
if (auto *VT =
2786+
dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2787+
if (VT->getNumElements() == 1) {
2788+
Value *PtrVal = II->getArgOperand(1);
2789+
unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2790+
if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2791+
return true;
2792+
}
2793+
}
2794+
return false;
27722795
}
27732796

27742797
SmallVector<Value *, 2> PtrOps;

llvm/test/CodeGen/X86/apx/cf.ll

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,3 +194,24 @@ entry:
194194
call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
195195
ret void
196196
}
197+
198+
; sink_gep: the address %p+112 is computed by a GEP in %entry, but its only
; users are a <1 x i32> masked store and masked load in the successor block
; %next. The expected assembly below addresses both conditional moves as
; 112(%rdi), i.e. the offset is folded into the memory operands and no separate
; address-computation instruction appears in the expected sequence.
define void @sink_gep(ptr %p, i1 %cond) {
199+
; CHECK-LABEL: sink_gep:
200+
; CHECK: # %bb.0: # %entry
201+
; CHECK-NEXT: xorl %eax, %eax
202+
; CHECK-NEXT: testb $1, %sil
203+
; CHECK-NEXT: cfcmovnel %eax, 112(%rdi)
204+
; CHECK-NEXT: cfcmovnel 112(%rdi), %eax
205+
; CHECK-NEXT: movl %eax, (%rdi)
206+
; CHECK-NEXT: retq
207+
entry:
208+
; Address is defined here, in a different block from its memory users.
%0 = getelementptr i8, ptr %p, i64 112
209+
br label %next
210+
211+
next:
212+
; The scalar condition becomes the single-lane mask for both masked operations.
%1 = bitcast i1 %cond to <1 x i1>
213+
call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %0, i32 1, <1 x i1> %1)
214+
%2 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %0, i32 1, <1 x i1> %1, <1 x i32> zeroinitializer)
215+
store <1 x i32> %2, ptr %p, align 4
216+
ret void
217+
}

0 commit comments

Comments
 (0)