Skip to content

Commit 4341a43

Browse files
jcoplin-quicmemfrob
authored and committed
Attach metadata to simplified masked loads and stores
1 parent db56ba0 commit 4341a43

File tree

2 files changed

+61
-6
lines changed

2 files changed

+61
-6
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -288,16 +288,20 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
288288

289289
// If the mask is all ones or undefs, this is a plain vector load of the 1st
290290
// argument.
291-
if (maskIsAllOneOrUndef(II.getArgOperand(2)))
292-
return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
293-
"unmaskedload");
291+
if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
292+
LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
293+
"unmaskedload");
294+
L->copyMetadata(II);
295+
return L;
296+
}
294297

295298
// If we can unconditionally load from this address, replace with a
296299
// load/select idiom. TODO: use DT for context sensitive query
297300
if (isDereferenceablePointer(LoadPtr, II.getType(),
298301
II.getModule()->getDataLayout(), &II, nullptr)) {
299-
Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
300-
"unmaskedload");
302+
LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
303+
"unmaskedload");
304+
LI->copyMetadata(II);
301305
return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
302306
}
303307

@@ -320,7 +324,10 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
320324
if (ConstMask->isAllOnesValue()) {
321325
Value *StorePtr = II.getArgOperand(1);
322326
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
323-
return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
327+
StoreInst *S =
328+
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
329+
S->copyMetadata(II);
330+
return S;
324331
}
325332

326333
if (isa<ScalableVectorType>(ConstMask->getType()))
Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -instcombine < %s | FileCheck %s
3+
4+
@g0 = global <4 x i32> zeroinitializer, align 16
5+
6+
define inreg <4 x i32> @mload1(<4 x i32>* nocapture readonly %a0) #0 {
7+
; CHECK-LABEL: @mload1(
8+
; CHECK-NEXT: b0:
9+
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, <4 x i32>* [[A0:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
10+
; CHECK-NEXT: ret <4 x i32> [[UNMASKEDLOAD]]
11+
;
12+
b0:
13+
%v0 = call <4 x i32> @llvm.masked.load.v4i1.p0v4i1(<4 x i32>* %a0, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !0
14+
ret <4 x i32> %v0
15+
}
16+
17+
define inreg <4 x i32> @mload2() #0 {
18+
; CHECK-LABEL: @mload2(
19+
; CHECK-NEXT: b0:
20+
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x i32>, <4 x i32>* @g0, align 16, !tbaa [[TBAA0]]
21+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> [[UNMASKEDLOAD]], i32 0, i32 0
22+
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
23+
;
24+
b0:
25+
%v0 = call <4 x i32> @llvm.masked.load.v4i1.p0v4i1(<4 x i32>* @g0, i32 16, <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer), !tbaa !0
26+
ret <4 x i32> %v0
27+
}
28+
29+
define void @mstore(<4 x i32> %a0, <4 x i32>* nocapture readonly %a1) #0 {
30+
; CHECK-LABEL: @mstore(
31+
; CHECK-NEXT: b0:
32+
; CHECK-NEXT: store <4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], align 16, !tbaa [[TBAA0]]
33+
; CHECK-NEXT: ret void
34+
;
35+
b0:
36+
call void @llvm.masked.store.v4i1.p0v4i1(<4 x i32> %a0, <4 x i32>* %a1, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !tbaa !0
37+
ret void
38+
}
39+
40+
attributes #0 = { norecurse nounwind }
41+
42+
declare <4 x i32> @llvm.masked.load.v4i1.p0v4i1(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
43+
declare void @llvm.masked.store.v4i1.p0v4i1(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
44+
45+
!0 = !{!1, !1, i64 0}
46+
!1 = !{!"omnipotent char", !2, i64 0}
47+
!2 = !{!"Simple C/C++ TBAA"}
48+

0 commit comments

Comments
 (0)