Commit 7f8e9c8

Add fixed-width tests
Use APInt maths
Refine tests
1 parent fc9274f commit 7f8e9c8

2 files changed: +103 -44 lines changed


llvm/lib/Analysis/MemoryLocation.cpp

Lines changed: 16 additions & 9 deletions
@@ -161,16 +161,21 @@ getKnownTypeFromMaskedOp(Value *Mask, VectorType *Ty) {
                        m_ConstantInt(Op0), m_ConstantInt(Op1))))
     return std::nullopt;
 
-  uint64_t LaneMaskLo = Op0->getZExtValue();
-  uint64_t LaneMaskHi = Op1->getZExtValue();
-  if ((LaneMaskHi == 0) || (LaneMaskHi <= LaneMaskLo))
+  APInt LaneMaskLo = Op0->getValue();
+  APInt LaneMaskHi = Op1->getValue();
+  if (LaneMaskHi.ule(LaneMaskLo))
     return std::nullopt;
 
-  uint64_t NumElts = LaneMaskHi - LaneMaskLo;
-  if (NumElts > Ty->getElementCount().getKnownMinValue())
-    return std::nullopt;
+  APInt NumElts = LaneMaskHi - LaneMaskLo;
+  if (NumElts.ugt(Ty->getElementCount().getKnownMinValue())) {
+    if (isa<ScalableVectorType>(Ty))
+      return std::nullopt;
+    // Unlike scalable vectors, fixed vector types are guaranteed to handle the
+    // KnownMinValue and can be clamped
+    NumElts = Ty->getElementCount().getKnownMinValue();
+  }
 
-  return FixedVectorType::get(Ty->getElementType(), NumElts);
+  return FixedVectorType::get(Ty->getElementType(), NumElts.getZExtValue());
 }
 
 MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
@@ -243,7 +248,8 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
     if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(2), Ty))
       return MemoryLocation(Arg, DL.getTypeStoreSize(*KnownType), AATags);
 
-    return MemoryLocation(Arg, DL.getTypeStoreSize(Ty), AATags);
+    return MemoryLocation(
+        Arg, LocationSize::upperBound(DL.getTypeStoreSize(Ty)), AATags);
   }
   case Intrinsic::masked_store: {
     assert(ArgIdx == 1 && "Invalid argument index");
@@ -252,7 +258,8 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
     if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(3), Ty))
       return MemoryLocation(Arg, DL.getTypeStoreSize(*KnownType), AATags);
 
-    return MemoryLocation(Arg, DL.getTypeStoreSize(Ty), AATags);
+    return MemoryLocation(
+        Arg, LocationSize::upperBound(DL.getTypeStoreSize(Ty)), AATags);
   }
 
   case Intrinsic::invariant_end:
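
A minimal standalone sketch (an illustration, not part of the commit) of the APInt semantics the rewrite above relies on: unsigned predicates and subtraction work at any bit width, whereas the old uint64_t path through getZExtValue() asserts once a lane-mask constant's value needs more than 64 bits, which the new i128 test below exercises. The file name is hypothetical; it assumes only llvm/ADT/APInt.h.

// apint_lane_mask_sketch.cpp (hypothetical)
#include "llvm/ADT/APInt.h"
#include <cassert>

using llvm::APInt;

int main() {
  // 128-bit lane-mask bounds, as an i128 get.active.lane.mask produces.
  APInt Lo(128, 0), Hi(128, 4);

  // The rewritten guard: reject empty or inverted ranges (Hi <= Lo).
  assert(!Hi.ule(Lo) && "mask range must be non-empty");

  // The element count stays an APInt until it is known to be small.
  APInt NumElts = Hi - Lo;

  // ugt(uint64_t) compares against the vector's known minimum element
  // count; for a fixed <4 x float> that count is 4.
  assert(!NumElts.ugt(4) && "mask covers more lanes than the vector");

  // Narrowing is safe only now that the value provably fits in 64 bits.
  return NumElts.getZExtValue() == 4 ? 0 : 1;
}

The same ordering — compare while still wide, narrow last — is what lets the commit clamp NumElts for fixed-width vectors before calling FixedVectorType::get.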

llvm/test/Analysis/BasicAA/scalable-dse-aa.ll

Lines changed: 87 additions & 35 deletions
@@ -1,30 +1,29 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
 
-define <vscale x 4 x float> @dead_scalable_store(i32 %0, ptr %1) {
+define <vscale x 4 x float> @dead_scalable_store(ptr %0) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store(
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.1.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
-; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.1.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask)
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.1.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
+; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
+; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask)
+; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
 ;
   %arr = alloca [64 x i32], align 4
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
 
-  %gep.1.16 = getelementptr inbounds nuw i8, ptr %1, i64 16
-  %gep.1.32 = getelementptr inbounds nuw i8, ptr %1, i64 32
-  %gep.1.48 = getelementptr inbounds nuw i8, ptr %1, i64 48
+  %gep.0.16 = getelementptr inbounds nuw i8, ptr %0, i64 16
+  %gep.0.32 = getelementptr inbounds nuw i8, ptr %0, i64 32
+  %gep.0.48 = getelementptr inbounds nuw i8, ptr %0, i64 48
   %gep.arr.16 = getelementptr inbounds nuw i8, ptr %arr, i64 16
   %gep.arr.32 = getelementptr inbounds nuw i8, ptr %arr, i64 32
   %gep.arr.48 = getelementptr inbounds nuw i8, ptr %arr, i64 48
 
-  %load.1.16 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.1.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
-  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.1.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
+  %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
 
-  %load.1.32 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.1.32, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
-  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.1.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask)
+  %load.0.32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.32, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask)
 
-  %load.1.48 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.1.48, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
-  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.1.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
+  %load.0.48 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.48, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
 
   %faddop0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   %faddop1 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
@@ -33,6 +32,39 @@ define <vscale x 4 x float> @dead_scalable_store(i32 %0, ptr %1) {
   ret <vscale x 4 x float> %fadd
 }
 
+define <4 x float> @dead_scalable_store_fixed(ptr %0) {
+; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed(
+; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <4 x i1> %mask)
+; CHECK-NOT: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.36, ptr nonnull %gep.arr.36, i32 1, <4 x i1> %mask2)
+; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <4 x i1> %mask)
+;
+  %arr = alloca [64 x i32], align 4
+  %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
+  %mask2 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3)
+
+  %gep.0.16 = getelementptr inbounds nuw i8, ptr %0, i64 16
+  %gep.0.36 = getelementptr inbounds nuw i8, ptr %0, i64 36
+  %gep.0.48 = getelementptr inbounds nuw i8, ptr %0, i64 48
+  %gep.arr.16 = getelementptr inbounds nuw i8, ptr %arr, i64 16
+  %gep.arr.36 = getelementptr inbounds nuw i8, ptr %arr, i64 36
+  %gep.arr.48 = getelementptr inbounds nuw i8, ptr %arr, i64 48
+
+  %load.0.16 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %gep.0.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <4 x i1> %mask)
+
+  %load.0.36 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %gep.0.36, i32 1, <4 x i1> %mask2, <4 x float> zeroinitializer)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.36, ptr nonnull %gep.arr.36, i32 1, <4 x i1> %mask2)
+
+  %load.0.48 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %gep.0.48, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <4 x i1> %mask)
+
+  %faddop0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %gep.arr.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
+  %faddop1 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %gep.arr.48, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
+  %fadd = fadd <4 x float> %faddop0, %faddop1
+
+  ret <4 x float> %fadd
+}
+
 define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) {
 ; CHECK-LABEL: define <vscale x 4 x float> @scalable_store_partial_overwrite(
 ; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
@@ -49,13 +81,13 @@ define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) {
   %gep.arr.30 = getelementptr inbounds nuw i8, ptr %arr, i64 30
   %gep.arr.48 = getelementptr inbounds nuw i8, ptr %arr, i64 48
 
-  %load.0.16 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
 
-  %load.0.30 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.30, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0.30 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.30, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask)
 
-  %load.0.48 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.48, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0.48 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.48, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
 
   %faddop0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
@@ -80,13 +112,13 @@ define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) {
   %gep.arr.30 = getelementptr inbounds nuw i8, ptr %arr, i64 30
   %gep.arr.46 = getelementptr inbounds nuw i8, ptr %arr, i64 46
 
-  %load.0.16 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0.16 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.16, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
 
-  %load.0.30 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.30, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0.30 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.30, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask)
 
-  %load.0.46 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.46, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0.46 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %gep.0.46, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.46, ptr nonnull %gep.arr.46, i32 1, <vscale x 4 x i1> %mask)
 
   %smallmask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.32(i32 0, i32 2)
@@ -101,7 +133,7 @@ define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store(
 ; CHECK-NOT: store i32 20, ptr %gep.1.12
 ;
-  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4)
   %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12
   store i32 20, ptr %gep.1.12
 
@@ -111,45 +143,65 @@ define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) {
   ret <vscale x 4 x float> %retval
 }
 
+
+; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask(
+; CHECK-NOT: store i32 20, ptr %1
+; CHECK: store i32 50, ptr %gep.5
+define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1) {
+  %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7)
+  store i32 20, ptr %1
+
+  %gep.5 = getelementptr inbounds nuw i32, ptr %1, i64 5
+  store i32 50, ptr %gep.5
+
+  %load.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0, ptr nonnull %1, i32 1, <4 x i1> %mask)
+  %retval = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %1, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer)
+  ret <4 x float> %retval
+}
+
 ; We don't know if the scalar store is dead as we can't determine vscale.
 ; This get active lane mask may cover 4 or 8 integers
 define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @mask_gt_minimum_num_elts(
+; CHECK: store i32 10, ptr %gep.1.12
 ; CHECK: store i32 20, ptr %gep.1.28
 ;
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
+  %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12
+  store i32 10, ptr %gep.1.12
   %gep.1.28 = getelementptr inbounds nuw i8, ptr %1, i64 28
   store i32 20, ptr %gep.1.28
 
-  %load.0 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0, ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask)
   %retval = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   ret <vscale x 4 x float> %retval
 }
 
-; Don't do anything if the 2nd Op of get active lane mask is 0. This currently generates poison
-define <vscale x 4 x float> @mask_hi_0(ptr noalias %0, ptr %1) {
-; CHECK-LABEL: define <vscale x 4 x float> @mask_hi_0(
+; Don't do anything if the mask's Op1 < Op0
+define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) {
+; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_lt(
 ; CHECK: store i32 20, ptr %1
 ;
-  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 0)
+  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2)
   store i32 20, ptr %1
 
-  %load.0 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0, ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask)
   %retval = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   ret <vscale x 4 x float> %retval
 }
 
-; Don't do anything if the 2nd Op is gt/eq the 1st
-define <vscale x 4 x float> @active_lane_mask_gt_eq(ptr noalias %0, ptr %1) {
-; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_gt_eq(
+; Don't do anything if the mask's Op1 == Op0
+define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) {
+; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_eq(
 ; CHECK: store i32 20, ptr %1
 ;
-  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2)
+  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2)
   store i32 20, ptr %1
 
-  %load.0 = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+  %load.0 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0, ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask)
   %retval = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   ret <vscale x 4 x float> %retval
@@ -164,9 +216,9 @@ define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) {
   %gep.1.6 = getelementptr inbounds nuw i8, ptr %1, i64 6
   store i8 60, ptr %gep.1.6
   %gep.1.8 = getelementptr inbounds nuw i8, ptr %1, i64 8
-  store i8 120, ptr %gep.1.8 
+  store i8 120, ptr %gep.1.8
 
-  %load.0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull %0, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> zeroinitializer)
+  %load.0 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull %0, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> zeroinitializer)
   call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> %load.0, ptr %1, i32 1, <vscale x 16 x i1> %mask)
   %retval = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %1, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> zeroinitializer)
   ret <vscale x 16 x i8> %retval
@@ -193,4 +245,4 @@ define <vscale x 4 x float> @dead_scalar_store_offset(ptr noalias %0, ptr %1) {
   call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0, ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask)
   %retval = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %1, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
   ret <vscale x 4 x float> %retval
-}
\ No newline at end of file
+}
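
A worked note (an illustration, not part of the commit) on the vscale ambiguity described in the mask_gt_minimum_num_elts comment above, using the test's own numbers:

; An <vscale x 4 x float> masked store under get.active.lane.mask(0, 8)
; writes min(8, 4 * vscale) lanes of 4 bytes each:
;   vscale = 1  ->  16 bytes written  (the store at offset 28 survives)
;   vscale = 2  ->  32 bytes written  (offsets 12 and 28 both overwritten)
; Because the requested 8 lanes exceed the known minimum of 4 and the type
; is scalable, getKnownTypeFromMaskedOp returns std::nullopt; the store is
; then only an upper-bound location, which gives DSE no guaranteed lower
; bound on the bytes written, so both scalar stores must be kept.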
