Skip to content

Commit 49833a2

Browse files
committed
[ConstantFolding] Fold scalable get_active_lane_masks
Scalable get_active_lane_mask intrinsics whose active range is empty (first operand >= second operand) can be folded to zeroinitializer. This helps remove no-op scalable masked stores and loads. The fold is not applied when the second operand is 0 (see llvm#152140).
1 parent 6d902b6 commit 49833a2

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4238,6 +4238,13 @@ static Constant *ConstantFoldScalableVectorCall(
42384238

42394239
return ConstantInt::getFalse(SVTy);
42404240
}
4241+
case Intrinsic::get_active_lane_mask: {
4242+
auto Op0 = cast<ConstantInt>(Operands[0])->getValue();
4243+
auto Op1 = cast<ConstantInt>(Operands[1])->getValue();
4244+
if ((Op0.uge(Op1) && (!Op1.isZero())))
4245+
return ConstantVector::getNullValue(SVTy);
4246+
break;
4247+
}
42414248
default:
42424249
break;
42434250
}

llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,40 @@ entry:
307307
ret <4 x float> %var33
308308
}
309309

310+
; Scalable mask where the first operand (12) equals the second (12); the CHECK
; lines expect the call to be folded to zeroinitializer.
define <vscale x 4 x i1> @nxv4i1_12_12() {
311+
; CHECK-LABEL: @nxv4i1_12_12(
312+
; CHECK-NEXT: entry:
313+
; CHECK-NEXT: ret <vscale x 4 x i1> zeroinitializer
314+
;
315+
entry:
316+
%mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 12, i32 12)
317+
ret <vscale x 4 x i1> %mask
318+
}
319+
320+
; Scalable mask where the first operand (8) is greater than the second (4); the
; CHECK lines expect the call to be folded to zeroinitializer.
define <vscale x 4 x i1> @nxv4i1_8_4() {
321+
; CHECK-LABEL: @nxv4i1_8_4(
322+
; CHECK-NEXT: entry:
323+
; CHECK-NEXT: ret <vscale x 4 x i1> zeroinitializer
324+
;
325+
entry:
326+
%mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 8, i32 4)
327+
ret <vscale x 4 x i1> %mask
328+
}
329+
330+
; Negative test: both operands are 0. The CHECK lines expect the call to be
; left in place, since the fold is skipped when the second operand is 0.
define <vscale x 16 x i1> @nxv16i1_0_0() {
331+
; CHECK-LABEL: @nxv16i1_0_0(
332+
; CHECK-NEXT: entry:
333+
; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 0)
334+
; CHECK-NEXT: ret <vscale x 16 x i1> [[MASK]]
335+
;
336+
entry:
337+
%mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 0)
338+
ret <vscale x 16 x i1> %mask
339+
}
340+
310341
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
311342
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
312343
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
344+
345+
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
346+
; The mangled suffix must match the overloaded return type (nxv16i1) and the
; name used by the call in @nxv16i1_0_0.
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)

0 commit comments

Comments
 (0)