Skip to content

Commit c4a7e8a

Browse files
- Expand fixed-length llvm.get.active.lane.mask intrinsics if the result or
operand types would not be legal when lowering with SVE.
1 parent c670009 commit c4a7e8a

File tree

3 files changed

+39
-26
lines changed

3 files changed

+39
-26
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2105,6 +2105,11 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
21052105
if (!Subtarget->hasSVE() || ResVT.getVectorElementType() != MVT::i1)
21062106
return true;
21072107

2108+
// Only support illegal types if the result is scalable.
2109+
if (ResVT.isFixedLengthVector() && (ResVT.getVectorNumElements() > 16 ||
2110+
(OpVT != MVT::i32 && OpVT != MVT::i64)))
2111+
return true;
2112+
21082113
// 32 & 64 bit operands are supported. We can promote anything < 64 bits,
21092114
// but anything larger should be expanded.
21102115
if (OpVT.getFixedSizeInBits() > 64)

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -930,8 +930,8 @@ define void @get_lane_mask() #0 {
930930
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
931931
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
932932
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
933-
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 64 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
934-
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
933+
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
934+
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
935935
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
936936
;
937937
; CHECK-VSCALE-2-LABEL: 'get_lane_mask'
@@ -953,8 +953,8 @@ define void @get_lane_mask() #0 {
953953
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
954954
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
955955
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
956-
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 64 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
957-
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
956+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
957+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
958958
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
959959
;
960960
; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
@@ -976,8 +976,8 @@ define void @get_lane_mask() #0 {
976976
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
977977
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
978978
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
979-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 64 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
980-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
979+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
980+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
981981
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
982982
;
983983
%mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,11 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
278278
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
279279
; CHECK-LABEL: lane_mask_v16i1_i8:
280280
; CHECK: // %bb.0:
281-
; CHECK-NEXT: and w8, w1, #0xff
282-
; CHECK-NEXT: and w9, w0, #0xff
283-
; CHECK-NEXT: whilelo p0.b, w9, w8
284-
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
285-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
281+
; CHECK-NEXT: index z0.b, #0, #1
282+
; CHECK-NEXT: dup v1.16b, w0
283+
; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
284+
; CHECK-NEXT: dup v1.16b, w1
285+
; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
286286
; CHECK-NEXT: ret
287287
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
288288
ret <16 x i1> %active.lane.mask
@@ -291,11 +291,11 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
291291
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
292292
; CHECK-LABEL: lane_mask_v8i1_i8:
293293
; CHECK: // %bb.0:
294-
; CHECK-NEXT: and w8, w1, #0xff
295-
; CHECK-NEXT: and w9, w0, #0xff
296-
; CHECK-NEXT: whilelo p0.b, w9, w8
297-
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
298-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
294+
; CHECK-NEXT: index z0.b, #0, #1
295+
; CHECK-NEXT: dup v1.8b, w0
296+
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
297+
; CHECK-NEXT: dup v1.8b, w1
298+
; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
299299
; CHECK-NEXT: ret
300300
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
301301
ret <8 x i1> %active.lane.mask
@@ -304,11 +304,15 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
304304
define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
305305
; CHECK-LABEL: lane_mask_v4i1_i8:
306306
; CHECK: // %bb.0:
307-
; CHECK-NEXT: and w8, w1, #0xff
308-
; CHECK-NEXT: and w9, w0, #0xff
309-
; CHECK-NEXT: whilelo p0.h, w9, w8
310-
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
311-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
307+
; CHECK-NEXT: dup v0.4h, w0
308+
; CHECK-NEXT: index z1.h, #0, #1
309+
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
310+
; CHECK-NEXT: dup v3.4h, w1
311+
; CHECK-NEXT: bic v0.4h, #255, lsl #8
312+
; CHECK-NEXT: bic v3.4h, #255, lsl #8
313+
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
314+
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
315+
; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h
312316
; CHECK-NEXT: ret
313317
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
314318
ret <4 x i1> %active.lane.mask
@@ -317,11 +321,15 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
317321
define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
318322
; CHECK-LABEL: lane_mask_v2i1_i8:
319323
; CHECK: // %bb.0:
320-
; CHECK-NEXT: and w8, w1, #0xff
321-
; CHECK-NEXT: and w9, w0, #0xff
322-
; CHECK-NEXT: whilelo p0.s, w9, w8
323-
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
324-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
324+
; CHECK-NEXT: movi d0, #0x0000ff000000ff
325+
; CHECK-NEXT: dup v1.2s, w0
326+
; CHECK-NEXT: index z2.s, #0, #1
327+
; CHECK-NEXT: dup v3.2s, w1
328+
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
329+
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
330+
; CHECK-NEXT: and v2.8b, v3.8b, v0.8b
331+
; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s
332+
; CHECK-NEXT: cmhi v0.2s, v2.2s, v0.2s
325333
; CHECK-NEXT: ret
326334
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
327335
ret <2 x i1> %active.lane.mask

0 commit comments

Comments
 (0)