Skip to content

Commit b9775d1

Browse files
paulwalker-arm authored and github-actions[bot] committed
Automerge: [LLVM][CodeGen][SVE] Use DUPM for constantfp splats. (#168391)
This helps cases where the immediate range of FDUP is not sufficient.
2 parents 9d03135 + 59ed6df commit b9775d1

File tree

11 files changed

+626
-345
lines changed

11 files changed

+626
-345
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 37 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
44034403

44044404
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
44054405
bool Invert) {
4406-
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4407-
uint64_t ImmVal = CNode->getZExtValue();
4408-
SDLoc DL(N);
4409-
4410-
if (Invert)
4411-
ImmVal = ~ImmVal;
4406+
uint64_t ImmVal;
4407+
if (auto CI = dyn_cast<ConstantSDNode>(N))
4408+
ImmVal = CI->getZExtValue();
4409+
else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4410+
ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4411+
else
4412+
return false;
44124413

4413-
// Shift mask depending on type size.
4414-
switch (VT.SimpleTy) {
4415-
case MVT::i8:
4416-
ImmVal &= 0xFF;
4417-
ImmVal |= ImmVal << 8;
4418-
ImmVal |= ImmVal << 16;
4419-
ImmVal |= ImmVal << 32;
4420-
break;
4421-
case MVT::i16:
4422-
ImmVal &= 0xFFFF;
4423-
ImmVal |= ImmVal << 16;
4424-
ImmVal |= ImmVal << 32;
4425-
break;
4426-
case MVT::i32:
4427-
ImmVal &= 0xFFFFFFFF;
4428-
ImmVal |= ImmVal << 32;
4429-
break;
4430-
case MVT::i64:
4431-
break;
4432-
default:
4433-
llvm_unreachable("Unexpected type");
4434-
}
4414+
if (Invert)
4415+
ImmVal = ~ImmVal;
44354416

4436-
uint64_t encoding;
4437-
if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4438-
Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4439-
return true;
4440-
}
4417+
// Shift mask depending on type size.
4418+
switch (VT.SimpleTy) {
4419+
case MVT::i8:
4420+
ImmVal &= 0xFF;
4421+
ImmVal |= ImmVal << 8;
4422+
ImmVal |= ImmVal << 16;
4423+
ImmVal |= ImmVal << 32;
4424+
break;
4425+
case MVT::i16:
4426+
ImmVal &= 0xFFFF;
4427+
ImmVal |= ImmVal << 16;
4428+
ImmVal |= ImmVal << 32;
4429+
break;
4430+
case MVT::i32:
4431+
ImmVal &= 0xFFFFFFFF;
4432+
ImmVal |= ImmVal << 32;
4433+
break;
4434+
case MVT::i64:
4435+
break;
4436+
default:
4437+
llvm_unreachable("Unexpected type");
44414438
}
4442-
return false;
4439+
4440+
uint64_t encoding;
4441+
if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
4442+
return false;
4443+
4444+
Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4445+
return true;
44434446
}
44444447

44454448
// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in {
989989
(DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
990990

991991
// Duplicate FP immediate into all vector elements
992-
let AddedComplexity = 2 in {
992+
let AddedComplexity = 3 in {
993993
def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
994994
(FDUP_ZI_H fpimm16:$imm8)>;
995995
def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>",
347347
def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
348348
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
349349

350+
def SVELogicalFPImm16Pat : ComplexPattern<f16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
351+
def SVELogicalFPImm32Pat : ComplexPattern<f32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
352+
def SVELogicalFPImm64Pat : ComplexPattern<f64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
353+
def SVELogicalBFPImmPat : ComplexPattern<bf16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
354+
350355
def SVELogicalImm8NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
351356
def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
352357
def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
@@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm<string asm> {
21602165
(!cast<Instruction>(NAME) i64:$imm)>;
21612166
def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))),
21622167
(!cast<Instruction>(NAME) i64:$imm)>;
2168+
2169+
def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
2170+
(!cast<Instruction>(NAME) i64:$imm)>;
2171+
def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
2172+
(!cast<Instruction>(NAME) i64:$imm)>;
2173+
def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
2174+
(!cast<Instruction>(NAME) i64:$imm)>;
2175+
def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
2176+
(!cast<Instruction>(NAME) i64:$imm)>;
2177+
def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
2178+
(!cast<Instruction>(NAME) i64:$imm)>;
2179+
def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))),
2180+
(!cast<Instruction>(NAME) i64:$imm)>;
2181+
2182+
def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
2183+
(!cast<Instruction>(NAME) i64:$imm)>;
2184+
def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
2185+
(!cast<Instruction>(NAME) i64:$imm)>;
2186+
def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
2187+
(!cast<Instruction>(NAME) i64:$imm)>;
21632188
}
21642189

21652190
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/sve-bf16-combines.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -632,7 +632,6 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
632632
; SVE: // %bb.0:
633633
; SVE-NEXT: uunpkhi z3.s, z2.h
634634
; SVE-NEXT: uunpkhi z4.s, z1.h
635-
; SVE-NEXT: mov w8, #32768 // =0x8000
636635
; SVE-NEXT: uunpklo z2.s, z2.h
637636
; SVE-NEXT: uunpklo z1.s, z1.h
638637
; SVE-NEXT: ptrue p1.s
@@ -643,9 +642,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
643642
; SVE-NEXT: fmul z3.s, z4.s, z3.s
644643
; SVE-NEXT: fmul z1.s, z1.s, z2.s
645644
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
646-
; SVE-NEXT: fmov h3, w8
645+
; SVE-NEXT: dupm z3.h, #0x8000
647646
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
648-
; SVE-NEXT: mov z3.h, h3
649647
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
650648
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
651649
; SVE-NEXT: uunpkhi z3.s, z0.h
@@ -665,10 +663,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
665663
;
666664
; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
667665
; SVE-B16B16: // %bb.0:
668-
; SVE-B16B16-NEXT: mov w8, #32768 // =0x8000
666+
; SVE-B16B16-NEXT: dupm z3.h, #0x8000
669667
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
670-
; SVE-B16B16-NEXT: fmov h3, w8
671-
; SVE-B16B16-NEXT: mov z3.h, h3
672668
; SVE-B16B16-NEXT: sel z1.h, p0, z1.h, z3.h
673669
; SVE-B16B16-NEXT: bfsub z0.h, z0.h, z1.h
674670
; SVE-B16B16-NEXT: ret

llvm/test/CodeGen/AArch64/sve-fp-combine.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1134,10 +1134,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a,
11341134
define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
11351135
; CHECK-LABEL: fsub_sel_fmul_h_negzero:
11361136
; CHECK: // %bb.0:
1137-
; CHECK-NEXT: mov w8, #32768 // =0x8000
1137+
; CHECK-NEXT: dupm z3.h, #0x8000
11381138
; CHECK-NEXT: fmul z1.h, z1.h, z2.h
1139-
; CHECK-NEXT: mov z2.h, w8
1140-
; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h
1139+
; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
11411140
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
11421141
; CHECK-NEXT: ret
11431142
%fmul = fmul <vscale x 8 x half> %b, %c
@@ -1150,10 +1149,9 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vsc
11501149
define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
11511150
; CHECK-LABEL: fsub_sel_fmul_s_negzero:
11521151
; CHECK: // %bb.0:
1153-
; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
1152+
; CHECK-NEXT: mov z3.s, #0x80000000
11541153
; CHECK-NEXT: fmul z1.s, z1.s, z2.s
1155-
; CHECK-NEXT: mov z2.s, w8
1156-
; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s
1154+
; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
11571155
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
11581156
; CHECK-NEXT: ret
11591157
%fmul = fmul <vscale x 4 x float> %b, %c
@@ -1166,10 +1164,9 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <v
11661164
define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
11671165
; CHECK-LABEL: fsub_sel_fmul_d_negzero:
11681166
; CHECK: // %bb.0:
1169-
; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
1167+
; CHECK-NEXT: mov z3.d, #0x8000000000000000
11701168
; CHECK-NEXT: fmul z1.d, z1.d, z2.d
1171-
; CHECK-NEXT: mov z2.d, x8
1172-
; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d
1169+
; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
11731170
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
11741171
; CHECK-NEXT: ret
11751172
%fmul = fmul <vscale x 2 x double> %b, %c

llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
5151
; CHECK-NEXT: addvl sp, sp, #-1
5252
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
5353
; CHECK-NEXT: .cfi_offset w29, -16
54-
; CHECK-NEXT: mov w8, #32768 // =0x8000
54+
; CHECK-NEXT: dupm z2.h, #0x8000
5555
; CHECK-NEXT: ptrue p0.d
5656
; CHECK-NEXT: str z0, [sp]
57-
; CHECK-NEXT: mov z2.h, w8
5857
; CHECK-NEXT: fmov s0, s1
5958
; CHECK-NEXT: st1h { z2.d }, p0, [sp, #3, mul vl]
6059
; CHECK-NEXT: ptrue p0.h
@@ -77,12 +76,11 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
7776
; CHECK-NEXT: .cfi_offset w29, -16
7877
; CHECK-NEXT: ptrue p0.h
7978
; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
80-
; CHECK-NEXT: mov w8, #32768 // =0x8000
8179
; CHECK-NEXT: str z1, [sp]
80+
; CHECK-NEXT: addvl x8, sp, #1
8281
; CHECK-NEXT: ptrue p1.d
8382
; CHECK-NEXT: fadda h2, p0, h2, z0.h
84-
; CHECK-NEXT: mov z0.h, w8
85-
; CHECK-NEXT: addvl x8, sp, #1
83+
; CHECK-NEXT: dupm z0.h, #0x8000
8684
; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl]
8785
; CHECK-NEXT: ldr z1, [sp]
8886
; CHECK-NEXT: str z1, [sp, #1, mul vl]
@@ -105,11 +103,10 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
105103
; CHECK: // %bb.0:
106104
; CHECK-NEXT: ptrue p0.h
107105
; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
108-
; CHECK-NEXT: mov w8, #32768 // =0x8000
106+
; CHECK-NEXT: uunpklo z1.s, z1.h
109107
; CHECK-NEXT: fadda h2, p0, h2, z0.h
110-
; CHECK-NEXT: uunpklo z0.s, z1.h
111-
; CHECK-NEXT: mov z1.h, w8
112-
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
108+
; CHECK-NEXT: dupm z0.h, #0x8000
109+
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
113110
; CHECK-NEXT: fadda h2, p0, h2, z0.h
114111
; CHECK-NEXT: fmov s0, s2
115112
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)