diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0126b97c9fb9a..af4780e11e890 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -766,13 +766,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(Op, MVT::v8bf16, Expand); } - // For bf16, fpextend is custom lowered to be optionally expanded into shifts. - setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); + // fpextend from f16 or bf16 to f32 is legal + setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f32, Legal); + // fpextend from bf16 to f64 needs to be split into two fpextends setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Custom); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f32, Custom); auto LegalizeNarrowFP = [this](MVT ScalarVT) { for (auto Op : { @@ -4559,33 +4560,6 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, return SDValue(); } - if (VT.getScalarType() == MVT::f32) { - // FP16->FP32 extends are legal for v32 and v4f32. 
- if (Op0VT.getScalarType() == MVT::f16) - return Op; - if (Op0VT.getScalarType() == MVT::bf16) { - SDLoc DL(Op); - EVT IVT = VT.changeTypeToInteger(); - if (!Op0VT.isVector()) { - Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4bf16, Op0); - IVT = MVT::v4i32; - } - - EVT Op0IVT = Op0.getValueType().changeTypeToInteger(); - SDValue Ext = - DAG.getNode(ISD::ANY_EXTEND, DL, IVT, DAG.getBitcast(Op0IVT, Op0)); - SDValue Shift = - DAG.getNode(ISD::SHL, DL, IVT, Ext, DAG.getConstant(16, DL, IVT)); - if (!Op0VT.isVector()) - Shift = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Shift, - DAG.getConstant(0, DL, MVT::i64)); - Shift = DAG.getBitcast(VT, Shift); - return IsStrict ? DAG.getMergeValues({Shift, Op.getOperand(0)}, DL) - : Shift; - } - return SDValue(); - } - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index bee86aa86ec37..a75091b853d21 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8513,6 +8513,26 @@ def : InstAlias<"uxtl2 $dst.2d, $src1.4s", (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; } +// fpextend from bf16 to f32 is just a shift left by 16, done here with SHLL +let Predicates = [HasNEON] in { +def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))), + (f32 (EXTRACT_SUBREG + (v4i32 (SHLLv4i16 (v4i16 (SUBREG_TO_REG (i64 0), (bf16 FPR16:$Rn), hsub)))), + ssub))>; +def : Pat<(v4f32 (any_fpextend (v4bf16 V64:$Rn))), + (SHLLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (any_fpextend (extract_high_v8bf16 (v8bf16 V128:$Rn)))), + (SHLLv8i16 V128:$Rn)>; +} +// Fallback pattern without NEON: LSL #16 in a GPR (UBFM immr=16, imms=15) +def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))), + (f32 (COPY_TO_REGCLASS + (i32 (UBFMWri (COPY_TO_REGCLASS + (f32 (SUBREG_TO_REG (i32 0), (bf16 FPR16:$Rn), hsub)), + GPR32), + (i64 16), (i64 15))), + FPR32))>; + def abs_f16 : OutPatFrag<(ops node:$Rn), (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 9a1203f18243d..1d33545cb171a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -155,9 +155,7 @@ entry: define i32 @fptosi_bf(bfloat %a) nounwind ssp { ; CHECK-LABEL: fptosi_bf: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, s0 -; CHECK-NEXT: // implicit-def: $d0 -; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: // kill: def $d0 killed $h0 ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fcvtzs w0, s0 @@ -171,9 +169,7 @@ entry: define i32 @fptoui_sbf(bfloat %a) nounwind ssp { ; CHECK-LABEL: fptoui_sbf: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, s0 -; CHECK-NEXT: // implicit-def: $d0 -; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: // kill: def $d0 killed $h0 ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fcvtzu w0, s0 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll index 9b5e48d2b4217..e3e18a1f91c6d 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll @@ -641,7 +641,7 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4: ; NOLSE: // %bb.0: ; NOLSE-NEXT: // kill: def $d0 killed $d0 def $q0 -; NOLSE-NEXT: dup v1.4h, v0.h[1] +; NOLSE-NEXT: mov h1, v0.h[1] ; NOLSE-NEXT: mov w8, #32767 // =0x7fff ; NOLSE-NEXT: shll v0.4s, v0.4h, #16 ; NOLSE-NEXT: shll v1.4s, v1.4h, #16 @@ -649,7 +649,7 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 ; NOLSE-NEXT: ldaxr w9, [x0] ; NOLSE-NEXT: fmov s2, w9 -; NOLSE-NEXT: dup v3.4h, v2.h[1] +; 
NOLSE-NEXT: mov h3, v2.h[1] ; NOLSE-NEXT: shll v2.4s, v2.4h, #16 ; NOLSE-NEXT: fmaxnm s2, s2, s0 ; NOLSE-NEXT: shll v3.4s, v3.4h, #16 @@ -677,14 +677,14 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; LSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4: ; LSE: // %bb.0: ; LSE-NEXT: // kill: def $d0 killed $d0 def $q0 -; LSE-NEXT: dup v1.4h, v0.h[1] +; LSE-NEXT: mov h1, v0.h[1] ; LSE-NEXT: shll v2.4s, v0.4h, #16 ; LSE-NEXT: mov w8, #32767 // =0x7fff ; LSE-NEXT: ldr s0, [x0] ; LSE-NEXT: shll v1.4s, v1.4h, #16 ; LSE-NEXT: .LBB7_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: dup v3.4h, v0.h[1] +; LSE-NEXT: mov h3, v0.h[1] ; LSE-NEXT: shll v4.4s, v0.4h, #16 ; LSE-NEXT: fmaxnm s4, s4, s2 ; LSE-NEXT: shll v3.4s, v3.4h, #16 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll index f6c542fe7d407..10de6777bd285 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll @@ -641,7 +641,7 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; NOLSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4: ; NOLSE: // %bb.0: ; NOLSE-NEXT: // kill: def $d0 killed $d0 def $q0 -; NOLSE-NEXT: dup v1.4h, v0.h[1] +; NOLSE-NEXT: mov h1, v0.h[1] ; NOLSE-NEXT: mov w8, #32767 // =0x7fff ; NOLSE-NEXT: shll v0.4s, v0.4h, #16 ; NOLSE-NEXT: shll v1.4s, v1.4h, #16 @@ -649,7 +649,7 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 ; NOLSE-NEXT: ldaxr w9, [x0] ; NOLSE-NEXT: fmov s2, w9 -; NOLSE-NEXT: dup v3.4h, v2.h[1] +; NOLSE-NEXT: mov h3, v2.h[1] ; NOLSE-NEXT: shll v2.4s, v2.4h, #16 ; NOLSE-NEXT: fminnm s2, s2, s0 ; NOLSE-NEXT: shll v3.4s, v3.4h, #16 @@ -677,14 +677,14 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf ; LSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4: ; LSE: 
// %bb.0: ; LSE-NEXT: // kill: def $d0 killed $d0 def $q0 -; LSE-NEXT: dup v1.4h, v0.h[1] +; LSE-NEXT: mov h1, v0.h[1] ; LSE-NEXT: shll v2.4s, v0.4h, #16 ; LSE-NEXT: mov w8, #32767 // =0x7fff ; LSE-NEXT: ldr s0, [x0] ; LSE-NEXT: shll v1.4s, v1.4h, #16 ; LSE-NEXT: .LBB7_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: dup v3.4h, v0.h[1] +; LSE-NEXT: mov h3, v0.h[1] ; LSE-NEXT: shll v4.4s, v0.4h, #16 ; LSE-NEXT: fminnm s4, s4, s2 ; LSE-NEXT: shll v3.4s, v3.4h, #16 diff --git a/llvm/test/CodeGen/AArch64/bf16-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-instructions.ll index 2fc9c53112ab6..9f002b1e0da55 100644 --- a/llvm/test/CodeGen/AArch64/bf16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/bf16-instructions.ll @@ -202,16 +202,13 @@ define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 { ; ; CHECK-BF16-LABEL: test_fmadd: ; CHECK-BF16: // %bb.0: +; CHECK-BF16-NEXT: // kill: def $h2 killed $h2 def $d2 ; CHECK-BF16-NEXT: // kill: def $h1 killed $h1 def $d1 ; CHECK-BF16-NEXT: // kill: def $h0 killed $h0 def $d0 -; CHECK-BF16-NEXT: // kill: def $h2 killed $h2 def $d2 ; CHECK-BF16-NEXT: shll v1.4s, v1.4h, #16 ; CHECK-BF16-NEXT: shll v0.4s, v0.4h, #16 -; CHECK-BF16-NEXT: fmul s0, s0, s1 -; CHECK-BF16-NEXT: shll v1.4s, v2.4h, #16 -; CHECK-BF16-NEXT: bfcvt h0, s0 -; CHECK-BF16-NEXT: shll v0.4s, v0.4h, #16 -; CHECK-BF16-NEXT: fadd s0, s0, s1 +; CHECK-BF16-NEXT: shll v2.4s, v2.4h, #16 +; CHECK-BF16-NEXT: fmadd s0, s0, s1, s2 ; CHECK-BF16-NEXT: bfcvt h0, s0 ; CHECK-BF16-NEXT: ret %mul = fmul fast bfloat %a, %b @@ -1996,13 +1993,11 @@ define bfloat @test_copysign_f64(bfloat %a, double %b) #0 { define float @test_copysign_extended(bfloat %a, bfloat %b) #0 { ; CHECK-CVT-LABEL: test_copysign_extended: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $d0 -; CHECK-CVT-NEXT: movi v2.4s, #16 ; CHECK-CVT-NEXT: // kill: def $h1 killed $h1 def $d1 -; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: shll 
v1.4s, v1.4h, #16 -; CHECK-CVT-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-CVT-NEXT: mvni v2.4s, #128, lsl #24 +; CHECK-CVT-NEXT: shll v1.4s, v1.4h, #16 +; CHECK-CVT-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-CVT-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-CVT-NEXT: fmov w8, s0 ; CHECK-CVT-NEXT: lsr w8, w8, #16 @@ -2013,16 +2008,12 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 { ; ; CHECK-SD-LABEL: test_copysign_extended: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $d0 -; CHECK-SD-NEXT: movi v2.4s, #16 ; CHECK-SD-NEXT: // kill: def $h1 killed $h1 def $d1 -; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-SD-NEXT: shll v1.4s, v1.4h, #16 -; CHECK-SD-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $d0 ; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24 -; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: bfcvt h0, s0 +; CHECK-SD-NEXT: shll v1.4s, v1.4h, #16 ; CHECK-SD-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/bf16_fast_math.ll b/llvm/test/CodeGen/AArch64/bf16_fast_math.ll index 7d7fb67ca2f77..871ca12c9de77 100644 --- a/llvm/test/CodeGen/AArch64/bf16_fast_math.ll +++ b/llvm/test/CodeGen/AArch64/bf16_fast_math.ll @@ -4,8 +4,6 @@ ; Check that the output instructions have the same fast math flags as the input ; fadd, even when bf16 is legalized to f32. -; FIXME: Conversion from float to bf16 is done via a vector type for some -; reason, when we should just be using scalar instructions. 
define bfloat @normal_fadd(bfloat %x, bfloat %y) { ; CHECK-NOBF16-LABEL: name: normal_fadd @@ -14,13 +12,11 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) { ; CHECK-NOBF16-NEXT: {{ $}} ; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[FADDSrr]] @@ -40,13 +36,11 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) { ; CHECK-BF16-NEXT: {{ $}} ; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-BF16-NEXT: 
[[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr @@ -64,13 +58,11 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) { ; CHECK-NOBF16-NEXT: {{ $}} ; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = 
SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[FADDSrr]] @@ -90,13 +82,11 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) { ; CHECK-BF16-NEXT: {{ $}} ; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nnan ninf nsz arcp contract afn reassoc nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr @@ -114,13 +104,11 @@ define bfloat @ninf_fadd(bfloat %x, 
bfloat %y) { ; CHECK-NOBF16-NEXT: {{ $}} ; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[FADDSrr]] @@ -140,13 +128,11 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) { ; CHECK-BF16-NEXT: {{ $}} ; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed 
[[SUBREG_TO_REG]] ; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = ninf nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr @@ -159,8 +145,6 @@ entry: ; Check that when we have the right fast math flags the converts in between the ; two fadds are removed. -; FIXME: The convert from float to bf16 being done by a shift prevents this from -; happening. 
define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-LABEL: name: normal_fadd_sequence @@ -170,13 +154,11 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2 ; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY5:%[0-9]+]]:gpr32 = COPY [[FADDSrr]] @@ -187,13 +169,11 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri killed [[ADDWrr1]], 16, 31 ; CHECK-NOBF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]] ; CHECK-NOBF16-NEXT: [[COPY7:%[0-9]+]]:fpr16 = COPY [[COPY6]].hsub - ; CHECK-NOBF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: 
[[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[COPY7]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[COPY7]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]] ; CHECK-NOBF16-NEXT: [[COPY8:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub - ; CHECK-NOBF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]] ; CHECK-NOBF16-NEXT: [[COPY9:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY8]], killed [[COPY9]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY10:%[0-9]+]]:gpr32 = COPY [[FADDSrr1]] @@ -213,23 +193,19 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2 ; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: 
[[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-BF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr - ; CHECK-BF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[BFCVT]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[BFCVT]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]] ; CHECK-BF16-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub - ; CHECK-BF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]] ; CHECK-BF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub ; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY5]], killed [[COPY6]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT1:%[0-9]+]]:fpr16 = nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr @@ -249,13 +225,11 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) 
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2 ; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY5:%[0-9]+]]:gpr32 = COPY [[FADDSrr]] @@ -266,13 +240,11 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) ; CHECK-NOBF16-NEXT: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri killed [[ADDWrr1]], 16, 31 ; CHECK-NOBF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]] ; CHECK-NOBF16-NEXT: [[COPY7:%[0-9]+]]:fpr16 = COPY [[COPY6]].hsub - ; CHECK-NOBF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[COPY7]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]] + ; 
CHECK-NOBF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[COPY7]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]] ; CHECK-NOBF16-NEXT: [[COPY8:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub - ; CHECK-NOBF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]] ; CHECK-NOBF16-NEXT: [[COPY9:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY8]], killed [[COPY9]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY10:%[0-9]+]]:gpr32 = COPY [[FADDSrr1]] @@ -292,27 +264,19 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) ; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2 ; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed 
[[INSERT_SUBREG1]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-BF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr - ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nnan ninf contract nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr - ; CHECK-BF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[BFCVT]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]] ; CHECK-BF16-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub - ; CHECK-BF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]] - ; CHECK-BF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub - ; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY5]], killed [[COPY6]], implicit $fpcr - ; CHECK-BF16-NEXT: [[BFCVT1:%[0-9]+]]:fpr16 = nnan ninf contract nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr - ; CHECK-BF16-NEXT: $h0 = COPY [[BFCVT1]] + ; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[FADDSrr]], killed [[COPY5]], implicit $fpcr + ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nnan ninf contract nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr + ; CHECK-BF16-NEXT: $h0 = COPY [[BFCVT]] ; CHECK-BF16-NEXT: 
RET_ReallyLR implicit $h0 entry: %add1 = fadd nnan ninf contract bfloat %x, %y @@ -328,13 +292,11 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2 ; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY5:%[0-9]+]]:gpr32 = COPY [[FADDSrr]] @@ -345,13 +307,11 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-NOBF16-NEXT: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri killed [[ADDWrr1]], 16, 31 ; CHECK-NOBF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]] ; CHECK-NOBF16-NEXT: [[COPY7:%[0-9]+]]:fpr16 = COPY [[COPY6]].hsub - ; CHECK-NOBF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = 
INSERT_SUBREG [[DEF2]], killed [[COPY7]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[COPY7]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]] ; CHECK-NOBF16-NEXT: [[COPY8:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub - ; CHECK-NOBF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub - ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]] + ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]] ; CHECK-NOBF16-NEXT: [[COPY9:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub ; CHECK-NOBF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY8]], killed [[COPY9]], implicit $fpcr ; CHECK-NOBF16-NEXT: [[COPY10:%[0-9]+]]:gpr32 = COPY [[FADDSrr1]] @@ -371,23 +331,19 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) { ; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2 ; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 ; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]] ; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub - ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = 
INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]] ; CHECK-BF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub ; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = ninf nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr - ; CHECK-BF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[BFCVT]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[BFCVT]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]] ; CHECK-BF16-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub - ; CHECK-BF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF - ; CHECK-BF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub - ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]] + ; CHECK-BF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]] ; CHECK-BF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub ; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY5]], killed [[COPY6]], implicit $fpcr ; CHECK-BF16-NEXT: [[BFCVT1:%[0-9]+]]:fpr16 = ninf nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index 381c67c6d749e..da6b3bb99dbda 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -74,30 +74,16 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) { ; ; NONEON-NOSVE-LABEL: test_copysign_bf16: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 ; NONEON-NOSVE-NEXT: ldr h0, [x0] ; NONEON-NOSVE-NEXT: ldr h1, [x1] -; NONEON-NOSVE-NEXT: str h0, [sp, #40] -; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] -; NONEON-NOSVE-NEXT: str h1, [sp, #76] -; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NONEON-NOSVE-NEXT: str q0, [sp] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] -; NONEON-NOSVE-NEXT: lsl w9, w8, #16 -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] -; NONEON-NOSVE-NEXT: lsl w8, w8, #16 -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] -; NONEON-NOSVE-NEXT: lsl w9, w8, #16 -; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: str h1, [sp, #12] ; NONEON-NOSVE-NEXT: lsl w8, w8, #16 -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #77] -; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] ; NONEON-NOSVE-NEXT: tst w8, #0x80 -; NONEON-NOSVE-NEXT: str q0, [sp, #48] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] ; NONEON-NOSVE-NEXT: fabs s0, s0 ; NONEON-NOSVE-NEXT: fneg s1, s0 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne @@ -105,7 +91,7 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) { ; NONEON-NOSVE-NEXT: lsr w8, w8, #16 ; NONEON-NOSVE-NEXT: fmov s0, w8 ; NONEON-NOSVE-NEXT: str h0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #16 ; NONEON-NOSVE-NEXT: ret %a = load bfloat, ptr %ap %b = 
load bfloat, ptr %bp