diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index d02f097fef829..cacb292acee18 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3065,6 +3065,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_FPEXT: + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: case TargetOpcode::G_INTRINSIC_LRINT: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 038ad77ae69b2..a88817c9d2d19 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -825,6 +825,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor( {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}}) + .moreElementsToNextPow2(0) + .widenScalarIf( + [](const LegalityQuery &Q) { + LLT DstTy = Q.Types[0]; + LLT SrcTy = Q.Types[1]; + return SrcTy.isVector() && DstTy.isVector() && + SrcTy.getScalarSizeInBits() == 16 && + DstTy.getScalarSizeInBits() == 64; + }, + changeElementTo(1, s32)) .clampNumElements(0, v4s32, v4s32) .clampNumElements(0, v2s64, v2s64) .scalarize(0); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir index 1c10e08d54c61..50394b6bbbf99 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir @@ -32,3 +32,133 @@ body: | RET_ReallyLR ... + +--- +name: fpext_f16_f64 +body: | + bb.0: + liveins: $h0 + ; CHECK-LABEL: name: fpext_f16_f64 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s16) + ; CHECK-NEXT: $d0 = COPY [[FPEXT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(s16) = COPY $h0 + %1:_(s64) = G_FPEXT %0(s16) + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 +... + +--- +name: fpext_v2f16_v2f64 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: fpext_v2f16_v2f64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>) + ; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %1:_(<4 x s16>) = COPY $d0 + %0:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %1(<4 x s16>) + %3:_(<2 x s64>) = G_FPEXT %0(<2 x s16>) + $q0 = COPY %3(<2 x s64>) + RET_ReallyLR implicit $q0 +... + +--- +name: fpext_v3f16_v3f64 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: fpext_v3f16_v3f64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT2]](<2 x s64>) + ; CHECK-NEXT: $d0 = COPY [[UV2]](s64) + ; CHECK-NEXT: $d1 = COPY [[UV3]](s64) + ; CHECK-NEXT: $d2 = COPY [[UV4]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1, implicit $d2 + %1:_(<4 x s16>) = COPY $d0 + %2:_(s16), %3:_(s16), %4:_(s16), %5:_(s16) = G_UNMERGE_VALUES %1(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16) + %6:_(<3 x s64>) = G_FPEXT %0(<3 x s16>) + %7:_(s64), %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %6(<3 x s64>) + $d0 = COPY %7(s64) + $d1 = COPY %8(s64) + $d2 = COPY %9(s64) + RET_ReallyLR implicit $d0, implicit $d1, implicit $d2 +... + +--- +name: fpext_v4f16_v4f64 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: fpext_v4f16_v4f64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>) + ; CHECK-NEXT: $q0 = COPY [[FPEXT1]](<2 x s64>) + ; CHECK-NEXT: $q1 = COPY [[FPEXT2]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s64>) = G_FPEXT %0(<4 x s16>) + %2:_(<2 x s64>), %3:_(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>) + $q0 = COPY %2(<2 x s64>) + $q1 = COPY %3(<2 x s64>) + RET_ReallyLR implicit $q0, implicit $q1 +... + +--- +name: fpext_v8f16_v8f64 +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: fpext_v8f16_v8f64 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT1]](<4 x s32>) + ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV2]](<2 x s32>) + ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV3]](<2 x s32>) + ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV4]](<2 x s32>) + ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV5]](<2 x s32>) + ; CHECK-NEXT: $q0 = COPY [[FPEXT2]](<2 x s64>) + ; CHECK-NEXT: $q1 = COPY [[FPEXT3]](<2 x s64>) + ; CHECK-NEXT: $q2 = COPY [[FPEXT4]](<2 x s64>) + ; CHECK-NEXT: $q3 = COPY [[FPEXT5]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3 + %0:_(<8 x s16>) = COPY $q0 + %1:_(<8 x s64>) = G_FPEXT %0(<8 x s16>) + %2:_(<2 x s64>), %3:_(<2 x s64>), %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %1(<8 x s64>) + $q0 = COPY %2(<2 x s64>) + $q1 = COPY %3(<2 x s64>) + $q2 = COPY %4(<2 x s64>) + $q3 = COPY %5(<2 x s64>) + RET_ReallyLR implicit $q0, implicit $q1, implicit $q2, implicit $q3 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 896603d6eb20d..92b273c6141d1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -555,8 +555,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll index a37aabb0b5384..12b6562b5cf0c 100644 --- a/llvm/test/CodeGen/AArch64/fmla.ll +++ b/llvm/test/CodeGen/AArch64/fmla.ll @@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h -; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4] ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4] +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] -; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2] @@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h -; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4] ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4] +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] -; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2] diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 6233ce743b706..1e1e25c04b384 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) { ; ; CHECK-CVT-GI-LABEL: h_to_d: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] -; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] -; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] -; CHECK-CVT-GI-NEXT: fcvt d0, h0 -; CHECK-CVT-GI-NEXT: fcvt d4, h1 -; CHECK-CVT-GI-NEXT: fcvt d1, h2 -; CHECK-CVT-GI-NEXT: fcvt d2, h3 -; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: h_to_d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d4, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h2 -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-FP16-GI-NEXT: ret %1 = fpext <4 x half> %a to <4 x double> ret <4 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index 86763eb5f9e3b..7b152bcccf1e5 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) { ; ; CHECK-CVT-GI-LABEL: h_to_d: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] -; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] -; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] -; CHECK-CVT-GI-NEXT: mov h4, v0.h[4] -; CHECK-CVT-GI-NEXT: mov h5, v0.h[5] -; CHECK-CVT-GI-NEXT: mov h6, v0.h[6] -; CHECK-CVT-GI-NEXT: mov h7, v0.h[7] -; CHECK-CVT-GI-NEXT: fcvt d0, h0 -; CHECK-CVT-GI-NEXT: fcvt d16, h1 -; CHECK-CVT-GI-NEXT: fcvt d1, h2 -; CHECK-CVT-GI-NEXT: fcvt d17, h3 -; CHECK-CVT-GI-NEXT: fcvt d2, h4 -; CHECK-CVT-GI-NEXT: fcvt d4, h5 -; CHECK-CVT-GI-NEXT: fcvt d3, h6 -; CHECK-CVT-GI-NEXT: fcvt d5, h7 -; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0] -; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0] -; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0] -; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: h_to_d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d16, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h2 -; CHECK-FP16-GI-NEXT: fcvt d17, h3 -; CHECK-FP16-GI-NEXT: fcvt d2, h4 -; CHECK-FP16-GI-NEXT: fcvt d4, h5 -; CHECK-FP16-GI-NEXT: fcvt d3, h6 -; CHECK-FP16-GI-NEXT: fcvt d5, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0] -; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s ; CHECK-FP16-GI-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 637c02875b84e..b075a8b6f70ee 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -285,31 +285,24 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: stest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_1 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_1] ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_0] -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -351,24 +344,17 @@ define <4 x i32> @utest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: utest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -412,28 +398,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: ustest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0 +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0 +; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2273,31 +2252,24 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: stest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_1 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1] ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2337,24 +2309,17 @@ define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: utest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -2397,28 +2362,21 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: ustest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0 +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0 +; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index df90f9d5f0910..8980340a447de 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -82,11 +82,12 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) { ; ; CHECK-GI-LABEL: fpext_v3f32_v3f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[2] +; CHECK-GI-NEXT: mov v1.s[0], v0.s[2] ; CHECK-GI-NEXT: fcvtl v0.2d, v0.2s -; CHECK-GI-NEXT: fcvt d2, s1 +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret entry: %c = fpext <3 x float> %a to <3 x double> @@ -320,20 +321,11 @@ entry: } define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) { -; CHECK-SD-LABEL: fpext_v2f16_v2f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h -; CHECK-SD-NEXT: fcvtl v0.2d, v0.2s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fpext_v2f16_v2f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fpext_v2f16_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: ret entry: %c = fpext <2 x half> %a to <2 x double> ret <2 x double> %c @@ -353,12 +345,12 @@ define <3 x double> @fpext_v3f16_v3f64(<3 x half> %a) { ; ; CHECK-GI-LABEL: fpext_v3f16_v3f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: fcvt d2, h2 +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v1.4s +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = fpext <3 x half> %a to <3 x double> @@ -375,16 +367,9 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) { ; ; CHECK-GI-LABEL: fpext_v4f16_v4f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: mov h3, v0.h[3] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d4, h1 -; CHECK-GI-NEXT: fcvt d1, h2 -; CHECK-GI-NEXT: fcvt d2, h3 -; CHECK-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-GI-NEXT: ret entry: %c = fpext <4 x half> %a to <4 x double> diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f6053cee50dae..3dafabe0b69d7 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -4610,11 +4610,8 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v2f16_v2i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v0.2s ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4654,11 +4651,8 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v2f16_v2i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v0.2s ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4710,20 +4704,14 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v3f16_v3i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d1, h0 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <3 x half> %a to <3 x i64> @@ -4774,20 +4762,14 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v3f16_v3i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d1, h0 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <3 x half> %a to <3 x i64> @@ -4842,17 +4824,10 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v4f16_v4i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4908,17 +4883,10 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v4f16_v4i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -5005,29 +4973,16 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v8f16_v8i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v4.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <8 x half> %a to <8 x i64> @@ -5113,29 +5068,16 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v8f16_v8i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v4.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <8 x half> %a to <8 x i64> @@ -5285,52 +5227,26 @@ define <16 x i64> @fptos_v16f16_v16i64(<16 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v16f16_v16i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h3, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d2, h0 -; CHECK-FP16-GI-NEXT: mov h6, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h16, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h0, v0.h[7] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[2] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h22, v1.h[6] -; CHECK-FP16-GI-NEXT: mov h23, v1.h[7] -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: fcvt d16, h16 -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d24, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h17 -; CHECK-FP16-GI-NEXT: fcvt d17, h18 -; CHECK-FP16-GI-NEXT: fcvt d18, h19 -; CHECK-FP16-GI-NEXT: fcvt d19, h20 -; CHECK-FP16-GI-NEXT: fcvt d20, h21 -; CHECK-FP16-GI-NEXT: fcvt d21, h22 -; CHECK-FP16-GI-NEXT: fcvt d22, h23 -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0] -; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0] -; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0] -; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v6.2d -; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v16.2d -; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v24.2d -; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v17.2d -; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v19.2d -; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v21.2d +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s +; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v4.2d +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v5.2d +; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v7.2d +; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v16.2d +; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v17.2d +; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v18.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <16 x half> %a to <16 x i64> @@ -5480,52 +5396,26 @@ define <16 x i64> @fptou_v16f16_v16i64(<16 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v16f16_v16i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h3, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d2, h0 -; CHECK-FP16-GI-NEXT: mov h6, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h16, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h0, v0.h[7] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[2] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h22, v1.h[6] -; CHECK-FP16-GI-NEXT: mov h23, v1.h[7] -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: fcvt d16, h16 -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d24, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h17 -; CHECK-FP16-GI-NEXT: fcvt d17, h18 -; CHECK-FP16-GI-NEXT: fcvt d18, h19 -; CHECK-FP16-GI-NEXT: fcvt d19, h20 -; CHECK-FP16-GI-NEXT: fcvt d20, h21 -; CHECK-FP16-GI-NEXT: fcvt d21, h22 -; CHECK-FP16-GI-NEXT: fcvt d22, h23 -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0] -; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0] -; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0] -; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v6.2d -; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v16.2d -; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v24.2d -; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v17.2d -; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v19.2d -; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v21.2d +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s +; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v4.2d +; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v5.2d +; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v7.2d +; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v16.2d +; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v17.2d +; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v18.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <16 x half> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index b963acd8cb2a1..dbcfaff8aee05 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3088,30 +3088,14 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_signed_v4f16_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NEXT: fcvtzs v0.2d, v1.2d +; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x } @@ -3797,46 +3781,19 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v2.2d -; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-GI-CVT-NEXT: fcvtzs v3.2d, v4.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: fcvt d4, h4 -; CHECK-GI-FP16-NEXT: fcvt d5, h5 -; CHECK-GI-FP16-NEXT: fcvt d6, h6 -; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_signed_v8f16_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-GI-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-NEXT: fcvtzs v2.2d, v3.2d +; CHECK-GI-NEXT: fcvtzs v3.2d, v4.2d +; CHECK-GI-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 5a66b68af8e96..44e6e9415263b 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2506,30 +2506,14 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_unsigned_v4f16_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NEXT: fcvtzu v0.2d, v1.2d +; CHECK-GI-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x } @@ -3114,46 +3098,19 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v2.2d -; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-GI-CVT-NEXT: fcvtzu v3.2d, v4.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: fcvt d4, h4 -; CHECK-GI-FP16-NEXT: fcvt d5, h5 -; CHECK-GI-FP16-NEXT: fcvt d6, h6 -; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_unsigned_v8f16_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-GI-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-GI-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-NEXT: fcvtzu v2.2d, v3.2d +; CHECK-GI-NEXT: fcvtzu v3.2d, v4.2d +; CHECK-GI-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x }