Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4852,6 +4852,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerMemcpyInline(MI);
case G_ZEXT:
case G_SEXT:
case G_FPEXT:
case G_ANYEXT:
return lowerEXT(MI);
case G_TRUNC:
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
.moreElementsToNextPow2(0)
.lowerIf([](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isVector() && DstTy.isVector() &&
SrcTy.getNumElements() > 2 &&
SrcTy.getScalarSizeInBits() == 16 &&
DstTy.getScalarSizeInBits() == 64;
})
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.scalarize(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/fmla.ll
Original file line number Diff line number Diff line change
Expand Up @@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
Expand Down Expand Up @@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
Expand Down
26 changes: 6 additions & 20 deletions llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
; CHECK-CVT-GI-NEXT: fcvt d0, h0
; CHECK-CVT-GI-NEXT: fcvt d4, h1
; CHECK-CVT-GI-NEXT: fcvt d1, h2
; CHECK-CVT-GI-NEXT: fcvt d2, h3
; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
; CHECK-FP16-GI-NEXT: fcvt d0, h0
; CHECK-FP16-GI-NEXT: fcvt d4, h1
; CHECK-FP16-GI-NEXT: fcvt d1, h2
; CHECK-FP16-GI-NEXT: fcvt d2, h3
; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <4 x half> %a to <4 x double>
ret <4 x double> %1
Expand Down
50 changes: 12 additions & 38 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) {
;
; CHECK-CVT-GI-LABEL: h_to_d:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
; CHECK-CVT-GI-NEXT: fcvt d0, h0
; CHECK-CVT-GI-NEXT: fcvt d16, h1
; CHECK-CVT-GI-NEXT: fcvt d1, h2
; CHECK-CVT-GI-NEXT: fcvt d17, h3
; CHECK-CVT-GI-NEXT: fcvt d2, h4
; CHECK-CVT-GI-NEXT: fcvt d4, h5
; CHECK-CVT-GI-NEXT: fcvt d3, h6
; CHECK-CVT-GI-NEXT: fcvt d5, h7
; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h
; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: h_to_d:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
; CHECK-FP16-GI-NEXT: fcvt d0, h0
; CHECK-FP16-GI-NEXT: fcvt d16, h1
; CHECK-FP16-GI-NEXT: fcvt d1, h2
; CHECK-FP16-GI-NEXT: fcvt d17, h3
; CHECK-FP16-GI-NEXT: fcvt d2, h4
; CHECK-FP16-GI-NEXT: fcvt d4, h5
; CHECK-FP16-GI-NEXT: fcvt d3, h6
; CHECK-FP16-GI-NEXT: fcvt d5, h7
; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h
; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s
; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s
; CHECK-FP16-GI-NEXT: ret
%1 = fpext <8 x half> %a to <8 x double>
ret <8 x double> %1
Expand Down
Loading