Skip to content

Commit 484cea4

Browse files
committed
Widen 128/256 bit vector types when AVX512VL is not available.
1 parent 9f7bddb commit 484cea4

File tree

3 files changed

+129
-152
lines changed

3 files changed

+129
-152
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 66 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -1829,6 +1829,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
18291829
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
18301830
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
18311831
}
1832+
1833+
for (MVT VT : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f32,
1834+
MVT::v4f64, MVT::v16f32, MVT::v8f64})
1835+
setOperationAction(ISD::FLDEXP, VT, Custom);
1836+
1837+
if (Subtarget.hasFP16()) {
1838+
for (MVT VT : {MVT::f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
1839+
setOperationAction(ISD::FLDEXP, VT, Custom);
1840+
}
1841+
18321842
setOperationAction(ISD::LRINT, MVT::v16f32,
18331843
Subtarget.hasDQI() ? Legal : Custom);
18341844
setOperationAction(ISD::LRINT, MVT::v8f64,
@@ -2591,26 +2601,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
25912601
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
25922602
}
25932603

2594-
if (Subtarget.hasAVX512()) {
2595-
for (MVT VT : { MVT::f32, MVT::f64, MVT::v16f32, MVT::v8f64})
2596-
setOperationAction(ISD::FLDEXP, VT, Custom);
2597-
2598-
if (Subtarget.hasVLX()) {
2599-
for (MVT VT : { MVT::v4f32, MVT::v2f64, MVT::v8f32, MVT::v4f64 })
2600-
setOperationAction(ISD::FLDEXP, VT, Custom);
2601-
2602-
if (Subtarget.hasFP16()) {
2603-
for (MVT VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16 })
2604-
setOperationAction(ISD::FLDEXP, VT, Custom);
2605-
}
2606-
}
2607-
2608-
if (Subtarget.hasFP16()) {
2609-
for (MVT VT : { MVT::f16, MVT::v32f16 })
2610-
setOperationAction(ISD::FLDEXP, VT, Custom);
2611-
}
2612-
}
2613-
26142604
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
26152605
// is. We should promote the value to 64-bits to solve this.
26162606
// This is what the CRT headers do - `fmodf` is an inline header
@@ -19177,48 +19167,67 @@ static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
1917719167
SDValue Exp = Op.getOperand(1);
1917819168
MVT XVT, ExpVT;
1917919169

19180-
switch (XTy.SimpleTy) {
19181-
default:
19182-
return SDValue();
19183-
case MVT::f16:
19184-
if (Subtarget.hasFP16()) {
19185-
XVT = Subtarget.hasVLX() ? MVT::v8f16 : MVT::v32f16;
19186-
ExpVT = XVT;
19187-
break;
19188-
}
19189-
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
19190-
[[fallthrough]];
19191-
case MVT::f32:
19192-
XVT = MVT::v4f32;
19193-
ExpVT = MVT::v4f32;
19170+
switch (XTy.SimpleTy) {
19171+
default:
19172+
return SDValue();
19173+
case MVT::f16:
19174+
if (Subtarget.hasFP16()) {
19175+
XVT = MVT::v8f16;
19176+
ExpVT = XVT;
19177+
break;
19178+
}
19179+
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
19180+
[[fallthrough]];
19181+
case MVT::f32:
19182+
XVT = MVT::v4f32;
19183+
ExpVT = MVT::v4f32;
19184+
break;
19185+
case MVT::f64:
19186+
XVT = MVT::v2f64;
19187+
ExpVT = MVT::v2f64;
19188+
break;
19189+
case MVT::v4f32:
19190+
case MVT::v2f64:
19191+
if (!Subtarget.hasVLX()) {
19192+
XVT = XTy == MVT::v4f32 ? MVT::v16f32 : MVT::v8f64;
19193+
ExpVT = XVT;
1919419194
break;
19195-
case MVT::f64:
19196-
XVT = MVT::v2f64;
19197-
ExpVT = MVT::v2f64;
19195+
}
19196+
[[fallthrough]];
19197+
case MVT::v8f32:
19198+
case MVT::v4f64:
19199+
if (!Subtarget.hasVLX()) {
19200+
XVT = XTy == MVT::v8f32 ? MVT::v16f32 : MVT::v8f64;
19201+
ExpVT = XVT;
1919819202
break;
19199-
case MVT::v4f32:
19200-
case MVT::v2f64:
19201-
case MVT::v8f32:
19202-
case MVT::v4f64:
19203-
case MVT::v16f32:
19204-
case MVT::v8f64:
19205-
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
19206-
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
19203+
}
19204+
[[fallthrough]];
19205+
case MVT::v16f32:
19206+
case MVT::v8f64:
19207+
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
19208+
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
1920719209
}
1920819210

19209-
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1921019211
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
19211-
SDValue VX =
19212-
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
19213-
SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
19214-
DAG.getUNDEF(ExpVT), Exp, Zero);
19215-
SDValue Scalef = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
19216-
SDValue Final =
19217-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), Scalef, Zero);
19218-
if (X.getValueType() != XTy)
19219-
Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
19220-
DAG.getIntPtrConstant(1, SDLoc(Op)));
19221-
return Final;
19212+
if (XTy.isVector()) {
19213+
SDValue WideX =
19214+
DAG.getInsertSubvector(DL, DAG.getUNDEF(XVT), X, 0);
19215+
SDValue WideExp =
19216+
DAG.getInsertSubvector(DL, DAG.getUNDEF(ExpVT), Exp, 0);
19217+
SDValue Scalef =
19218+
DAG.getNode(X86ISD::SCALEF, DL, XVT, WideX, WideExp, WideX);
19219+
SDValue Final = DAG.getExtractSubvector(DL, XTy, Scalef, 0);
19220+
return Final;
19221+
} else {
19222+
SDValue VX = DAG.getInsertVectorElt(DL, DAG.getUNDEF(XVT), X, 0);
19223+
SDValue VExp = DAG.getInsertVectorElt(DL, DAG.getUNDEF(ExpVT), Exp, 0);
19224+
SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
19225+
SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
19226+
if (X.getValueType() != XTy)
19227+
Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
19228+
DAG.getIntPtrConstant(1, SDLoc(Op)));
19229+
return Final;
19230+
}
1922219231
}
1922319232

1922419233
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -114,21 +114,11 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
114114
;
115115
; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
116116
; CHECK-ONLY-AVX512F: # %bb.0:
117-
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm1
118-
; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
119-
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm1, %xmm2, %xmm1
120-
; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[1,1,1,1]
121-
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
122-
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
123-
; CHECK-ONLY-AVX512F-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
124-
; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[2,3,2,3]
125-
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
126-
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
127-
; CHECK-ONLY-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
128-
; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
117+
; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
129118
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
130-
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm2, %xmm0
131-
; CHECK-ONLY-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
119+
; CHECK-ONLY-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
120+
; CHECK-ONLY-AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
121+
; CHECK-ONLY-AVX512F-NEXT: vzeroupper
132122
; CHECK-ONLY-AVX512F-NEXT: retq
133123
;
134124
; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:

llvm/test/CodeGen/X86/ldexp-avx512.ll

Lines changed: 59 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,37 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
3-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VL
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512FP16
4+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VLF
5+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VLFP16
46

57
define half @test_half(half %x, i32 %exp) nounwind {
6-
; AVX512-LABEL: test_half:
7-
; AVX512: # %bb.0: # %entry
8-
; AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
9-
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
10-
; AVX512-NEXT: vscalefss %xmm1, %xmm0, %xmm0
11-
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
12-
; AVX512-NEXT: retq
8+
; AVX512F-LABEL: test_half:
9+
; AVX512F: # %bb.0: # %entry
10+
; AVX512F-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
11+
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
12+
; AVX512F-NEXT: vscalefss %xmm1, %xmm0, %xmm0
13+
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
14+
; AVX512F-NEXT: retq
15+
;
16+
; AVX512FP16-LABEL: test_half:
17+
; AVX512FP16: # %bb.0: # %entry
18+
; AVX512FP16-NEXT: vcvtsi2sh %edi, %xmm31, %xmm1
19+
; AVX512FP16-NEXT: vscalefsh %xmm1, %xmm0, %xmm0
20+
; AVX512FP16-NEXT: retq
1321
;
1422
; AVX512VL-LABEL: test_half:
1523
; AVX512VL: # %bb.0: # %entry
16-
; AVX512VL-NEXT: vcvtsi2sh %edi, %xmm31, %xmm1
17-
; AVX512VL-NEXT: vscalefsh %xmm1, %xmm0, %xmm0
24+
; AVX512VL-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
25+
; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
26+
; AVX512VL-NEXT: vscalefss %xmm1, %xmm0, %xmm0
27+
; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
1828
; AVX512VL-NEXT: retq
29+
;
30+
; AVX512VLFP16-LABEL: test_half:
31+
; AVX512VLFP16: # %bb.0: # %entry
32+
; AVX512VLFP16-NEXT: vcvtsi2sh %edi, %xmm31, %xmm1
33+
; AVX512VLFP16-NEXT: vscalefsh %xmm1, %xmm0, %xmm0
34+
; AVX512VLFP16-NEXT: retq
1935
entry:
2036
%r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
2137
ret half %r
@@ -240,30 +256,24 @@ declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
240256
define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
241257
; AVX512-LABEL: test_ldexp_4xfloat:
242258
; AVX512: # %bb.0:
243-
; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm2
244-
; AVX512-NEXT: vscalefss %xmm2, %xmm0, %xmm2
245-
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
246-
; AVX512-NEXT: vshufps {{.*#+}} xmm4 = xmm1[1,1,1,1]
247-
; AVX512-NEXT: vcvtdq2ps %xmm4, %xmm4
248-
; AVX512-NEXT: vscalefss %xmm4, %xmm3, %xmm3
249-
; AVX512-NEXT: vunpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
250-
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
251-
; AVX512-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
252-
; AVX512-NEXT: vcvtdq2ps %xmm4, %xmm4
253-
; AVX512-NEXT: vscalefss %xmm4, %xmm3, %xmm3
254-
; AVX512-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
255-
; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
256-
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
259+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
257260
; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
258-
; AVX512-NEXT: vscalefss %xmm1, %xmm0, %xmm0
259-
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
261+
; AVX512-NEXT: vscalefps %zmm1, %zmm0, %zmm0
262+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
263+
; AVX512-NEXT: vzeroupper
260264
; AVX512-NEXT: retq
261265
;
262266
; AVX512VL-LABEL: test_ldexp_4xfloat:
263267
; AVX512VL: # %bb.0:
264268
; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
265269
; AVX512VL-NEXT: vscalefps %xmm1, %xmm0, %xmm0
266270
; AVX512VL-NEXT: retq
271+
;
272+
; AVX512VLFP16-LABEL: test_ldexp_4xfloat:
273+
; AVX512VLFP16: # %bb.0:
274+
; AVX512VLFP16-NEXT: vcvtdq2ps %xmm1, %xmm1
275+
; AVX512VLFP16-NEXT: vscalefps %xmm1, %xmm0, %xmm0
276+
; AVX512VLFP16-NEXT: retq
267277
%r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
268278
ret <4 x float> %r
269279
}
@@ -663,50 +673,23 @@ declare <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half>, <16 x i16>)
663673
define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
664674
; AVX512-LABEL: test_ldexp_8xfloat:
665675
; AVX512: # %bb.0:
666-
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
667-
; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm3
668-
; AVX512-NEXT: vcvtdq2ps %xmm3, %xmm4
669-
; AVX512-NEXT: vscalefss %xmm4, %xmm2, %xmm4
670-
; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
671-
; AVX512-NEXT: vshufps {{.*#+}} xmm6 = xmm3[1,1,1,1]
672-
; AVX512-NEXT: vcvtdq2ps %xmm6, %xmm6
673-
; AVX512-NEXT: vscalefss %xmm6, %xmm5, %xmm5
674-
; AVX512-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
675-
; AVX512-NEXT: vshufpd {{.*#+}} xmm5 = xmm2[1,0]
676-
; AVX512-NEXT: vshufps {{.*#+}} xmm6 = xmm3[2,3,2,3]
677-
; AVX512-NEXT: vcvtdq2ps %xmm6, %xmm6
678-
; AVX512-NEXT: vscalefss %xmm6, %xmm5, %xmm5
679-
; AVX512-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
680-
; AVX512-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
681-
; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
682-
; AVX512-NEXT: vcvtdq2ps %xmm3, %xmm3
683-
; AVX512-NEXT: vscalefss %xmm3, %xmm2, %xmm2
684-
; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
685-
; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm3
686-
; AVX512-NEXT: vscalefss %xmm3, %xmm0, %xmm3
687-
; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
688-
; AVX512-NEXT: vshufps {{.*#+}} xmm5 = xmm1[1,1,1,1]
689-
; AVX512-NEXT: vcvtdq2ps %xmm5, %xmm5
690-
; AVX512-NEXT: vscalefss %xmm5, %xmm4, %xmm4
691-
; AVX512-NEXT: vunpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
692-
; AVX512-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0]
693-
; AVX512-NEXT: vshufps {{.*#+}} xmm5 = xmm1[2,3,2,3]
694-
; AVX512-NEXT: vcvtdq2ps %xmm5, %xmm5
695-
; AVX512-NEXT: vscalefss %xmm5, %xmm4, %xmm4
696-
; AVX512-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
697-
; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
698-
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
699-
; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
700-
; AVX512-NEXT: vscalefss %xmm1, %xmm0, %xmm0
701-
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
702-
; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
676+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
677+
; AVX512-NEXT: vcvtdq2ps %ymm1, %ymm1
678+
; AVX512-NEXT: vscalefps %zmm1, %zmm0, %zmm0
679+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
703680
; AVX512-NEXT: retq
704681
;
705682
; AVX512VL-LABEL: test_ldexp_8xfloat:
706683
; AVX512VL: # %bb.0:
707684
; AVX512VL-NEXT: vcvtdq2ps %ymm1, %ymm1
708685
; AVX512VL-NEXT: vscalefps %ymm1, %ymm0, %ymm0
709686
; AVX512VL-NEXT: retq
687+
;
688+
; AVX512VLFP16-LABEL: test_ldexp_8xfloat:
689+
; AVX512VLFP16: # %bb.0:
690+
; AVX512VLFP16-NEXT: vcvtdq2ps %ymm1, %ymm1
691+
; AVX512VLFP16-NEXT: vscalefps %ymm1, %ymm0, %ymm0
692+
; AVX512VLFP16-NEXT: retq
710693
%r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
711694
ret <8 x float> %r
712695
}
@@ -715,30 +698,23 @@ declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)
715698
define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
716699
; AVX512-LABEL: test_ldexp_4xdouble:
717700
; AVX512: # %bb.0:
718-
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
719-
; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm1[2,3,2,3]
720-
; AVX512-NEXT: vcvtdq2pd %xmm3, %xmm3
721-
; AVX512-NEXT: vscalefsd %xmm3, %xmm2, %xmm3
722-
; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm4
723-
; AVX512-NEXT: vscalefsd %xmm4, %xmm0, %xmm4
724-
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
725-
; AVX512-NEXT: vshufps {{.*#+}} xmm4 = xmm1[3,3,3,3]
726-
; AVX512-NEXT: vcvtdq2pd %xmm4, %xmm4
727-
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
728-
; AVX512-NEXT: vscalefsd %xmm4, %xmm2, %xmm2
729-
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
730-
; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
731-
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
732-
; AVX512-NEXT: vscalefsd %xmm1, %xmm0, %xmm0
733-
; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
734-
; AVX512-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm3[0],ymm0[0],ymm3[2],ymm0[2]
701+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
702+
; AVX512-NEXT: vcvtdq2pd %xmm1, %ymm1
703+
; AVX512-NEXT: vscalefpd %zmm1, %zmm0, %zmm0
704+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
735705
; AVX512-NEXT: retq
736706
;
737707
; AVX512VL-LABEL: test_ldexp_4xdouble:
738708
; AVX512VL: # %bb.0:
739709
; AVX512VL-NEXT: vcvtdq2pd %xmm1, %ymm1
740710
; AVX512VL-NEXT: vscalefpd %ymm1, %ymm0, %ymm0
741711
; AVX512VL-NEXT: retq
712+
;
713+
; AVX512VLFP16-LABEL: test_ldexp_4xdouble:
714+
; AVX512VLFP16: # %bb.0:
715+
; AVX512VLFP16-NEXT: vcvtdq2pd %xmm1, %ymm1
716+
; AVX512VLFP16-NEXT: vscalefpd %ymm1, %ymm0, %ymm0
717+
; AVX512VLFP16-NEXT: retq
742718
%r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
743719
ret <4 x double> %r
744720
}
@@ -1495,3 +1471,5 @@ define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwi
14951471
}
14961472
declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)
14971473

1474+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1475+
; AVX512VLF: {{.*}}

0 commit comments

Comments (0)