Skip to content

Commit 8690acd

Browse files
committed
[X86][FP16] Fix a crash when AVX512VL is not set
Fixes the problem reported in #116153.
1 parent 322eb1a commit 8690acd

File tree

7 files changed

+311
-45
lines changed

7 files changed

+311
-45
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23566,6 +23566,9 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
2356623566

2356723567
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
2356823568
if (isSoftF16(EltVT, Subtarget)) {
23569+
if (Subtarget.hasAVX512() && !Subtarget.hasVLX())
23570+
return SDValue();
23571+
2356923572
// Break 256-bit FP vector compare into smaller ones.
2357023573
if (OpVT.is256BitVector() && !Subtarget.useAVX512Regs())
2357123574
return splitVSETCC(VT, Op0, Op1, Cond, DAG, dl);

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2159,11 +2159,30 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
21592159
define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
21602160
; KNL-LABEL: test_concat_v2i1:
21612161
; KNL: ## %bb.0:
2162-
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2163-
; KNL-NEXT: vcvtph2ps %xmm0, %ymm0
2164-
; KNL-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
2165-
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
2166-
; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1
2162+
; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2163+
; KNL-NEXT: vcvtph2ps %xmm0, %xmm1
2164+
; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
2165+
; KNL-NEXT: vucomiss %xmm2, %xmm1
2166+
; KNL-NEXT: setb %al
2167+
; KNL-NEXT: andl $1, %eax
2168+
; KNL-NEXT: kmovw %eax, %k0
2169+
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2170+
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
2171+
; KNL-NEXT: vucomiss %xmm2, %xmm0
2172+
; KNL-NEXT: setb %al
2173+
; KNL-NEXT: kmovw %eax, %k1
2174+
; KNL-NEXT: kshiftlw $1, %k1, %k1
2175+
; KNL-NEXT: korw %k1, %k0, %k0
2176+
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
2177+
; KNL-NEXT: vucomiss %xmm2, %xmm1
2178+
; KNL-NEXT: seta %al
2179+
; KNL-NEXT: andl $1, %eax
2180+
; KNL-NEXT: kmovw %eax, %k1
2181+
; KNL-NEXT: vucomiss %xmm2, %xmm0
2182+
; KNL-NEXT: seta %al
2183+
; KNL-NEXT: kmovw %eax, %k2
2184+
; KNL-NEXT: kshiftlw $1, %k2, %k2
2185+
; KNL-NEXT: korw %k2, %k1, %k1
21672186
; KNL-NEXT: kandw %k1, %k0, %k1
21682187
; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
21692188
; KNL-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1

llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,30 +1441,56 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
14411441
define void @half_vec_compare(ptr %x, ptr %y) {
14421442
; KNL-LABEL: half_vec_compare:
14431443
; KNL: ## %bb.0: ## %entry
1444-
; KNL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445-
; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
1446-
; KNL-NEXT: vcvtph2ps %xmm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
1447-
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
1448-
; KNL-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc1,0x04]
1449-
; KNL-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
1444+
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445+
; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
1446+
; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
1447+
; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
1448+
; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
1449+
; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
1450+
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
1451+
; KNL-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca]
1452+
; KNL-NEXT: movl $65535, %ecx ## encoding: [0xb9,0xff,0xff,0x00,0x00]
1453+
; KNL-NEXT: ## imm = 0xFFFF
1454+
; KNL-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
1455+
; KNL-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
1456+
; KNL-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
1457+
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1458+
; KNL-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
1459+
; KNL-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]
1460+
; KNL-NEXT: cmovpl %ecx, %eax ## encoding: [0x0f,0x4a,0xc1]
1461+
; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1462+
; KNL-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc2,0x01]
1463+
; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
14501464
; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
14511465
; KNL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
14521466
; KNL-NEXT: vpextrw $0, %xmm0, (%rsi) ## encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1453-
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
14541467
; KNL-NEXT: retq ## encoding: [0xc3]
14551468
;
14561469
; AVX512BW-LABEL: half_vec_compare:
14571470
; AVX512BW: ## %bb.0: ## %entry
1458-
; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1459-
; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
1460-
; AVX512BW-NEXT: vcvtph2ps %xmm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
1461-
; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
1462-
; AVX512BW-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfc,0xc2,0xc1,0x04]
1463-
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
1471+
; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1472+
; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
1473+
; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
1474+
; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
1475+
; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
1476+
; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
1477+
; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
1478+
; AVX512BW-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca]
1479+
; AVX512BW-NEXT: movl $65535, %ecx ## encoding: [0xb9,0xff,0xff,0x00,0x00]
1480+
; AVX512BW-NEXT: ## imm = 0xFFFF
1481+
; AVX512BW-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
1482+
; AVX512BW-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
1483+
; AVX512BW-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
1484+
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1485+
; AVX512BW-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
1486+
; AVX512BW-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]
1487+
; AVX512BW-NEXT: cmovpl %ecx, %eax ## encoding: [0x0f,0x4a,0xc1]
1488+
; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1489+
; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x01]
1490+
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
14641491
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
14651492
; AVX512BW-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
14661493
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1467-
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
14681494
; AVX512BW-NEXT: retq ## encoding: [0xc3]
14691495
;
14701496
; SKX-LABEL: half_vec_compare:

llvm/test/CodeGen/X86/fminimum-fmaximum.ll

Lines changed: 182 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,26 +1641,188 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
16411641
;
16421642
; AVX512-LABEL: test_fmaximum_v4f16:
16431643
; AVX512: # %bb.0:
1644-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm2
1645-
; AVX512-NEXT: vcvtph2ps %xmm1, %ymm3
1646-
; AVX512-NEXT: vcmpltps %ymm2, %ymm3, %ymm4
1647-
; AVX512-NEXT: vpmovdw %zmm4, %ymm4
1648-
; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm4
1649-
; AVX512-NEXT: vcmpunordps %ymm3, %ymm2, %ymm2
1650-
; AVX512-NEXT: vpmovdw %zmm2, %ymm2
1651-
; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
1652-
; AVX512-NEXT: vpblendvb %xmm2, %xmm3, %xmm4, %xmm2
1653-
; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
1654-
; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
1655-
; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm2, %xmm0
1656-
; AVX512-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm3
1657-
; AVX512-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
1658-
; AVX512-NEXT: vcvtph2ps %xmm2, %ymm1
1659-
; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
1660-
; AVX512-NEXT: vcmpeqps %ymm3, %ymm1, %ymm1
1661-
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
1662-
; AVX512-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
1663-
; AVX512-NEXT: vzeroupper
1644+
; AVX512-NEXT: pushq %rbp
1645+
; AVX512-NEXT: .cfi_def_cfa_offset 16
1646+
; AVX512-NEXT: pushq %r15
1647+
; AVX512-NEXT: .cfi_def_cfa_offset 24
1648+
; AVX512-NEXT: pushq %r14
1649+
; AVX512-NEXT: .cfi_def_cfa_offset 32
1650+
; AVX512-NEXT: pushq %r13
1651+
; AVX512-NEXT: .cfi_def_cfa_offset 40
1652+
; AVX512-NEXT: pushq %r12
1653+
; AVX512-NEXT: .cfi_def_cfa_offset 48
1654+
; AVX512-NEXT: pushq %rbx
1655+
; AVX512-NEXT: .cfi_def_cfa_offset 56
1656+
; AVX512-NEXT: .cfi_offset %rbx, -56
1657+
; AVX512-NEXT: .cfi_offset %r12, -48
1658+
; AVX512-NEXT: .cfi_offset %r13, -40
1659+
; AVX512-NEXT: .cfi_offset %r14, -32
1660+
; AVX512-NEXT: .cfi_offset %r15, -24
1661+
; AVX512-NEXT: .cfi_offset %rbp, -16
1662+
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
1663+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1664+
; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
1665+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1666+
; AVX512-NEXT: xorl %eax, %eax
1667+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1668+
; AVX512-NEXT: movl $65535, %ecx # imm = 0xFFFF
1669+
; AVX512-NEXT: movl $0, %edx
1670+
; AVX512-NEXT: cmovpl %ecx, %edx
1671+
; AVX512-NEXT: movl $0, %edi
1672+
; AVX512-NEXT: cmoval %ecx, %edi
1673+
; AVX512-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1674+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1675+
; AVX512-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1676+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1677+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1678+
; AVX512-NEXT: movl $0, %esi
1679+
; AVX512-NEXT: cmovpl %ecx, %esi
1680+
; AVX512-NEXT: movl $0, %r9d
1681+
; AVX512-NEXT: cmoval %ecx, %r9d
1682+
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
1683+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1684+
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
1685+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1686+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1687+
; AVX512-NEXT: movl $0, %r8d
1688+
; AVX512-NEXT: cmovpl %ecx, %r8d
1689+
; AVX512-NEXT: movl $0, %r11d
1690+
; AVX512-NEXT: cmoval %ecx, %r11d
1691+
; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
1692+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1693+
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
1694+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1695+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1696+
; AVX512-NEXT: movl $0, %r10d
1697+
; AVX512-NEXT: cmovpl %ecx, %r10d
1698+
; AVX512-NEXT: movl $0, %ebp
1699+
; AVX512-NEXT: cmoval %ecx, %ebp
1700+
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
1701+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1702+
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
1703+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1704+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1705+
; AVX512-NEXT: movl $0, %ebx
1706+
; AVX512-NEXT: cmovpl %ecx, %ebx
1707+
; AVX512-NEXT: movl $0, %r14d
1708+
; AVX512-NEXT: cmoval %ecx, %r14d
1709+
; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
1710+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
1711+
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
1712+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1713+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1714+
; AVX512-NEXT: movl $0, %r15d
1715+
; AVX512-NEXT: cmovpl %ecx, %r15d
1716+
; AVX512-NEXT: movl $0, %r12d
1717+
; AVX512-NEXT: cmoval %ecx, %r12d
1718+
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm2
1719+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm3
1720+
; AVX512-NEXT: vucomiss %xmm2, %xmm3
1721+
; AVX512-NEXT: movl $0, %r13d
1722+
; AVX512-NEXT: cmoval %ecx, %r13d
1723+
; AVX512-NEXT: vmovd %r13d, %xmm2
1724+
; AVX512-NEXT: vpinsrw $1, %r12d, %xmm2, %xmm2
1725+
; AVX512-NEXT: vpinsrw $2, %r14d, %xmm2, %xmm2
1726+
; AVX512-NEXT: vpinsrw $3, %ebp, %xmm2, %xmm2
1727+
; AVX512-NEXT: vpinsrw $4, %r11d, %xmm2, %xmm2
1728+
; AVX512-NEXT: vpinsrw $5, %r9d, %xmm2, %xmm2
1729+
; AVX512-NEXT: vpinsrw $6, %edi, %xmm2, %xmm2
1730+
; AVX512-NEXT: movl $0, %edi
1731+
; AVX512-NEXT: cmovpl %ecx, %edi
1732+
; AVX512-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1733+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1734+
; AVX512-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1735+
; AVX512-NEXT: vcvtph2ps %xmm4, %xmm4
1736+
; AVX512-NEXT: vucomiss %xmm3, %xmm4
1737+
; AVX512-NEXT: movl $0, %r9d
1738+
; AVX512-NEXT: cmoval %ecx, %r9d
1739+
; AVX512-NEXT: vpinsrw $7, %r9d, %xmm2, %xmm2
1740+
; AVX512-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm2
1741+
; AVX512-NEXT: vmovd %edi, %xmm3
1742+
; AVX512-NEXT: vpinsrw $1, %r15d, %xmm3, %xmm3
1743+
; AVX512-NEXT: vpinsrw $2, %ebx, %xmm3, %xmm3
1744+
; AVX512-NEXT: vpinsrw $3, %r10d, %xmm3, %xmm3
1745+
; AVX512-NEXT: vpinsrw $4, %r8d, %xmm3, %xmm3
1746+
; AVX512-NEXT: vpinsrw $5, %esi, %xmm3, %xmm3
1747+
; AVX512-NEXT: vpinsrw $6, %edx, %xmm3, %xmm3
1748+
; AVX512-NEXT: movl $0, %edx
1749+
; AVX512-NEXT: cmovpl %ecx, %edx
1750+
; AVX512-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
1751+
; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
1752+
; AVX512-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
1753+
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
1754+
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
1755+
; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
1756+
; AVX512-NEXT: vucomiss %xmm4, %xmm3
1757+
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
1758+
; AVX512-NEXT: cmovnel %eax, %edx
1759+
; AVX512-NEXT: cmovpl %eax, %edx
1760+
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm3
1761+
; AVX512-NEXT: vucomiss %xmm4, %xmm3
1762+
; AVX512-NEXT: movl $65535, %esi # imm = 0xFFFF
1763+
; AVX512-NEXT: cmovnel %eax, %esi
1764+
; AVX512-NEXT: cmovpl %eax, %esi
1765+
; AVX512-NEXT: vmovd %esi, %xmm3
1766+
; AVX512-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
1767+
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,1,1]
1768+
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
1769+
; AVX512-NEXT: vucomiss %xmm4, %xmm5
1770+
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
1771+
; AVX512-NEXT: cmovnel %eax, %edx
1772+
; AVX512-NEXT: cmovpl %eax, %edx
1773+
; AVX512-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
1774+
; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[3,3,3,3,4,5,6,7]
1775+
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
1776+
; AVX512-NEXT: vucomiss %xmm4, %xmm5
1777+
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
1778+
; AVX512-NEXT: cmovnel %eax, %edx
1779+
; AVX512-NEXT: cmovpl %eax, %edx
1780+
; AVX512-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3
1781+
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
1782+
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
1783+
; AVX512-NEXT: vucomiss %xmm4, %xmm5
1784+
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
1785+
; AVX512-NEXT: cmovnel %eax, %edx
1786+
; AVX512-NEXT: cmovpl %eax, %edx
1787+
; AVX512-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
1788+
; AVX512-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1789+
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
1790+
; AVX512-NEXT: vucomiss %xmm4, %xmm5
1791+
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
1792+
; AVX512-NEXT: cmovnel %eax, %edx
1793+
; AVX512-NEXT: cmovpl %eax, %edx
1794+
; AVX512-NEXT: vpinsrw $5, %edx, %xmm3, %xmm3
1795+
; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[3,3,3,3]
1796+
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
1797+
; AVX512-NEXT: vucomiss %xmm4, %xmm5
1798+
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
1799+
; AVX512-NEXT: cmovnel %eax, %edx
1800+
; AVX512-NEXT: cmovpl %eax, %edx
1801+
; AVX512-NEXT: vpinsrw $6, %edx, %xmm3, %xmm3
1802+
; AVX512-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1803+
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
1804+
; AVX512-NEXT: vucomiss %xmm4, %xmm5
1805+
; AVX512-NEXT: cmovnel %eax, %ecx
1806+
; AVX512-NEXT: cmovpl %eax, %ecx
1807+
; AVX512-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm3
1808+
; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
1809+
; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm5
1810+
; AVX512-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm0
1811+
; AVX512-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm4
1812+
; AVX512-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
1813+
; AVX512-NEXT: vpblendvb %xmm3, %xmm0, %xmm2, %xmm0
1814+
; AVX512-NEXT: popq %rbx
1815+
; AVX512-NEXT: .cfi_def_cfa_offset 48
1816+
; AVX512-NEXT: popq %r12
1817+
; AVX512-NEXT: .cfi_def_cfa_offset 40
1818+
; AVX512-NEXT: popq %r13
1819+
; AVX512-NEXT: .cfi_def_cfa_offset 32
1820+
; AVX512-NEXT: popq %r14
1821+
; AVX512-NEXT: .cfi_def_cfa_offset 24
1822+
; AVX512-NEXT: popq %r15
1823+
; AVX512-NEXT: .cfi_def_cfa_offset 16
1824+
; AVX512-NEXT: popq %rbp
1825+
; AVX512-NEXT: .cfi_def_cfa_offset 8
16641826
; AVX512-NEXT: retq
16651827
;
16661828
; X86-LABEL: test_fmaximum_v4f16:

llvm/test/CodeGen/X86/pr116153.ll

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3+
4+
define void @_test_func(<16 x half> %0) #0 {
5+
; CHECK-LABEL: _test_func:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
8+
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
9+
; CHECK-NEXT: xorl %eax, %eax
10+
; CHECK-NEXT: vucomiss %xmm1, %xmm1
11+
; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
12+
; CHECK-NEXT: movl $0, %edx
13+
; CHECK-NEXT: cmovnpl %ecx, %edx
14+
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
15+
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
16+
; CHECK-NEXT: vucomiss %xmm1, %xmm1
17+
; CHECK-NEXT: movl $0, %esi
18+
; CHECK-NEXT: cmovnpl %ecx, %esi
19+
; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
20+
; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
21+
; CHECK-NEXT: vucomiss %xmm1, %xmm1
22+
; CHECK-NEXT: movl $0, %edi
23+
; CHECK-NEXT: cmovnpl %ecx, %edi
24+
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
25+
; CHECK-NEXT: vucomiss %xmm0, %xmm0
26+
; CHECK-NEXT: cmovnpl %ecx, %eax
27+
; CHECK-NEXT: vmovd %eax, %xmm0
28+
; CHECK-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
29+
; CHECK-NEXT: vpinsrw $2, %esi, %xmm0, %xmm0
30+
; CHECK-NEXT: vpinsrw $3, %edx, %xmm0, %xmm0
31+
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
32+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
33+
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
34+
; CHECK-NEXT: vmovdqu %xmm1, 16
35+
; CHECK-NEXT: vmovdqu %xmm0, 0
36+
; CHECK-NEXT: vzeroupper
37+
; CHECK-NEXT: retq
38+
%2 = fcmp ord <16 x half> %0, zeroinitializer
39+
%3 = sext <16 x i1> %2 to <16 x i32>
40+
%4 = shufflevector <16 x i32> %3, <16 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
41+
%5 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %4, <4 x i32> zeroinitializer)
42+
%6 = shufflevector <8 x i16> %5, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
43+
%7 = bitcast <16 x i16> %6 to <32 x i8>
44+
store <32 x i8> %7, ptr null, align 1
45+
ret void
46+
}
47+
48+
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
49+
50+
attributes #0 = { "target-features"="+aes,+avx,+avx2,+avx512f,+avx512vnni,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }

llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,12 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
413413
; AVX512F: # %bb.0:
414414
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
415415
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
416-
; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm2
417-
; AVX512F-NEXT: vcvtph2ps %xmm1, %ymm3
418-
; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k1
416+
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2
417+
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
418+
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
419+
; AVX512F-NEXT: seta %al
420+
; AVX512F-NEXT: negb %al
421+
; AVX512F-NEXT: kmovd %eax, %k1
419422
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
420423
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
421424
; AVX512F-NEXT: vzeroupper

0 commit comments

Comments
 (0)