@@ -1641,26 +1641,188 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
 ;
 ; AVX512-LABEL: test_fmaximum_v4f16:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcvtph2ps %xmm0, %ymm2
-; AVX512-NEXT: vcvtph2ps %xmm1, %ymm3
-; AVX512-NEXT: vcmpltps %ymm2, %ymm3, %ymm4
-; AVX512-NEXT: vpmovdw %zmm4, %ymm4
-; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm4
-; AVX512-NEXT: vcmpunordps %ymm3, %ymm2, %ymm2
-; AVX512-NEXT: vpmovdw %zmm2, %ymm2
-; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; AVX512-NEXT: vpblendvb %xmm2, %xmm3, %xmm4, %xmm2
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
-; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm3
-; AVX512-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vcvtph2ps %xmm2, %ymm1
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vcmpeqps %ymm3, %ymm1, %ymm1
-; AVX512-NEXT: vpmovdw %zmm1, %ymm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: .cfi_def_cfa_offset 16
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: .cfi_def_cfa_offset 24
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: .cfi_def_cfa_offset 32
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: .cfi_def_cfa_offset 40
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: .cfi_def_cfa_offset 48
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: .cfi_def_cfa_offset 56
+; AVX512-NEXT: .cfi_offset %rbx, -56
+; AVX512-NEXT: .cfi_offset %r12, -48
+; AVX512-NEXT: .cfi_offset %r13, -40
+; AVX512-NEXT: .cfi_offset %r14, -32
+; AVX512-NEXT: .cfi_offset %r15, -24
+; AVX512-NEXT: .cfi_offset %rbp, -16
+; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovpl %ecx, %edx
+; AVX512-NEXT: movl $0, %edi
+; AVX512-NEXT: cmoval %ecx, %edi
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $0, %esi
+; AVX512-NEXT: cmovpl %ecx, %esi
+; AVX512-NEXT: movl $0, %r9d
+; AVX512-NEXT: cmoval %ecx, %r9d
+; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $0, %r8d
+; AVX512-NEXT: cmovpl %ecx, %r8d
+; AVX512-NEXT: movl $0, %r11d
+; AVX512-NEXT: cmoval %ecx, %r11d
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $0, %r10d
+; AVX512-NEXT: cmovpl %ecx, %r10d
+; AVX512-NEXT: movl $0, %ebp
+; AVX512-NEXT: cmoval %ecx, %ebp
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $0, %ebx
+; AVX512-NEXT: cmovpl %ecx, %ebx
+; AVX512-NEXT: movl $0, %r14d
+; AVX512-NEXT: cmoval %ecx, %r14d
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $0, %r15d
+; AVX512-NEXT: cmovpl %ecx, %r15d
+; AVX512-NEXT: movl $0, %r12d
+; AVX512-NEXT: cmoval %ecx, %r12d
+; AVX512-NEXT: vcvtph2ps %xmm1, %xmm2
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm3
+; AVX512-NEXT: vucomiss %xmm2, %xmm3
+; AVX512-NEXT: movl $0, %r13d
+; AVX512-NEXT: cmoval %ecx, %r13d
+; AVX512-NEXT: vmovd %r13d, %xmm2
+; AVX512-NEXT: vpinsrw $1, %r12d, %xmm2, %xmm2
+; AVX512-NEXT: vpinsrw $2, %r14d, %xmm2, %xmm2
+; AVX512-NEXT: vpinsrw $3, %ebp, %xmm2, %xmm2
+; AVX512-NEXT: vpinsrw $4, %r11d, %xmm2, %xmm2
+; AVX512-NEXT: vpinsrw $5, %r9d, %xmm2, %xmm2
+; AVX512-NEXT: vpinsrw $6, %edi, %xmm2, %xmm2
+; AVX512-NEXT: movl $0, %edi
+; AVX512-NEXT: cmovpl %ecx, %edi
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm4, %xmm4
+; AVX512-NEXT: vucomiss %xmm3, %xmm4
+; AVX512-NEXT: movl $0, %r9d
+; AVX512-NEXT: cmoval %ecx, %r9d
+; AVX512-NEXT: vpinsrw $7, %r9d, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm2
+; AVX512-NEXT: vmovd %edi, %xmm3
+; AVX512-NEXT: vpinsrw $1, %r15d, %xmm3, %xmm3
+; AVX512-NEXT: vpinsrw $2, %ebx, %xmm3, %xmm3
+; AVX512-NEXT: vpinsrw $3, %r10d, %xmm3, %xmm3
+; AVX512-NEXT: vpinsrw $4, %r8d, %xmm3, %xmm3
+; AVX512-NEXT: vpinsrw $5, %esi, %xmm3, %xmm3
+; AVX512-NEXT: vpinsrw $6, %edx, %xmm3, %xmm3
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovpl %ecx, %edx
+; AVX512-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; AVX512-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
+; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vucomiss %xmm4, %xmm3
+; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: cmovpl %eax, %edx
+; AVX512-NEXT: vcvtph2ps %xmm2, %xmm3
+; AVX512-NEXT: vucomiss %xmm4, %xmm3
+; AVX512-NEXT: movl $65535, %esi # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %esi
+; AVX512-NEXT: cmovpl %eax, %esi
+; AVX512-NEXT: vmovd %esi, %xmm3
+; AVX512-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,1,1]
+; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
+; AVX512-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: cmovpl %eax, %edx
+; AVX512-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
+; AVX512-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: cmovpl %eax, %edx
+; AVX512-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
+; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
+; AVX512-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: cmovpl %eax, %edx
+; AVX512-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
+; AVX512-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: cmovpl %eax, %edx
+; AVX512-NEXT: vpinsrw $5, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[3,3,3,3]
+; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
+; AVX512-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
+; AVX512-NEXT: cmovnel %eax, %edx
+; AVX512-NEXT: cmovpl %eax, %edx
+; AVX512-NEXT: vpinsrw $6, %edx, %xmm3, %xmm3
+; AVX512-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
+; AVX512-NEXT: vucomiss %xmm4, %xmm5
+; AVX512-NEXT: cmovnel %eax, %ecx
+; AVX512-NEXT: cmovpl %eax, %ecx
+; AVX512-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm3
+; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm5
+; AVX512-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm4
+; AVX512-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpblendvb %xmm3, %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: .cfi_def_cfa_offset 48
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: .cfi_def_cfa_offset 40
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: .cfi_def_cfa_offset 32
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: .cfi_def_cfa_offset 24
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: .cfi_def_cfa_offset 16
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: .cfi_def_cfa_offset 8
 ; AVX512-NEXT: retq
 ;
 ; X86-LABEL: test_fmaximum_v4f16: