@@ -1384,11 +1384,10 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
13841384 ret <4 x float > %r
13851385}
13861386
1387- define <4 x half > @test_fmaximum_v4f16 (<4 x half > %x , <4 x half > %y ) {
1387+ define <4 x half > @test_fmaximum_v4f16 (<4 x half > %x , <4 x half > %y ) nounwind {
13881388; SSE2-LABEL: test_fmaximum_v4f16:
13891389; SSE2: # %bb.0:
13901390; SSE2-NEXT: subq $104, %rsp
1391- ; SSE2-NEXT: .cfi_def_cfa_offset 112
13921391; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
13931392; SSE2-NEXT: psrld $16, %xmm0
13941393; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1524,13 +1523,11 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
15241523; SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
15251524; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15261525; SSE2-NEXT: addq $104, %rsp
1527- ; SSE2-NEXT: .cfi_def_cfa_offset 8
15281526; SSE2-NEXT: retq
15291527;
15301528; AVX1-LABEL: test_fmaximum_v4f16:
15311529; AVX1: # %bb.0:
15321530; AVX1-NEXT: subq $120, %rsp
1533- ; AVX1-NEXT: .cfi_def_cfa_offset 128
15341531; AVX1-NEXT: vmovaps %xmm0, %xmm2
15351532; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
15361533; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1636,29 +1633,16 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
16361633; AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
16371634; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
16381635; AVX1-NEXT: addq $120, %rsp
1639- ; AVX1-NEXT: .cfi_def_cfa_offset 8
16401636; AVX1-NEXT: retq
16411637;
16421638; AVX512-LABEL: test_fmaximum_v4f16:
16431639; AVX512: # %bb.0:
16441640; AVX512-NEXT: pushq %rbp
1645- ; AVX512-NEXT: .cfi_def_cfa_offset 16
16461641; AVX512-NEXT: pushq %r15
1647- ; AVX512-NEXT: .cfi_def_cfa_offset 24
16481642; AVX512-NEXT: pushq %r14
1649- ; AVX512-NEXT: .cfi_def_cfa_offset 32
16501643; AVX512-NEXT: pushq %r13
1651- ; AVX512-NEXT: .cfi_def_cfa_offset 40
16521644; AVX512-NEXT: pushq %r12
1653- ; AVX512-NEXT: .cfi_def_cfa_offset 48
16541645; AVX512-NEXT: pushq %rbx
1655- ; AVX512-NEXT: .cfi_def_cfa_offset 56
1656- ; AVX512-NEXT: .cfi_offset %rbx, -56
1657- ; AVX512-NEXT: .cfi_offset %r12, -48
1658- ; AVX512-NEXT: .cfi_offset %r13, -40
1659- ; AVX512-NEXT: .cfi_offset %r14, -32
1660- ; AVX512-NEXT: .cfi_offset %r15, -24
1661- ; AVX512-NEXT: .cfi_offset %rbp, -16
16621646; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
16631647; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
16641648; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
@@ -1812,23 +1796,16 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
18121796; AVX512-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
18131797; AVX512-NEXT: vpblendvb %xmm3, %xmm0, %xmm2, %xmm0
18141798; AVX512-NEXT: popq %rbx
1815- ; AVX512-NEXT: .cfi_def_cfa_offset 48
18161799; AVX512-NEXT: popq %r12
1817- ; AVX512-NEXT: .cfi_def_cfa_offset 40
18181800; AVX512-NEXT: popq %r13
1819- ; AVX512-NEXT: .cfi_def_cfa_offset 32
18201801; AVX512-NEXT: popq %r14
1821- ; AVX512-NEXT: .cfi_def_cfa_offset 24
18221802; AVX512-NEXT: popq %r15
1823- ; AVX512-NEXT: .cfi_def_cfa_offset 16
18241803; AVX512-NEXT: popq %rbp
1825- ; AVX512-NEXT: .cfi_def_cfa_offset 8
18261804; AVX512-NEXT: retq
18271805;
18281806; X86-LABEL: test_fmaximum_v4f16:
18291807; X86: # %bb.0:
18301808; X86-NEXT: subl $164, %esp
1831- ; X86-NEXT: .cfi_def_cfa_offset 168
18321809; X86-NEXT: vmovdqa %xmm0, %xmm2
18331810; X86-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
18341811; X86-NEXT: vpsrlq $48, %xmm0, %xmm0
@@ -1968,7 +1945,6 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
19681945; X86-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
19691946; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
19701947; X86-NEXT: addl $164, %esp
1971- ; X86-NEXT: .cfi_def_cfa_offset 4
19721948; X86-NEXT: retl
19731949 %r = call <4 x half > @llvm.maximum.v4f16 (<4 x half > %x , <4 x half > %y )
19741950 ret <4 x half > %r
0 commit comments