@@ -1322,3 +1322,47 @@ define <4 x float> @test_fmaximum_vector_zero(<4 x float> %x) {
13221322 %r = call <4 x float > @llvm.maximum.v4f32 (<4 x float > %x , <4 x float > <float 0 ., float 0 ., float 0 ., float 0 .>)
13231323 ret <4 x float > %r
13241324}
1325+
1326+ ; PR77805: llvm.maximum of %x against a splat of the scalar %y. FIXME - the checks below record the current output, which only has a NaN (unordered) fixup for %x and no signed-zero fixup; they still need to be updated once signed zeroes are handled correctly.
1327+ define <4 x float > @test_fmaximum_v4f32_splat (<4 x float > %x , float %y ) {
1328+ ; SSE2-LABEL: test_fmaximum_v4f32_splat:
1329+ ; SSE2: # %bb.0:
1330+ ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1331+ ; SSE2-NEXT: movaps %xmm0, %xmm2
1332+ ; SSE2-NEXT: cmpunordps %xmm0, %xmm2
1333+ ; SSE2-NEXT: movaps %xmm0, %xmm3
1334+ ; SSE2-NEXT: andps %xmm2, %xmm3
1335+ ; SSE2-NEXT: maxps %xmm1, %xmm0
1336+ ; SSE2-NEXT: andnps %xmm0, %xmm2
1337+ ; SSE2-NEXT: orps %xmm3, %xmm2
1338+ ; SSE2-NEXT: movaps %xmm2, %xmm0
1339+ ; SSE2-NEXT: retq
1340+ ;
1341+ ; AVX1-LABEL: test_fmaximum_v4f32_splat:
1342+ ; AVX1: # %bb.0:
1343+ ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1344+ ; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm1
1345+ ; AVX1-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
1346+ ; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1347+ ; AVX1-NEXT: retq
1348+ ;
1349+ ; AVX512-LABEL: test_fmaximum_v4f32_splat:
1350+ ; AVX512: # %bb.0:
1351+ ; AVX512-NEXT: vbroadcastss %xmm1, %xmm1
1352+ ; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm1
1353+ ; AVX512-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
1354+ ; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1355+ ; AVX512-NEXT: retq
1356+ ;
1357+ ; X86-LABEL: test_fmaximum_v4f32_splat:
1358+ ; X86: # %bb.0:
1359+ ; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
1360+ ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm1
1361+ ; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
1362+ ; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1363+ ; X86-NEXT: retl
1364+ %splatinsert = insertelement <4 x float > poison, float %y , i64 0 ; put the scalar %y into lane 0
1365+ %vec = shufflevector <4 x float > %splatinsert , <4 x float > poison, <4 x i32 > zeroinitializer ; all-zero mask copies lane 0 into every lane, giving a splat of %y
1366+ %r = call <4 x float > @llvm.maximum.v4f32 (<4 x float > %x , <4 x float > %vec ) readnone ; per-lane llvm.maximum of %x and the splat
1367+ ret <4 x float > %r
1368+ }
0 commit comments