@@ -83,24 +83,25 @@ entry:
8383 ret <32 x half > %3
8484}
8585
; test6 -- shown as a diff hunk: "-" lines are the previous version of the test,
; "+" lines are the new version, unmarked lines are unchanged context.
;
; The IR flips bit 31 of every i32 lane of %a (xor with -2147483648), views the
; result as <16 x float>, and passes it to two calls of
; @llvm.x86.avx512fp16.mask.vfmul.cph.512 (both with mask i16 -1 and a final
; operand of i32 4). Each product is bitcast to <32 x half> and the two are
; summed with fadd.
;
; What the new version changes:
;   * the function takes a second argument, %b;
;   * the second call now multiplies %1 by %b instead of zeroinitializer by %1;
;   * %b, viewed as <32 x half>, is added to the sum before returning.
; The expected assembly changes accordingly: the vxorps/vmovaps pair is gone and
; a trailing vaddph is expected after vfcmulcph/vfcmaddcph.
;
; NOTE(review): the bit-31 flip presumably is what lets the backend select the
; vfcmulcph/vfcmaddcph forms -- confirm against the X86 lowering.
86- define dso_local <32 x half > @test6 (<16 x i32 > %a ) local_unnamed_addr #0 {
86+ define dso_local <32 x half > @test6 (<16 x i32 > %a , < 16 x float > %b ) local_unnamed_addr #0 {
8787; CHECK-LABEL: test6:
8888; CHECK: # %bb.0: # %entry
8989; CHECK-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
90- ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
91- ; CHECK-NEXT: vfcmulcph %zmm0, %zmm3, %zmm1
92- ; CHECK-NEXT: vfcmaddcph %zmm0, %zmm2, %zmm1
93- ; CHECK-NEXT: vmovaps %zmm1, %zmm0
90+ ; CHECK-NEXT: vfcmulcph %zmm0, %zmm1, %zmm3
91+ ; CHECK-NEXT: vfcmaddcph %zmm0, %zmm2, %zmm3
92+ ; CHECK-NEXT: vaddph %zmm1, %zmm3, %zmm0
9493; CHECK-NEXT: retq
9594entry:
; Toggle the top bit of each 32-bit lane of %a, then reinterpret as float lanes.
9695 %0 = xor <16 x i32 > %a , splat (i32 -2147483648 )
9796 %1 = bitcast <16 x i32 > %0 to <16 x float >
; First product: splat(1.0) with %1.
9897 %2 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float > splat (float 1 .000000e+00 ), <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 )
9998 %3 = bitcast <16 x float > %2 to <32 x half >
; Second product: previously zeroinitializer with %1, now %1 with %b.
100- %4 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float > zeroinitializer , <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 )
99+ %4 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float > %1 , <16 x float > %b , <16 x float > zeroinitializer , i16 -1 , i32 4 )
101100 %5 = bitcast <16 x float > %4 to <32 x half >
102101 %6 = fadd <32 x half > %3 , %5
; New version only: add %b (as <32 x half>) to the sum of the two products.
103- ret <32 x half > %6
102+ %7 = bitcast <16 x float > %b to <32 x half >
103+ %8 = fadd <32 x half > %6 , %7
104+ ret <32 x half > %8
104105}
105106
106107declare <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float >, <16 x float >, <16 x float >, i16 , i32 immarg)
0 commit comments