@@ -398,3 +398,60 @@ define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x
398398 %ret = and <2 x i64 > %madd , splat (i64 1 )
399399 ret <2 x i64 > %ret
400400}
401+
402+ define <2 x i64 > @test_vpmadd52l_mul_one (<2 x i64 > %x0 , <2 x i32 > %x1 ) {
403+ ; CHECK-LABEL: test_vpmadd52l_mul_one:
404+ ; CHECK: # %bb.0:
405+ ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
406+ ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
407+ ; CHECK-NEXT: retq
408+ %ext = zext <2 x i32 > %x1 to <2 x i64 > ; zero-extended from i32, so the upper 32 bits of every lane are zero
409+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 1 ), <2 x i64 > %ext ) ; multiplier splat(1) is the second operand; the expected output above is a zero-extend plus vpaddq, with no vpmadd52luq
410+ ret <2 x i64 > %ifma
411+ }
412+
413+ define <2 x i64 > @test_vpmadd52l_mul_one_commuted (<2 x i64 > %x0 , <2 x i32 > %x1 ) {
414+ ; CHECK-LABEL: test_vpmadd52l_mul_one_commuted:
415+ ; CHECK: # %bb.0:
416+ ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
417+ ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
418+ ; CHECK-NEXT: retq
419+ %ext = zext <2 x i32 > %x1 to <2 x i64 > ; zero-extended from i32, so the upper 32 bits of every lane are zero
420+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > %ext , <2 x i64 > splat(i64 1 )) ; same as test_vpmadd52l_mul_one but with the two multiplicands swapped: splat(1) is now the third operand
421+ ret <2 x i64 > %ifma
422+ }
423+
424+ define <2 x i64 > @test_vpmadd52l_mul_one_no_mask (<2 x i64 > %x0 , <2 x i64 > %x1 ) {
425+ ; AVX512-LABEL: test_vpmadd52l_mul_one_no_mask:
426+ ; AVX512: # %bb.0:
427+ ; AVX512-NEXT: vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
428+ ; AVX512-NEXT: retq
429+ ;
430+ ; AVX-LABEL: test_vpmadd52l_mul_one_no_mask:
431+ ; AVX: # %bb.0:
432+ ; AVX-NEXT: {vex} vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
433+ ; AVX-NEXT: retq
434+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 1 ), <2 x i64 > %x1 ) ; %x1 is a full i64 vector used directly (no zext), so nothing here bounds it to 52 bits; the expected output above keeps vpmadd52luq
435+ ret <2 x i64 > %ifma
436+ }
437+
438+ ; Multiply by (1 << 52) + 1, a constant whose low 52 bits are equal to 1.
439+ define <2 x i64 > @test_vpmadd52l_mul_one_in_52bits (<2 x i64 > %x0 , <2 x i32 > %x1 ) {
440+ ; CHECK-LABEL: test_vpmadd52l_mul_one_in_52bits:
441+ ; CHECK: # %bb.0:
442+ ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
443+ ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
444+ ; CHECK-NEXT: retq
445+ %ext = zext <2 x i32 > %x1 to <2 x i64 > ; zero-extended from i32, so the upper 32 bits of every lane are zero
446+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 4503599627370497 ), <2 x i64 > %ext ) ; 4503599627370497 = (1 << 52) + 1; the expected output above is the same zero-extend plus vpaddq as for a multiplier of 1
447+ ret <2 x i64 > %ifma
448+ }
449+
450+ ; lo(x1) * 1 = lo(x1), so the high 52 bits of the product are still zero.
451+ define <2 x i64 > @test_vpmadd52h_mul_one (<2 x i64 > %x0 , <2 x i64 > %x1 ) {
452+ ; CHECK-LABEL: test_vpmadd52h_mul_one:
453+ ; CHECK: # %bb.0:
454+ ; CHECK-NEXT: retq
455+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 1 ), <2 x i64 > %x1 ) ; high-half variant with multiplier splat(1); the expected output above is a bare retq, i.e. no multiply-add instruction is emitted
456+ ret <2 x i64 > %ifma
457+ }
0 commit comments