@@ -398,3 +398,60 @@ define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x
398
398
%ret = and <2 x i64 > %madd , splat (i64 1 )
399
399
ret <2 x i64 > %ret
400
400
}
401
+
402
; vpmadd52l where one multiplicand is splat(1) and the other is zero-extended
; from <2 x i32>. The expected output below contains only a zero-extend
; (vpmovzxdq) and a vpaddq; no vpmadd52luq instruction is expected.
+ define <2 x i64 > @test_vpmadd52l_mul_one (<2 x i64 > %x0 , <2 x i32 > %x1 ) {
403
+ ; CHECK-LABEL: test_vpmadd52l_mul_one:
404
+ ; CHECK: # %bb.0:
405
+ ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
406
+ ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
407
+ ; CHECK-NEXT: retq
408
; zext from i32 leaves the upper 32 bits of every i64 lane zero.
+ %ext = zext <2 x i32 > %x1 to <2 x i64 >
409
; Operand order here: accumulator %x0, then splat(1), then the extended value.
+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 1 ), <2 x i64 > %ext )
410
+ ret <2 x i64 > %ifma
411
+ }
412
+
413
; Same inputs as the splat(1) multiply with a zero-extended <2 x i32> operand,
; but with the two multiplicands swapped in the intrinsic call. The expected
; output is the same zero-extend (vpmovzxdq) followed by vpaddq.
+ define <2 x i64 > @test_vpmadd52l_mul_one_commuted (<2 x i64 > %x0 , <2 x i32 > %x1 ) {
414
+ ; CHECK-LABEL: test_vpmadd52l_mul_one_commuted:
415
+ ; CHECK: # %bb.0:
416
+ ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
417
+ ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
418
+ ; CHECK-NEXT: retq
419
+ %ext = zext <2 x i32 > %x1 to <2 x i64 >
420
; Operand order here: accumulator %x0, then the extended value, then splat(1).
+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > %ext , <2 x i64 > splat(i64 1 ))
421
+ ret <2 x i64 > %ifma
422
+ }
423
+
424
; Negative case: %x1 is passed to vpmadd52l as a full <2 x i64> with no
; preceding zext, and the expected output keeps the vpmadd52luq instruction
; with the splat(1) operand read RIP-relative from an LCPI label.
; NOTE(review): presumably the fold to a plain add is not expected here because
; the upper bits of %x1 are not known to be zero - confirm against the combine.
; The two check prefixes differ only in how the constant is read: a {1to2}
; broadcast under the AVX512 prefix, a {vex}-encoded full load under the AVX
; prefix.
+ define <2 x i64 > @test_vpmadd52l_mul_one_no_mask (<2 x i64 > %x0 , <2 x i64 > %x1 ) {
425
+ ; AVX512-LABEL: test_vpmadd52l_mul_one_no_mask:
426
+ ; AVX512: # %bb.0:
427
+ ; AVX512-NEXT: vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
428
+ ; AVX512-NEXT: retq
429
+ ;
430
+ ; AVX-LABEL: test_vpmadd52l_mul_one_no_mask:
431
+ ; AVX: # %bb.0:
432
+ ; AVX-NEXT: {vex} vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
433
+ ; AVX-NEXT: retq
434
+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 1 ), <2 x i64 > %x1 )
435
+ ret <2 x i64 > %ifma
436
+ }
437
+
438
+ ; Multiply by (1 << 52) + 1 = 4503599627370497, i.e. a constant whose low 52 bits equal 1.
; The expected output is the same zero-extend + vpaddq sequence as for a
; multiplier of exactly 1, consistent with the instruction reading only the
; low 52 bits of each multiplicand (so bit 52 of the constant is ignored).
439
+ define <2 x i64 > @test_vpmadd52l_mul_one_in_52bits (<2 x i64 > %x0 , <2 x i32 > %x1 ) {
440
+ ; CHECK-LABEL: test_vpmadd52l_mul_one_in_52bits:
441
+ ; CHECK: # %bb.0:
442
+ ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
443
+ ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
444
+ ; CHECK-NEXT: retq
445
+ %ext = zext <2 x i32 > %x1 to <2 x i64 >
446
+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 4503599627370497 ), <2 x i64 > %ext )
447
+ ret <2 x i64 > %ifma
448
+ }
449
+
450
+ ; lo(x1) * 1 = lo(x1), which fits in 52 bits, so the high 52 bits of the product are still zero.
; The expected output therefore has no instructions before retq: the high-half
; multiply-add contributes nothing and the whole call folds away.
; NOTE(review): presumably this leaves %x0 to be returned unchanged in xmm0 -
; confirm against the calling convention used by the RUN lines.
451
+ define <2 x i64 > @test_vpmadd52h_mul_one (<2 x i64 > %x0 , <2 x i64 > %x1 ) {
452
+ ; CHECK-LABEL: test_vpmadd52h_mul_one:
453
+ ; CHECK: # %bb.0:
454
+ ; CHECK-NEXT: retq
455
+ %ifma = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat(i64 1 ), <2 x i64 > %x1 )
456
+ ret <2 x i64 > %ifma
457
+ }
0 commit comments