@@ -290,3 +290,111 @@ define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1,
290290 %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > %and1 , <2 x i64 > %and2 )
291291 ret <2 x i64 > %1
292292}
293+
; Known-bits fold for the low-half multiply-add.
; Both multiplicands are masked down to bit 2, so their product is 0 or 16 and
; never touches bit 0. Per the VPMADD52LUQ definition the low 52 bits of that
; product are added to the accumulator splat(1), so bit 0 of the sum is always
; set and the trailing mask is expected to fold to the constant [1,1].
define <2 x i64> @test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test1_knownbits_vpmadd52l:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
; CHECK-NEXT:    # xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %lhs = and <2 x i64> %x0, splat (i64 4) ; only bit 2 may be set
  %rhs = and <2 x i64> %x1, splat (i64 4) ; only bit 2 may be set
  %sum = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat (i64 1), <2 x i64> %lhs, <2 x i64> %rhs)
  %bit0 = and <2 x i64> %sum, splat (i64 1) ; bit 0 comes solely from the accumulator
  ret <2 x i64> %bit0
}
306+
; Known-bits fold for the high-half multiply-add.
; Each multiplicand is masked to bit 30, so the product is 0 or 1 << 60. Per the
; VPMADD52HUQ definition the addend is the product shifted right by 52, i.e.
; 0 or 1 << 8, which cannot disturb the two low bits of the accumulator
; splat(3). The trailing mask is therefore expected to fold to [3,3].
define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test1_knownbits_vpmadd52h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [3,3]
; CHECK-NEXT:    # xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %lhs = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
  %rhs = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
  %sum = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat (i64 3), <2 x i64> %lhs, <2 x i64> %rhs)
  %low2 = and <2 x i64> %sum, splat (i64 3) ; bits 0-1 come solely from the accumulator
  ret <2 x i64> %low2
}
319+
; Known-bits fold for the low-half multiply-add with a wider accumulator.
; The multiplicands are masked to bit 26 and bit 25, so the product is 0 or
; 1 << 51, which still lies inside the low 52 bits used by VPMADD52LUQ. The
; accumulator 1234 only occupies bits below bit 11, so adding 0 or 1 << 51
; leaves those bits intact and the trailing mask is expected to fold to
; [1234,1234].
define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test2_knownbits_vpmadd52l:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1234,1234]
; CHECK-NEXT:    # xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %lhs = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26
  %rhs = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25
  %sum = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat (i64 1234), <2 x i64> %lhs, <2 x i64> %rhs)
  %masked = and <2 x i64> %sum, splat (i64 1234) ; keep exactly the accumulator's bits
  ret <2 x i64> %masked
}
332+
; Known-bits fold for the high-half multiply-add with a multi-bit accumulator.
; Each multiplicand is masked to bit 30, so the product is 0 or 1 << 60 and the
; high-half addend (product >> 52, per the VPMADD52HUQ definition) is 0 or
; 1 << 8. The accumulator is 1025 = (1 << 10) + 1; adding 0 or 256 never
; reaches bit 0, so the trailing mask is expected to fold to [1,1].
define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test2_knownbits_vpmadd52h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
; CHECK-NEXT:    # xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
  ; add (1LL << 10) + 1
  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat (i64 1025), <2 x i64> %and1, <2 x i64> %and2)
  %ret = and <2 x i64> %madd, splat (i64 1) ; bit 0 comes solely from the accumulator
  ret <2 x i64> %ret
}
346+
; Negative test for the low-half multiply-add: the fold must NOT happen.
; The first multiplicand may have bit 0 set (mask is (1 << 26) + 1) and the
; second always has bit 0 set (or with 1), so bit 0 of the product depends on
; %x0. Bit 0 of the sum with the accumulator splat(1) is therefore unknown, and
; the expected code below keeps the vpmadd52luq and the final mask.
define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test3_knownbits_vpmadd52l_negative:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
; AVX512-NEXT:    vpor %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
; AVX512-NEXT:    vpmadd52luq %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test3_knownbits_vpmadd52l_negative:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vmovdqa %xmm2, %xmm3
; AVX-NEXT:    {vex} vpmadd52luq %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
; AVX-NEXT:    retq
  %lhs = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1
  %rhs = or <2 x i64> %x1, splat (i64 1) ; bit 0 forced to one
  %sum = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat (i64 1), <2 x i64> %lhs, <2 x i64> %rhs)
  %bit0 = and <2 x i64> %sum, splat (i64 1) ; not a known constant here
  ret <2 x i64> %bit0
}
373+
; Negative test for the high-half multiply-add: the fold must NOT happen.
; The multiplicands are masked to bit 22 and bit 30, so the product is 0 or
; 1 << 52 and the high-half addend (product >> 52, per the VPMADD52HUQ
; definition) is 0 or 1. Adding that to the accumulator splat(1) makes bit 0 of
; the sum unknown, and the expected code below keeps the vpmadd52huq and the
; final mask.
define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test3_knownbits_vpmadd52h_negative:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
; AVX512-NEXT:    vpmadd52huq %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test3_knownbits_vpmadd52h_negative:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
; AVX-NEXT:    vmovdqa %xmm2, %xmm3
; AVX-NEXT:    {vex} vpmadd52huq %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
; AVX-NEXT:    retq
  %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22
  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
  ; add 1: the high-half addend can reach bit 0, so it may flip the accumulator's low bit
  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat (i64 1), <2 x i64> %and1, <2 x i64> %and2)
  %ret = and <2 x i64> %madd, splat (i64 1) ; not a known constant here
  ret <2 x i64> %ret
}
0 commit comments