@@ -351,3 +351,214 @@ define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i
   %res = add <16 x i64> %z, %mul
   ret <16 x i64> %res
 }
+
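+; <1 x i64> is scalarized: the 26-bit masked square lowers to a plain imulq/leaq
+; sequence rather than VPMADD52.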
+define <1 x i64> @test_not_i1(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
+; X64-LABEL: test_not_i1:
+; X64: # %bb.0:
+; X64-NEXT: andl $67108863, %edi # imm = 0x3FFFFFF
+; X64-NEXT: imulq %rdi, %rdi
+; X64-NEXT: leaq (%rdi,%rdx), %rax
+; X64-NEXT: retq
+  %x_masked = and <1 x i64> %x, splat (i64 67108863)
+  %y_masked = and <1 x i64> %x, splat (i64 67108863)
+  %mul = mul <1 x i64> %x_masked, %y_masked
+  %res = add <1 x i64> %mul, %z
+  ret <1 x i64> %res
+}
+
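+; <3 x i64> is widened to <4 x i64>; the 26-bit masked operands fit in 32 bits,
+; so a single vpmuludq plus vpaddq is enough on every configuration.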
+define <3 x i64> @test_i3(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
+; AVX-LABEL: test_i3:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX512-NOVL-LABEL: test_i3:
+; AVX512-NOVL: # %bb.0:
+; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
+; AVX512-NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512-NOVL-NEXT: retq
+;
+; AVX512VL-LABEL: test_i3:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+  %x_masked = and <3 x i64> %x, splat (i64 67108863)
+  %y_masked = and <3 x i64> %x, splat (i64 67108863)
+  %mul = mul <3 x i64> %x_masked, %y_masked
+  %res = add <3 x i64> %mul, %z
+  ret <3 x i64> %res
+}
+
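+; <5 x i64> splits into a <4 x i64> body and a scalar tail element: the body uses
+; the VEX-encoded vpmadd52luq, while AVX-512 widens the whole operation to
+; <8 x i64> and uses vpmuludq.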
+define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
+; AVX-LABEL: test_i5:
+; AVX: # %bb.0:
+; AVX-NEXT: movq %rdi, %rax
+; AVX-NEXT: vmovq %r8, %xmm0
+; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vmovq %rsi, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm2
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX-NEXT: vmovq %rcx, %xmm3
+; AVX-NEXT: vmovq %r9, %xmm4
+; AVX-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX-NEXT: vpsrlq $32, %xmm3, %xmm4
+; AVX-NEXT: vpmuludq %xmm4, %xmm3, %xmm4
+; AVX-NEXT: vpsllq $33, %xmm4, %xmm4
+; AVX-NEXT: vpmuludq %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpaddq %xmm1, %xmm3, %xmm1
+; AVX-NEXT: vpaddq %xmm4, %xmm1, %xmm1
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm2
+; AVX-NEXT: vmovdqa %ymm2, (%rdi)
+; AVX-NEXT: vmovq %xmm1, 32(%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_i5:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: retq
+  %x_masked = and <5 x i64> %x, splat (i64 67108863)
+  %y_masked = and <5 x i64> %x, splat (i64 67108863)
+  %mul = mul <5 x i64> %x_masked, %y_masked
+  %res = add <5 x i64> %mul, %z
+  ret <5 x i64> %res
+}
+
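+; <6 x i64>: the low <4 x i64> half uses the VEX-encoded vpmadd52luq and the
+; upper <2 x i64> half falls back to a 128-bit multiply/add; AVX-512 widens the
+; whole operation to <8 x i64>.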
+define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
+; AVX-LABEL: test_i6:
+; AVX: # %bb.0:
+; AVX-NEXT: movq %rdi, %rax
+; AVX-NEXT: vmovq %r8, %xmm0
+; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vmovq %rsi, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm1
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm1
+; AVX-NEXT: vmovq %r9, %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpmuldq %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, 32(%rdi)
+; AVX-NEXT: vmovdqa %ymm1, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_i6:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: retq
+  %x_masked = and <6 x i64> %x, splat (i64 67108863)
+  %y_masked = and <6 x i64> %x, splat (i64 67108863)
+  %mul = mul <6 x i64> %x_masked, %y_masked
+  %res = add <6 x i64> %mul, %z
+  ret <6 x i64> %res
+}
+
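+; <9 x i64>: the <8 x i64> body is handled with vpmadd52luq (two ymm ops on the
+; VEX-encoded path, one zmm op on AVX-512) and the ninth lane is done as a scalar
+; multiply/add in xmm registers.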
+define <9 x i64> @test_i9(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
+; AVX-LABEL: test_i9:
+; AVX: # %bb.0:
+; AVX-NEXT: movq %rdi, %rax
+; AVX-NEXT: vmovq %r8, %xmm0
+; AVX-NEXT: vmovq %rcx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vmovq %rsi, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: vmovq %r9, %xmm1
+; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX-NEXT: vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1
+; AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm3
+; AVX-NEXT: vmovdqu {{[0-9]+}}(%rsp), %ymm4
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm5 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT: vpand %ymm5, %ymm0, %ymm0
+; AVX-NEXT: vpand %ymm5, %ymm1, %ymm1
+; AVX-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX-NEXT: vmovq %rcx, %xmm5
+; AVX-NEXT: vmovq {{.*#+}} xmm6 = mem[0],zero
+; AVX-NEXT: vpand %xmm5, %xmm6, %xmm5
+; AVX-NEXT: vpsrlq $32, %xmm5, %xmm6
+; AVX-NEXT: vpmuludq %xmm6, %xmm5, %xmm6
+; AVX-NEXT: vpsllq $33, %xmm6, %xmm6
+; AVX-NEXT: vpmuludq %xmm5, %xmm5, %xmm5
+; AVX-NEXT: vpaddq %xmm2, %xmm5, %xmm2
+; AVX-NEXT: vpaddq %xmm6, %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %ymm0, %ymm0, %ymm4
+; AVX-NEXT: {vex} vpmadd52luq %ymm1, %ymm1, %ymm3
+; AVX-NEXT: vmovdqa %ymm3, 32(%rdi)
+; AVX-NEXT: vmovdqa %ymm4, (%rdi)
+; AVX-NEXT: vmovq %xmm2, 64(%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_i9:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq %rdi, %rax
+; AVX512-NEXT: vmovq %r8, %xmm0
+; AVX512-NEXT: vmovq %rcx, %xmm1
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512-NEXT: vmovq %rdx, %xmm1
+; AVX512-NEXT: vmovq %rsi, %xmm2
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT: vmovq %r9, %xmm1
+; AVX512-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512-NEXT: vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm2
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX512-NEXT: vmovq %rcx, %xmm3
+; AVX512-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX512-NEXT: vpsrlq $32, %xmm3, %xmm4
+; AVX512-NEXT: vpmuludq %xmm4, %xmm3, %xmm4
+; AVX512-NEXT: vpsllq $33, %xmm4, %xmm4
+; AVX512-NEXT: vpmuludq %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpaddq %xmm1, %xmm3, %xmm1
+; AVX512-NEXT: vpaddq %xmm4, %xmm1, %xmm1
+; AVX512-NEXT: vpmadd52luq %zmm0, %zmm0, %zmm2
+; AVX512-NEXT: vmovq %xmm1, 64(%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+  %x_masked = and <9 x i64> %x, splat (i64 67108863)
+  %y_masked = and <9 x i64> %x, splat (i64 67108863)
+  %mul = mul <9 x i64> %x_masked, %y_masked
+  %res = add <9 x i64> %mul, %z
+  ret <9 x i64> %res
+}