Commit 6a0d82b

Add tests for odd sizes
Fix variable case
1 parent 9a21131 commit 6a0d82b

2 files changed: +210 -0 lines changed


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
@@ -57980,6 +57980,10 @@ static SDValue matchVPMADD52(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
       VT.getSizeInBits() < 512)
     return SDValue();
 
+  const auto TotalSize = VT.getSizeInBits();
+  if (TotalSize < 128 || !isPowerOf2_64(TotalSize))
+    return SDValue();
+
   SDValue X, Y, Acc;
   if (!sd_match(N, m_Add(m_Mul(m_Value(X), m_Value(Y)), m_Value(Acc))))
     return SDValue();
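
For context: the new guard rejects the combine whenever the total vector width is below 128 bits or not a power of two, which is why none of the odd-sized vectors in the tests below is matched as a whole. A minimal standalone sketch of the same check (the helpers here are hypothetical stand-ins written for illustration, not LLVM's own code; the real isPowerOf2_64 lives in llvm/Support/MathExtras.h):

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Equivalent of llvm::isPowerOf2_64 for non-zero values: a power of two
// has exactly one bit set, so clearing the lowest set bit yields zero.
static bool isPowerOf2_64(uint64_t V) { return V && (V & (V - 1)) == 0; }

// True when an <N x i64> vector passes the width guard added above:
// at least 128 bits in total, and a power-of-two total size.
static bool passesWidthGuard(uint64_t NumElts) {
  const uint64_t TotalSize = NumElts * 64; // total vector width in bits
  return TotalSize >= 128 && isPowerOf2_64(TotalSize);
}

int main() {
  // The odd sizes from the new tests, <1/3/5/6/9 x i64> = 64/192/320/384/576
  // bits, are all rejected; <2/4/8 x i64> = 128/256/512 bits pass.
  for (uint64_t N : {1, 2, 3, 4, 5, 6, 8, 9})
    std::printf("<%llu x i64> -> %s\n", (unsigned long long)N,
                passesWidthGuard(N) ? "passes" : "rejected");
}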

llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll

Lines changed: 206 additions & 0 deletions
@@ -351,3 +351,209 @@ define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i
   %res = add <16 x i64> %z, %mul
   ret <16 x i64> %res
 }
+
+define <1 x i64> @test_not_i1(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
+; X64-LABEL: test_not_i1:
+; X64:       # %bb.0:
+; X64-NEXT:    andl $67108863, %edi # imm = 0x3FFFFFF
+; X64-NEXT:    imulq %rdi, %rdi
+; X64-NEXT:    leaq (%rdi,%rdx), %rax
+; X64-NEXT:    retq
+  %x_masked = and <1 x i64> %x, splat (i64 67108863)
+  %y_masked = and <1 x i64> %x, splat (i64 67108863)
+  %mul = mul <1 x i64> %x_masked, %y_masked
+  %res = add <1 x i64> %mul, %z
+  ret <1 x i64> %res
+}
+
+define <3 x i64> @test_i3(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
+; AVX-LABEL: test_i3:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmuludq %ymm0, %ymm0, %ymm0
+; AVX-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-NOVL-LABEL: test_i3:
+; AVX512-NOVL:       # %bb.0:
+; AVX512-NOVL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
+; AVX512-NOVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512-NOVL-NEXT:    vpmuludq %ymm0, %ymm0, %ymm0
+; AVX512-NOVL-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX512-NOVL-NEXT:    retq
+;
+; AVX512VL-LABEL: test_i3:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
+  %x_masked = and <3 x i64> %x, splat (i64 67108863)
+  %y_masked = and <3 x i64> %x, splat (i64 67108863)
+  %mul = mul <3 x i64> %x_masked, %y_masked
+  %res = add <3 x i64> %mul, %z
+  ret <3 x i64> %res
+}
+
+define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
+; AVX-LABEL: test_i5:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    vmovq %r8, %xmm0
+; AVX-NEXT:    vmovq %rcx, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovq %rdx, %xmm1
+; AVX-NEXT:    vmovq %rsi, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vmovdqu {{[0-9]+}}(%rsp), %ymm2
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT:    movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX-NEXT:    vmovq %rcx, %xmm3
+; AVX-NEXT:    vmovq %r9, %xmm4
+; AVX-NEXT:    vpand %xmm3, %xmm4, %xmm3
+; AVX-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; AVX-NEXT:    vpmuludq %xmm4, %xmm3, %xmm4
+; AVX-NEXT:    vpsllq $33, %xmm4, %xmm4
+; AVX-NEXT:    vpmuludq %xmm3, %xmm3, %xmm3
+; AVX-NEXT:    vpaddq %xmm1, %xmm3, %xmm1
+; AVX-NEXT:    vpaddq %xmm4, %xmm1, %xmm1
+; AVX-NEXT:    {vex} vpmadd52luq %ymm0, %ymm0, %ymm2
+; AVX-NEXT:    vmovdqa %ymm2, (%rdi)
+; AVX-NEXT:    vmovq %xmm1, 32(%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_i5:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT:    vpmuludq %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %x_masked = and <5 x i64> %x, splat (i64 67108863)
+  %y_masked = and <5 x i64> %x, splat (i64 67108863)
+  %mul = mul <5 x i64> %x_masked, %y_masked
+  %res = add <5 x i64> %mul, %z
+  ret <5 x i64> %res
+}
+
+define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
+; AVX-LABEL: test_i6:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    vmovq %r8, %xmm0
+; AVX-NEXT:    vmovq %rcx, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovq %rdx, %xmm1
+; AVX-NEXT:    vmovq %rsi, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    vmovdqu {{[0-9]+}}(%rsp), %ymm1
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    {vex} vpmadd52luq %ymm0, %ymm0, %ymm1
+; AVX-NEXT:    vmovq %r9, %xmm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm3 = mem[0],zero
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpmuldq %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpaddq {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa %xmm0, 32(%rdi)
+; AVX-NEXT:    vmovdqa %ymm1, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_i6:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT:    vpmuludq %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %x_masked = and <6 x i64> %x, splat (i64 67108863)
+  %y_masked = and <6 x i64> %x, splat (i64 67108863)
+  %mul = mul <6 x i64> %x_masked, %y_masked
+  %res = add <6 x i64> %mul, %z
+  ret <6 x i64> %res
+}
+
+define <9 x i64> @test_i9(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
+; AVX-LABEL: test_i9:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq %rdi, %rax
+; AVX-NEXT:    vmovq %r8, %xmm0
+; AVX-NEXT:    vmovq %rcx, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vmovq %rdx, %xmm1
+; AVX-NEXT:    vmovq %rsi, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    vmovq %r9, %xmm1
+; AVX-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX-NEXT:    vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1
+; AVX-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vmovdqu {{[0-9]+}}(%rsp), %ymm3
+; AVX-NEXT:    vmovdqu {{[0-9]+}}(%rsp), %ymm4
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm5 = [67108863,67108863,67108863,67108863]
+; AVX-NEXT:    vpand %ymm5, %ymm0, %ymm0
+; AVX-NEXT:    vpand %ymm5, %ymm1, %ymm1
+; AVX-NEXT:    movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX-NEXT:    vmovq %rcx, %xmm5
+; AVX-NEXT:    vmovq {{.*#+}} xmm6 = mem[0],zero
+; AVX-NEXT:    vpand %xmm5, %xmm6, %xmm5
+; AVX-NEXT:    vpsrlq $32, %xmm5, %xmm6
+; AVX-NEXT:    vpmuludq %xmm6, %xmm5, %xmm6
+; AVX-NEXT:    vpsllq $33, %xmm6, %xmm6
+; AVX-NEXT:    vpmuludq %xmm5, %xmm5, %xmm5
+; AVX-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
+; AVX-NEXT:    vpaddq %xmm6, %xmm2, %xmm2
+; AVX-NEXT:    {vex} vpmadd52luq %ymm0, %ymm0, %ymm4
+; AVX-NEXT:    {vex} vpmadd52luq %ymm1, %ymm1, %ymm3
+; AVX-NEXT:    vmovdqa %ymm3, 32(%rdi)
+; AVX-NEXT:    vmovdqa %ymm4, (%rdi)
+; AVX-NEXT:    vmovq %xmm2, 64(%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_i9:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movq %rdi, %rax
+; AVX512-NEXT:    vmovq %r8, %xmm0
+; AVX512-NEXT:    vmovq %rcx, %xmm1
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512-NEXT:    vmovq %rdx, %xmm1
+; AVX512-NEXT:    vmovq %rsi, %xmm2
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512-NEXT:    vmovq %r9, %xmm1
+; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512-NEXT:    vinserti128 $1, {{[0-9]+}}(%rsp), %ymm1, %ymm1
+; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512-NEXT:    vmovdqu64 {{[0-9]+}}(%rsp), %zmm2
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT:    movl $67108863, %ecx # imm = 0x3FFFFFF
+; AVX512-NEXT:    vmovq %rcx, %xmm3
+; AVX512-NEXT:    vmovq {{.*#+}} xmm4 = mem[0],zero
+; AVX512-NEXT:    vpand %xmm3, %xmm4, %xmm3
+; AVX512-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; AVX512-NEXT:    vpmuludq %xmm4, %xmm3, %xmm4
+; AVX512-NEXT:    vpsllq $33, %xmm4, %xmm4
+; AVX512-NEXT:    vpmuludq %xmm3, %xmm3, %xmm3
+; AVX512-NEXT:    vpaddq %xmm1, %xmm3, %xmm1
+; AVX512-NEXT:    vpaddq %xmm4, %xmm1, %xmm1
+; AVX512-NEXT:    vpmadd52luq %zmm0, %zmm0, %zmm2
+; AVX512-NEXT:    vmovq %xmm1, 64(%rdi)
+; AVX512-NEXT:    vmovdqa64 %zmm2, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %x_masked = and <9 x i64> %x, splat (i64 67108863)
+  %y_masked = and <9 x i64> %x, splat (i64 67108863)
+  %mul = mul <9 x i64> %x_masked, %y_masked
+  %res = add <9 x i64> %mul, %z
+  ret <9 x i64> %res
+}
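
A note on the constant used throughout these tests: 67108863 is 0x3FFFFFF, i.e. 2^26 - 1, so each masked operand fits in 26 bits and every product fits in 52 bits. That is what makes a plain 64-bit mul+add equivalent to VPMADD52LUQ, which multiplies the low 52 bits of each lane and accumulates the low 52 bits of the product. The CHECK lines for test_i5, test_i6 and test_i9 also show type legalization carving the odd sizes into power-of-two pieces, with the 256-bit and 512-bit pieces matched to vpmadd52luq and the leftover lanes falling back to plain multiply-add: <5 x i64> becomes 4+1, <6 x i64> becomes 4+2, and <9 x i64> becomes 4+4+1 under AVX or 8+1 under AVX-512. A toy sketch of such a split (illustration only, assuming a greedy largest-piece-first strategy; real SelectionDAG legalization is more involved, e.g. <3 x i64> is widened to <4 x i64> rather than split):

#include <cstdio>
#include <initializer_list>
#include <vector>

// Greedily peel off the largest power-of-two piece that fits in a vector
// register of MaxElts lanes, until no elements remain.
static std::vector<unsigned> splitPow2(unsigned NumElts, unsigned MaxElts) {
  std::vector<unsigned> Parts;
  while (NumElts) {
    unsigned Part = 1;
    while (Part * 2 <= NumElts && Part * 2 <= MaxElts)
      Part *= 2;
    Parts.push_back(Part);
    NumElts -= Part;
  }
  return Parts;
}

int main() {
  // With 256-bit registers (4 x i64 per ymm): 5 -> 4+1, 6 -> 4+2, 9 -> 4+4+1.
  // With 512-bit registers (8 x i64 per zmm), 9 would split as 8+1.
  for (unsigned N : {5u, 6u, 9u}) {
    std::printf("<%u x i64> ymm pieces:", N);
    for (unsigned P : splitPow2(N, /*MaxElts=*/4))
      std::printf(" %u", P);
    std::printf("\n");
  }
}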
