Skip to content

Commit 44560ef

Browse files
committed
Addressed feedback
Fixed testcase return, fixed naming, moved comment. Early out on known bits.
1 parent 6a0d82b commit 44560ef

File tree

2 files changed

+46
-23
lines changed

2 files changed

+46
-23
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -57989,11 +57989,13 @@ static SDValue matchVPMADD52(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
5798957989
return SDValue();
5799057990

5799157991
KnownBits KnownX = DAG.computeKnownBits(X);
57992+
if (KnownX.countMinLeadingZeros() < 12)
57993+
return SDValue();
5799257994
KnownBits KnownY = DAG.computeKnownBits(Y);
57995+
if (KnownY.countMinLeadingZeros() < 12)
57996+
return SDValue();
5799357997
KnownBits KnownMul = KnownBits::mul(KnownX, KnownY);
57994-
if (KnownX.countMinLeadingZeros() < 12 ||
57995-
KnownY.countMinLeadingZeros() < 12 ||
57996-
KnownMul.countMinLeadingZeros() < 12)
57998+
if (KnownMul.countMinLeadingZeros() < 12)
5799757999
return SDValue();
5799858000

5799958001
auto VPMADD52Builder = [](SelectionDAG &G, SDLoc DL,

llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll

Lines changed: 41 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -253,23 +253,44 @@ define <1 x i64> @test_scalar_no_ifma(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z)
253253
ret <1 x i64> %res
254254
}
255255

256+
; 40-bit and 13-bit, too wide
256257
define <8 x i64> @test_mixed_width_too_wide(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
257-
; 40-bit and 13-bit, too wide
258258
; AVX-LABEL: test_mixed_width_too_wide:
259259
; AVX: # %bb.0:
260-
; AVX-NEXT: vmovaps %ymm5, %ymm1
261-
; AVX-NEXT: vmovaps %ymm4, %ymm0
260+
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [8191,8191,8191,8191]
261+
; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2
262+
; AVX-NEXT: vpand %ymm6, %ymm3, %ymm3
263+
; AVX-NEXT: vpmovzxdq {{.*#+}} ymm6 = [2155905028,2155905036,2155905044,2155905052]
264+
; AVX-NEXT: vpshufb %ymm6, %ymm1, %ymm7
265+
; AVX-NEXT: vpmuludq %ymm3, %ymm7, %ymm7
266+
; AVX-NEXT: vpsllq $32, %ymm7, %ymm7
267+
; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
268+
; AVX-NEXT: vpshufb %ymm6, %ymm0, %ymm3
269+
; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3
270+
; AVX-NEXT: vpsllq $32, %ymm3, %ymm3
271+
; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
272+
; AVX-NEXT: vpaddq %ymm0, %ymm4, %ymm0
273+
; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0
274+
; AVX-NEXT: vpaddq %ymm1, %ymm5, %ymm1
275+
; AVX-NEXT: vpaddq %ymm7, %ymm1, %ymm1
262276
; AVX-NEXT: retq
263277
;
264278
; AVX512-LABEL: test_mixed_width_too_wide:
265279
; AVX512: # %bb.0:
266-
; AVX512-NEXT: vmovaps %zmm2, %zmm0
280+
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
281+
; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm3
282+
; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm0
283+
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
284+
; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
285+
; AVX512-NEXT: vpsllq $32, %zmm0, %zmm0
286+
; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm1
287+
; AVX512-NEXT: vpaddq %zmm0, %zmm1, %zmm0
267288
; AVX512-NEXT: retq
268289
%x40 = and <8 x i64> %x, splat (i64 1099511627775)
269290
%y13 = and <8 x i64> %y, splat (i64 8191)
270291
%mul = mul <8 x i64> %x40, %y13
271292
%res = add <8 x i64> %z, %mul
272-
ret <8 x i64> %z
293+
ret <8 x i64> %res
273294
}
274295

275296
define <8 x i64> @test_zext32_inputs_not_safe(<8 x i32> %xi32, <8 x i32> %yi32, <8 x i64> %z) {
@@ -352,8 +373,8 @@ define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i
352373
ret <16 x i64> %res
353374
}
354375

355-
define <1 x i64> @test_not_i1(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
356-
; X64-LABEL: test_not_i1:
376+
define <1 x i64> @test_not_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
377+
; X64-LABEL: test_not_v1i64:
357378
; X64: # %bb.0:
358379
; X64-NEXT: andl $67108863, %edi # imm = 0x3FFFFFF
359380
; X64-NEXT: imulq %rdi, %rdi
@@ -366,24 +387,24 @@ define <1 x i64> @test_not_i1(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
366387
ret <1 x i64> %res
367388
}
368389

369-
define <3 x i64> @test_i3(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
370-
; AVX-LABEL: test_i3:
390+
define <3 x i64> @test_v3i64(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
391+
; AVX-LABEL: test_v3i64:
371392
; AVX: # %bb.0:
372393
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
373394
; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
374395
; AVX-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
375396
; AVX-NEXT: vpaddq %ymm2, %ymm0, %ymm0
376397
; AVX-NEXT: retq
377398
;
378-
; AVX512-NOVL-LABEL: test_i3:
399+
; AVX512-NOVL-LABEL: test_v3i64:
379400
; AVX512-NOVL: # %bb.0:
380401
; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
381402
; AVX512-NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
382403
; AVX512-NOVL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
383404
; AVX512-NOVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
384405
; AVX512-NOVL-NEXT: retq
385406
;
386-
; AVX512VL-LABEL: test_i3:
407+
; AVX512VL-LABEL: test_v3i64:
387408
; AVX512VL: # %bb.0:
388409
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
389410
; AVX512VL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
@@ -396,8 +417,8 @@ define <3 x i64> @test_i3(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
396417
ret <3 x i64> %res
397418
}
398419

399-
define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
400-
; AVX-LABEL: test_i5:
420+
define <5 x i64> @test_v5i64(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
421+
; AVX-LABEL: test_v5i64:
401422
; AVX: # %bb.0:
402423
; AVX-NEXT: movq %rdi, %rax
403424
; AVX-NEXT: vmovq %r8, %xmm0
@@ -427,7 +448,7 @@ define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
427448
; AVX-NEXT: vzeroupper
428449
; AVX-NEXT: retq
429450
;
430-
; AVX512-LABEL: test_i5:
451+
; AVX512-LABEL: test_v5i64:
431452
; AVX512: # %bb.0:
432453
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
433454
; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
@@ -440,8 +461,8 @@ define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
440461
ret <5 x i64> %res
441462
}
442463

443-
define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
444-
; AVX-LABEL: test_i6:
464+
define <6 x i64> @test_v6i64(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
465+
; AVX-LABEL: test_v6i64:
445466
; AVX: # %bb.0:
446467
; AVX-NEXT: movq %rdi, %rax
447468
; AVX-NEXT: vmovq %r8, %xmm0
@@ -466,7 +487,7 @@ define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
466487
; AVX-NEXT: vzeroupper
467488
; AVX-NEXT: retq
468489
;
469-
; AVX512-LABEL: test_i6:
490+
; AVX512-LABEL: test_v6i64:
470491
; AVX512: # %bb.0:
471492
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
472493
; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
@@ -479,8 +500,8 @@ define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
479500
ret <6 x i64> %res
480501
}
481502

482-
define <9 x i64> @test_i9(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
483-
; AVX-LABEL: test_i9:
503+
define <9 x i64> @test_v9i64(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
504+
; AVX-LABEL: test_v9i64:
484505
; AVX: # %bb.0:
485506
; AVX-NEXT: movq %rdi, %rax
486507
; AVX-NEXT: vmovq %r8, %xmm0
@@ -518,7 +539,7 @@ define <9 x i64> @test_i9(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
518539
; AVX-NEXT: vzeroupper
519540
; AVX-NEXT: retq
520541
;
521-
; AVX512-LABEL: test_i9:
542+
; AVX512-LABEL: test_v9i64:
522543
; AVX512: # %bb.0:
523544
; AVX512-NEXT: movq %rdi, %rax
524545
; AVX512-NEXT: vmovq %r8, %xmm0

0 commit comments

Comments
 (0)