Skip to content

Commit 0d67367

Browse files
RKSimon authored and tru committed
[X86] getScalarMaskingNode - if the mask is zero just return the blended passthrough and preserved source value (#153575)
We already handle the case where the mask is one, so I added the other case, where the op is replaced with a MOVSH/MOVSS/MOVSD blend. This assumes the scalar passthrough is op0. I had to adjust the test case for #98306 as, AFAICT, it had been over-reduced. Fixes #153570. (cherry picked from commit ba707db)
1 parent 1db648d commit 0d67367

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26236,10 +26236,9 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
2623626236
SDValue PreservedSrc,
2623726237
const X86Subtarget &Subtarget,
2623826238
SelectionDAG &DAG) {
26239-
26240-
if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
26241-
if (MaskConst->getZExtValue() & 0x1)
26242-
return Op;
26239+
auto *MaskConst = dyn_cast<ConstantSDNode>(Mask);
26240+
if (MaskConst && (MaskConst->getZExtValue() & 0x1))
26241+
return Op;
2624326242

2624426243
MVT VT = Op.getSimpleValueType();
2624526244
SDLoc dl(Op);
@@ -26255,6 +26254,17 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
2625526254

2625626255
if (PreservedSrc.isUndef())
2625726256
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
26257+
26258+
if (MaskConst) {
26259+
assert((MaskConst->getZExtValue() & 0x1) == 0 && "Expected false mask");
26260+
// Discard op and blend passthrough with scalar op src/dst.
26261+
SmallVector<int, 16> ShuffleMask(VT.getVectorNumElements());
26262+
std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);
26263+
ShuffleMask[0] = VT.getVectorNumElements();
26264+
return DAG.getVectorShuffle(VT, dl, Op.getOperand(0), PreservedSrc,
26265+
ShuffleMask);
26266+
}
26267+
2625826268
return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
2625926269
}
2626026270

llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -278,14 +278,14 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
278278
ret <4 x float> %res
279279
}
280280

281-
define <4 x float> @PR98306() {
281+
define <4 x float> @PR98306(i8 %m) {
282282
; CHECK-LABEL: PR98306:
283283
; CHECK: ## %bb.0:
284-
; CHECK-NEXT: kxorw %k0, %k0, %k1
284+
; CHECK-NEXT: kmovd %edi, %k1
285285
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
286286
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
287287
; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z}
288288
; CHECK-NEXT: retq
289-
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 0, i32 4)
289+
%res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> <float 7.812500e-03, float 0x4130092000000000, float 0x4150094000000000, float 0x4170096000000000>, <4 x float> <float 2.000000e+00, float 0x4188098000000000, float 0x4198099000000000, float 0x41A809A000000000>, <4 x float> <float 3.200000e+01, float 0x41B809B000000000, float 0x41C809C000000000, float 0x41D809D000000000>, i8 %m, i32 4)
290290
ret <4 x float> %res
291291
}

llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1361,3 +1361,19 @@ define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind
13611361
%res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
13621362
ret <32 x half> %res
13631363
}
1364+
1365+
define <8 x half> @PR153570(ptr %p) {
1366+
; CHECK-LABEL: PR153570:
1367+
; CHECK: # %bb.0:
1368+
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
1369+
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1370+
; CHECK-NEXT: vmulsh {rn-sae}, %xmm0, %xmm1, %xmm0
1371+
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
1372+
; CHECK-NEXT: vmovsh %xmm2, %xmm1, %xmm1
1373+
; CHECK-NEXT: vmovaps %xmm1, (%rdi)
1374+
; CHECK-NEXT: retq
1375+
%r = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <8 x half> <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, i8 0, i32 8)
1376+
store <8 x half> %r, ptr %p, align 16
1377+
%r1 = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <8 x half> <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, i8 1, i32 8)
1378+
ret <8 x half> %r1
1379+
}

0 commit comments

Comments
 (0)