Skip to content

Commit 9552e89

Browse files
authored
[X86] Remove X86ISD::VSHLDV/VSHRDV and use ISD::FSHL/FSHR opcodes directly (#157616)
Fixes issue #155591.
1 parent f628a54 commit 9552e89

File tree

4 files changed: 30 additions (+30) and 39 deletions (-39).

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31215,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
3121531215
unsigned NumElts = VT.getVectorNumElements();
3121631216

3121731217
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
31218-
if (IsFSHR)
31219-
std::swap(Op0, Op1);
3122031218

3122131219
if (IsCstSplat) {
31220+
if (IsFSHR)
31221+
std::swap(Op0, Op1);
3122231222
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
3122331223
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
3122431224
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
3122531225
{Op0, Op1, Imm}, DAG, Subtarget);
3122631226
}
31227-
return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
31227+
return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
3122831228
{Op0, Op1, Amt}, DAG, Subtarget);
3122931229
}
3123031230
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
@@ -35139,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3513935139
NODE_NAME_CASE(VALIGN)
3514035140
NODE_NAME_CASE(VSHLD)
3514135141
NODE_NAME_CASE(VSHRD)
35142-
NODE_NAME_CASE(VSHLDV)
35143-
NODE_NAME_CASE(VSHRDV)
3514435142
NODE_NAME_CASE(PSHUFD)
3514535143
NODE_NAME_CASE(PSHUFHW)
3514635144
NODE_NAME_CASE(PSHUFLW)

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -471,8 +471,7 @@ namespace llvm {
471471
// VBMI2 Concat & Shift.
472472
VSHLD,
473473
VSHRD,
474-
VSHLDV,
475-
VSHRDV,
474+
476475
// Shuffle Packed Values at 128-bit granularity.
477476
SHUF128,
478477
MOVDDUP,

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -12300,72 +12300,76 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
1230012300
// VBMI2
1230112301
//===----------------------------------------------------------------------===//
1230212302

12303-
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12303+
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
1230412304
X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
1230512305
let Constraints = "$src1 = $dst",
1230612306
ExeDomain = VTI.ExeDomain in {
1230712307
defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
1230812308
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
1230912309
"$src3, $src2", "$src2, $src3",
12310-
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12310+
!if(SwapLR,
12311+
(VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src3))),
12312+
(VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src3))))>,
1231112313
T8, PD, EVEX, VVVV, Sched<[sched]>;
1231212314
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1231312315
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
1231412316
"$src3, $src2", "$src2, $src3",
12315-
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12316-
(VTI.VT (VTI.LdFrag addr:$src3))))>,
12317+
!if(SwapLR,
12318+
(VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src3)))),
12319+
(VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.LdFrag addr:$src3)))))>,
1231712320
T8, PD, EVEX, VVVV,
1231812321
Sched<[sched.Folded, sched.ReadAfterFold]>;
1231912322
}
1232012323
}
1232112324

12322-
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12325+
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
1232312326
X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12324-
: VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12327+
: VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched, VTI> {
1232512328
let Constraints = "$src1 = $dst",
1232612329
ExeDomain = VTI.ExeDomain in
1232712330
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1232812331
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
1232912332
"${src3}"#VTI.BroadcastStr#", $src2",
1233012333
"$src2, ${src3}"#VTI.BroadcastStr,
12331-
(OpNode VTI.RC:$src1, VTI.RC:$src2,
12332-
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12334+
!if(SwapLR,
12335+
(OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))),
12336+
(OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))))>,
1233312337
T8, PD, EVEX, VVVV, EVEX_B,
1233412338
Sched<[sched.Folded, sched.ReadAfterFold]>;
1233512339
}
1233612340

12337-
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12341+
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
1233812342
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
1233912343
let Predicates = [HasVBMI2] in
12340-
defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12344+
defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
1234112345
EVEX_V512;
1234212346
let Predicates = [HasVBMI2, HasVLX] in {
12343-
defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12347+
defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
1234412348
EVEX_V256;
12345-
defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12349+
defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
1234612350
EVEX_V128;
1234712351
}
1234812352
}
1234912353

12350-
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12354+
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
1235112355
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
1235212356
let Predicates = [HasVBMI2] in
12353-
defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12357+
defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
1235412358
EVEX_V512;
1235512359
let Predicates = [HasVBMI2, HasVLX] in {
12356-
defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12360+
defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
1235712361
EVEX_V256;
12358-
defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12362+
defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
1235912363
EVEX_V128;
1236012364
}
1236112365
}
1236212366
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12363-
SDNode OpNode, X86SchedWriteWidths sched> {
12364-
defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12367+
SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched> {
12368+
defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, SwapLR, sched,
1236512369
avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12366-
defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12370+
defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, SwapLR, sched,
1236712371
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12368-
defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12372+
defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, SwapLR, sched,
1236912373
avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1237012374
}
1237112375

@@ -12381,8 +12385,8 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
1238112385
}
1238212386

1238312387
// Concat & Shift
12384-
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12385-
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12388+
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", fshl, 0, SchedWriteVecIMul>;
12389+
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", fshr, 1, SchedWriteVecIMul>;
1238612390
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
1238712391
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
1238812392

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -406,16 +406,6 @@ def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
406406

407407
def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
408408
def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
409-
def X86VShldv : SDNode<"X86ISD::VSHLDV",
410-
SDTypeProfile<1, 3, [SDTCisVec<0>,
411-
SDTCisSameAs<0,1>,
412-
SDTCisSameAs<0,2>,
413-
SDTCisSameAs<0,3>]>>;
414-
def X86VShrdv : SDNode<"X86ISD::VSHRDV",
415-
SDTypeProfile<1, 3, [SDTCisVec<0>,
416-
SDTCisSameAs<0,1>,
417-
SDTCisSameAs<0,2>,
418-
SDTCisSameAs<0,3>]>>;
419409

420410
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
421411

0 commit comments

Comments (0)