Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60832,3 +60832,24 @@ Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
return TargetLowering::getPrefLoopAlignment();
}

// X86 implementation of the shouldSimplifyDemandedVectorElts hook.
//
// When Op is an ISD::VECTOR_SHUFFLE and either of its first two operands
// (looking through bitcasts) is an ISD::MUL, this tries combineMulToPMULDQ on
// that multiply. If the combine yields a value, every use of the multiply is
// replaced with it and the function returns false. In all other cases it
// returns true.
//
// NOTE(review): presumably a false result tells the caller to skip demanded
// vector element simplification for Op - confirm against the call site.
// NOTE(review): this is a const query, yet it rewrites the DAG through
// ReplaceAllUsesWith - confirm that side effect is intended here.
bool X86TargetLowering::shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const {
if (Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't going to work in the general case, it will just help shuffles:
https://llvm.godbolt.org/z/no1Gc6fzT

// Inspect the shuffle inputs with any wrapping bitcasts stripped.
SDValue V0 = peekThroughBitcasts(Op.getOperand(0));
SDValue V1 = peekThroughBitcasts(Op.getOperand(1));

if (V0.getOpcode() == ISD::MUL || V1.getOpcode() == ISD::MUL) {
// V0 takes priority: when both inputs are multiplies only V0 is tried.
SDNode *Mul = V0.getOpcode() == ISD::MUL ? V0.getNode() : V1.getNode();
SelectionDAG &DAG = TLO.DAG;
const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
const SDLoc DL(Mul);

// A non-null result means the multiply was rewritten; swap it in for all
// users of the original node and report false.
if (SDValue V = combineMulToPMULDQ(Mul, DL, DAG, Subtarget)) {
DAG.ReplaceAllUsesWith(Mul, V.getNode());
return false;
}
}
}
// Not a shuffle, no multiply input, or the combine did not apply.
return true;
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,9 @@ namespace llvm {

bool hasBitTest(SDValue X, SDValue Y) const override;

/// Override of the base-class hook of the same name. It takes the node
/// being considered (Op) and the TargetLoweringOpt context (TLO), and
/// returns a bool.
bool shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const override;

bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/X86/pr121456.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512dq -O3 | FileCheck %s

; Both inputs are masked to their low 32 bits and multiplied as i64. The
; product is then viewed as <16 x i32> and shuffled so that every even i32
; lane is the constant 0 and every odd i32 lane is the matching even lane of
; the product. The assertions below expect this to lower to a single
; vpmuludq followed by a 32-bit left shift of each 64-bit element.
define <8 x i64> @pr121456(<8 x i64> %a, <8 x i64> %b) {
Copy link
Collaborator

@RKSimon RKSimon Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't name the test file after a pull request - "pr" is the old llvm term for problem report - the number should be based off a reported issue number (is there one?) - otherwise I'd probably suggest adding these tests to combine-pmuldq.ll instead

; CHECK-LABEL: pr121456:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
; 4294967295 is 0xFFFFFFFF: keep only the low 32 bits of each i64 lane.
%0 = and <8 x i64> %a, splat (i64 4294967295)
%1 = and <8 x i64> %b, splat (i64 4294967295)
%2 = mul nuw <8 x i64> %1, %0
%3 = bitcast <8 x i64> %2 to <16 x i32>
; Mask indices 0,2,4,... pick the zero lanes of the constant first operand;
; indices 16,18,20,... pick lanes 0,2,4,... of %3.
%4 = shufflevector <16 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison>, <16 x i32> %3, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
%5 = bitcast <16 x i32> %4 to <8 x i64>
ret <8 x i64> %5
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this also happen for v2i64/v4i64 on avx512dq targets? Please add test coverage for those cases as well.

Loading