-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] Attempt to fold trunc(srl(load(p),amt) -> load(p+amt/8) #165266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d7e8d5a
b30012f
28988cc
2940b85
d99720c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54634,6 +54634,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, | |
| const X86Subtarget &Subtarget) { | ||
| EVT VT = N->getValueType(0); | ||
| SDValue Src = N->getOperand(0); | ||
| EVT SrcVT = Src.getValueType(); | ||
| SDLoc DL(N); | ||
|
|
||
| // Attempt to pre-truncate inputs to arithmetic ops instead. | ||
|
|
@@ -54652,6 +54653,39 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, | |
| if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget)) | ||
| return V; | ||
|
|
||
| // Fold trunc(srl(load(p),amt)) -> load(p+amt/8) | ||
| // If we're shifting down whole byte+pow2 aligned bit chunks from a larger | ||
| // load for truncation, see if we can convert the shift into a pointer | ||
| // offset instead. Limit this to normal (non-ext) scalar integer loads. | ||
| if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL && | ||
| Src.hasOneUse() && Src.getOperand(0).hasOneUse() && | ||
| ISD::isNormalLoad(Src.getOperand(0).getNode())) { | ||
| auto *Ld = cast<LoadSDNode>(Src.getOperand(0)); | ||
| if (Ld->isSimple() && VT.isByteSized() && | ||
| isPowerOf2_64(VT.getSizeInBits())) { | ||
| SDValue ShAmt = Src.getOperand(1); | ||
|
Comment on lines
+54663
to
+54666
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we check
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What did you have in mind? We check that VT is byte sized (a multiple of 8 bits) and that it's a power of 2 - then check that ShAmt is zero in the lowest bits matching the alignment of VT.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could relax this to just check that ShAmt is byte aligned:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I get it now. It's less clear than |
||
| KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); | ||
| // Check the shift amount is aligned to the truncated size. | ||
| // Check the truncation doesn't use any shifted in (zero) top bits. | ||
| if (KnownAmt.countMinTrailingZeros() >= Log2_64(VT.getSizeInBits()) && | ||
| KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() - | ||
| VT.getSizeInBits())) { | ||
| EVT PtrVT = Ld->getBasePtr().getValueType(); | ||
| SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT); | ||
| SDValue PtrByteOfs = | ||
| DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs, | ||
| DAG.getShiftAmountConstant(3, PtrVT, DL)); | ||
| SDValue NewPtr = DAG.getMemBasePlusOffset( | ||
| Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap); | ||
| SDValue NewLoad = | ||
| DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getMemOperand()); | ||
| DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1), | ||
| NewLoad.getValue(1)); | ||
| return NewLoad; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // The bitcast source is a direct mmx result. | ||
| // Detect bitcasts between i32 to x86mmx | ||
| if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.