Commit 1d5580f
[X86] combineTruncate - trunc(srl(load(p),amt)) -> load(p+amt/8) - ensure there isn't an interdependency between the load and amt (#165850)
Fixes #165755
1 parent b6a331b commit 1d5580f
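For context, the fold named in the commit title rewrites trunc(srl(load(p),amt)) into a narrower load at byte offset amt/8, provided amt is byte aligned and the truncation only keeps bits that were actually loaded. A minimal illustration of the safe case (hypothetical IR, not part of this commit; little-endian layout assumed):

define i32 @fold_sketch(ptr %p) {
  %ld = load i64, ptr %p, align 8
  %srl = lshr i64 %ld, 32      ; amt = 32 is byte aligned, amt/8 = 4
  %res = trunc i64 %srl to i32 ; keeps only bits 32..63 of the load
  ret i32 %res
}

On x86-64 this can collapse to a single 32-bit load 4 bytes past %p (roughly movl 4(%rdi), %eax). The guard added below (the isPredecessorOf check) simply skips the fold whenever the shift amount computation reaches back to the original load, which is the interdependency the title refers to; the test added by this commit reproduces that pattern.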

2 files changed: +29 -1 lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
@@ -54768,9 +54768,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
     KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
     // Check the shift amount is byte aligned.
     // Check the truncation doesn't use any shifted in (zero) top bits.
+    // Check the shift amount doesn't depend on the original load.
     if (KnownAmt.countMinTrailingZeros() >= 3 &&
         KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
-                                   VT.getSizeInBits())) {
+                                   VT.getSizeInBits()) &&
+        !Ld->isPredecessorOf(ShAmt.getNode())) {
       EVT PtrVT = Ld->getBasePtr().getValueType();
       SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
       SDValue PtrByteOfs =

llvm/test/CodeGen/X86/pr165755.ll

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
+
+define i32 @PR165755(ptr %p0) {
+; X86-LABEL: PR165755:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %eax
+; X86-NEXT:    movb $0, (%ecx)
+; X86-NEXT:    retl
+;
+; X64-LABEL: PR165755:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movb $0, (%rdi)
+; X64-NEXT:    retq
+  %ld64 = load i64, ptr %p0, align 8
+  store i8 0, ptr %p0, align 1
+  %ld32 = load i32, ptr %p0, align 8
+  %mask = and i32 %ld32, 32
+  %zext = zext i32 %mask to i64
+  %srl = lshr i64 %ld64, %zext
+  %res = trunc i64 %srl to i32
+  ret i32 %res
+}
