Skip to content

Commit 52f4c36

Browse files
authored
[X86] combineTruncate - trunc(srl(load(p),amt)) -> load(p+amt/8) - ensure amt doesn't depend on original load chain (#168400)
Relax the fix for #165755 / #165850 - it doesn't matter whether the shift amount depends on the value produced by the original load, only whether it depends on any user of the load's chain result
1 parent 4c9020d commit 52f4c36

File tree

2 files changed

+103
-176
lines changed

2 files changed

+103
-176
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54688,11 +54688,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
5468854688
KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
5468954689
// Check the shift amount is byte aligned.
5469054690
// Check the truncation doesn't use any shifted in (zero) top bits.
54691-
// Check the shift amount doesn't depend on the original load.
54691+
// Check the shift amount doesn't depend on the original load chain.
5469254692
if (KnownAmt.countMinTrailingZeros() >= 3 &&
5469354693
KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
5469454694
VT.getSizeInBits()) &&
54695-
!Ld->isPredecessorOf(ShAmt.getNode())) {
54695+
none_of(Ld->uses(), [&ShAmt](SDUse &Use) {
54696+
return Use.getResNo() == 1 &&
54697+
Use.getUser()->isPredecessorOf(ShAmt.getNode());
54698+
})) {
5469654699
EVT PtrVT = Ld->getBasePtr().getValueType();
5469754700
SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
5469854701
SDValue PtrByteOfs =

llvm/test/CodeGen/X86/bittest-big-integer.ll

Lines changed: 98 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,85 +1877,56 @@ define i32 @blsr_u512(ptr %word) nounwind {
18771877
; SSE: # %bb.0:
18781878
; SSE-NEXT: pushq %r15
18791879
; SSE-NEXT: pushq %r14
1880-
; SSE-NEXT: pushq %r12
18811880
; SSE-NEXT: pushq %rbx
1882-
; SSE-NEXT: pushq %rax
1883-
; SSE-NEXT: movq 56(%rdi), %rcx
1884-
; SSE-NEXT: movq 48(%rdi), %rdx
1885-
; SSE-NEXT: movq 40(%rdi), %rsi
1886-
; SSE-NEXT: movq 32(%rdi), %r11
1881+
; SSE-NEXT: movq 48(%rdi), %r11
1882+
; SSE-NEXT: movq 40(%rdi), %r9
18871883
; SSE-NEXT: movq 24(%rdi), %r8
1888-
; SSE-NEXT: movq 16(%rdi), %r9
1889-
; SSE-NEXT: movq (%rdi), %rax
1890-
; SSE-NEXT: movq 8(%rdi), %r10
1891-
; SSE-NEXT: rep bsfq %rax, %rbx
1892-
; SSE-NEXT: rep bsfq %r10, %r14
1893-
; SSE-NEXT: addq $64, %r14
1894-
; SSE-NEXT: testq %rax, %rax
1895-
; SSE-NEXT: cmovneq %rbx, %r14
1896-
; SSE-NEXT: rep bsfq %r9, %r15
1897-
; SSE-NEXT: rep bsfq %r8, %rbx
1884+
; SSE-NEXT: movq 16(%rdi), %rdx
1885+
; SSE-NEXT: movq (%rdi), %rcx
1886+
; SSE-NEXT: movq 8(%rdi), %rsi
1887+
; SSE-NEXT: rep bsfq %rcx, %rax
1888+
; SSE-NEXT: rep bsfq %rsi, %rbx
18981889
; SSE-NEXT: addq $64, %rbx
1899-
; SSE-NEXT: testq %r9, %r9
1900-
; SSE-NEXT: cmovneq %r15, %rbx
1901-
; SSE-NEXT: subq $-128, %rbx
1902-
; SSE-NEXT: movq %rax, %r15
1903-
; SSE-NEXT: movq %rax, %r12
1904-
; SSE-NEXT: orq %r10, %r12
1905-
; SSE-NEXT: cmovneq %r14, %rbx
1906-
; SSE-NEXT: rep bsfq %r11, %r12
1907-
; SSE-NEXT: rep bsfq %rsi, %r14
1908-
; SSE-NEXT: addq $64, %r14
1909-
; SSE-NEXT: testq %r11, %r11
1910-
; SSE-NEXT: cmovneq %r12, %r14
1911-
; SSE-NEXT: xorps %xmm0, %xmm0
1912-
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1913-
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1914-
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1915-
; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
1916-
; SSE-NEXT: rep bsfq %rdx, %r12
1890+
; SSE-NEXT: testq %rcx, %rcx
1891+
; SSE-NEXT: cmovneq %rax, %rbx
1892+
; SSE-NEXT: rep bsfq %rdx, %rax
1893+
; SSE-NEXT: rep bsfq %r8, %r10
1894+
; SSE-NEXT: addq $64, %r10
1895+
; SSE-NEXT: testq %rdx, %rdx
1896+
; SSE-NEXT: cmovneq %rax, %r10
1897+
; SSE-NEXT: movq 32(%rdi), %r14
1898+
; SSE-NEXT: subq $-128, %r10
1899+
; SSE-NEXT: movq %rcx, %rax
1900+
; SSE-NEXT: orq %rsi, %rax
1901+
; SSE-NEXT: cmovneq %rbx, %r10
1902+
; SSE-NEXT: rep bsfq %r14, %rax
1903+
; SSE-NEXT: rep bsfq %r9, %rbx
1904+
; SSE-NEXT: addq $64, %rbx
1905+
; SSE-NEXT: testq %r14, %r14
1906+
; SSE-NEXT: cmovneq %rax, %rbx
1907+
; SSE-NEXT: rep bsfq %r11, %r15
19171908
; SSE-NEXT: movl $64, %eax
1918-
; SSE-NEXT: rep bsfq %rcx, %rax
1909+
; SSE-NEXT: rep bsfq 56(%rdi), %rax
19191910
; SSE-NEXT: addq $64, %rax
1920-
; SSE-NEXT: testq %rdx, %rdx
1921-
; SSE-NEXT: cmovneq %r12, %rax
1911+
; SSE-NEXT: testq %r11, %r11
1912+
; SSE-NEXT: cmovneq %r15, %rax
19221913
; SSE-NEXT: subq $-128, %rax
1923-
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
1924-
; SSE-NEXT: orq %rsi, %r11
1925-
; SSE-NEXT: cmovneq %r14, %rax
1926-
; SSE-NEXT: addq $256, %rax # imm = 0x100
1927-
; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
1928-
; SSE-NEXT: orq %r8, %r10
1929-
; SSE-NEXT: orq %r9, %r15
1930-
; SSE-NEXT: orq %r10, %r15
1914+
; SSE-NEXT: orq %r9, %r14
19311915
; SSE-NEXT: cmovneq %rbx, %rax
1932-
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1933-
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
1934-
; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
1935-
; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
1936-
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
1937-
; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
1916+
; SSE-NEXT: addq $256, %rax # imm = 0x100
1917+
; SSE-NEXT: orq %r8, %rsi
1918+
; SSE-NEXT: orq %rdx, %rcx
1919+
; SSE-NEXT: orq %rsi, %rcx
1920+
; SSE-NEXT: cmovneq %r10, %rax
1921+
; SSE-NEXT: movl $-2, %edx
1922+
; SSE-NEXT: movl %eax, %ecx
1923+
; SSE-NEXT: roll %cl, %edx
19381924
; SSE-NEXT: movl %eax, %ecx
1939-
; SSE-NEXT: andl $32, %ecx
1940-
; SSE-NEXT: movl %eax, %edx
1941-
; SSE-NEXT: andl $480, %edx # imm = 0x1E0
1942-
; SSE-NEXT: shrl $3, %edx
1943-
; SSE-NEXT: movl %edx, %esi
1944-
; SSE-NEXT: andl $-8, %esi
1945-
; SSE-NEXT: movq -128(%rsp,%rsi), %r8
1946-
; SSE-NEXT: shrq %cl, %r8
1947-
; SSE-NEXT: movl -120(%rsp,%rsi), %esi
1948-
; SSE-NEXT: addl %esi, %esi
1949-
; SSE-NEXT: notl %ecx
1950-
; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
1951-
; SSE-NEXT: shlq %cl, %rsi
1952-
; SSE-NEXT: orl %r8d, %esi
1953-
; SSE-NEXT: btrl %eax, %esi
1954-
; SSE-NEXT: movl %esi, (%rdi,%rdx)
1925+
; SSE-NEXT: shrl $3, %ecx
1926+
; SSE-NEXT: andl $60, %ecx
1927+
; SSE-NEXT: andl %edx, (%rdi,%rcx)
19551928
; SSE-NEXT: # kill: def $eax killed $eax killed $rax
1956-
; SSE-NEXT: addq $8, %rsp
19571929
; SSE-NEXT: popq %rbx
1958-
; SSE-NEXT: popq %r12
19591930
; SSE-NEXT: popq %r14
19601931
; SSE-NEXT: popq %r15
19611932
; SSE-NEXT: retq
@@ -1964,133 +1935,86 @@ define i32 @blsr_u512(ptr %word) nounwind {
19641935
; AVX2: # %bb.0:
19651936
; AVX2-NEXT: pushq %r15
19661937
; AVX2-NEXT: pushq %r14
1967-
; AVX2-NEXT: pushq %r13
1968-
; AVX2-NEXT: pushq %r12
19691938
; AVX2-NEXT: pushq %rbx
1970-
; AVX2-NEXT: movq 56(%rdi), %rcx
1971-
; AVX2-NEXT: movq 40(%rdi), %rdx
1972-
; AVX2-NEXT: movq 32(%rdi), %r11
1973-
; AVX2-NEXT: movq 24(%rdi), %rsi
1974-
; AVX2-NEXT: movq 16(%rdi), %r8
1975-
; AVX2-NEXT: movq (%rdi), %r9
1976-
; AVX2-NEXT: movq 8(%rdi), %r10
1977-
; AVX2-NEXT: xorl %ebx, %ebx
1978-
; AVX2-NEXT: tzcntq %r9, %rbx
1979-
; AVX2-NEXT: tzcntq %r10, %rax
1980-
; AVX2-NEXT: addq $64, %rax
1981-
; AVX2-NEXT: testq %r9, %r9
1982-
; AVX2-NEXT: cmovneq %rbx, %rax
1983-
; AVX2-NEXT: xorl %r14d, %r14d
1984-
; AVX2-NEXT: tzcntq %r8, %r14
1939+
; AVX2-NEXT: movq 40(%rdi), %r9
1940+
; AVX2-NEXT: movq 32(%rdi), %r10
1941+
; AVX2-NEXT: movq 24(%rdi), %r8
1942+
; AVX2-NEXT: movq 16(%rdi), %rdx
1943+
; AVX2-NEXT: movq (%rdi), %rcx
1944+
; AVX2-NEXT: movq 8(%rdi), %rsi
1945+
; AVX2-NEXT: tzcntq %rcx, %rax
19851946
; AVX2-NEXT: xorl %ebx, %ebx
19861947
; AVX2-NEXT: tzcntq %rsi, %rbx
19871948
; AVX2-NEXT: addq $64, %rbx
1988-
; AVX2-NEXT: testq %r8, %r8
1989-
; AVX2-NEXT: cmovneq %r14, %rbx
1990-
; AVX2-NEXT: subq $-128, %rbx
1991-
; AVX2-NEXT: movq %r9, %r14
1992-
; AVX2-NEXT: movq %r9, %r15
1993-
; AVX2-NEXT: orq %r10, %r15
1949+
; AVX2-NEXT: testq %rcx, %rcx
19941950
; AVX2-NEXT: cmovneq %rax, %rbx
19951951
; AVX2-NEXT: xorl %eax, %eax
1996-
; AVX2-NEXT: tzcntq %r11, %rax
1997-
; AVX2-NEXT: xorl %r12d, %r12d
1998-
; AVX2-NEXT: tzcntq %rdx, %r12
1999-
; AVX2-NEXT: addq $64, %r12
2000-
; AVX2-NEXT: testq %r11, %r11
2001-
; AVX2-NEXT: cmovneq %rax, %r12
2002-
; AVX2-NEXT: movq 48(%rdi), %r15
2003-
; AVX2-NEXT: xorl %r13d, %r13d
2004-
; AVX2-NEXT: tzcntq %r15, %r13
1952+
; AVX2-NEXT: tzcntq %rdx, %rax
1953+
; AVX2-NEXT: tzcntq %r8, %r11
1954+
; AVX2-NEXT: addq $64, %r11
1955+
; AVX2-NEXT: testq %rdx, %rdx
1956+
; AVX2-NEXT: cmovneq %rax, %r11
1957+
; AVX2-NEXT: subq $-128, %r11
1958+
; AVX2-NEXT: movq %rcx, %rax
1959+
; AVX2-NEXT: orq %rsi, %rax
1960+
; AVX2-NEXT: cmovneq %rbx, %r11
20051961
; AVX2-NEXT: xorl %eax, %eax
2006-
; AVX2-NEXT: tzcntq %rcx, %rax
1962+
; AVX2-NEXT: tzcntq %r10, %rax
1963+
; AVX2-NEXT: xorl %ebx, %ebx
1964+
; AVX2-NEXT: tzcntq %r9, %rbx
1965+
; AVX2-NEXT: addq $64, %rbx
1966+
; AVX2-NEXT: testq %r10, %r10
1967+
; AVX2-NEXT: cmovneq %rax, %rbx
1968+
; AVX2-NEXT: movq 48(%rdi), %r14
1969+
; AVX2-NEXT: xorl %r15d, %r15d
1970+
; AVX2-NEXT: tzcntq %r14, %r15
1971+
; AVX2-NEXT: xorl %eax, %eax
1972+
; AVX2-NEXT: tzcntq 56(%rdi), %rax
20071973
; AVX2-NEXT: addq $64, %rax
2008-
; AVX2-NEXT: testq %r15, %r15
2009-
; AVX2-NEXT: cmovneq %r13, %rax
1974+
; AVX2-NEXT: testq %r14, %r14
1975+
; AVX2-NEXT: cmovneq %r15, %rax
20101976
; AVX2-NEXT: subq $-128, %rax
2011-
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
2012-
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
2013-
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
2014-
; AVX2-NEXT: orq %rdx, %r11
2015-
; AVX2-NEXT: cmovneq %r12, %rax
2016-
; AVX2-NEXT: addq $256, %rax # imm = 0x100
2017-
; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
2018-
; AVX2-NEXT: orq %rsi, %r10
2019-
; AVX2-NEXT: orq %r8, %r14
2020-
; AVX2-NEXT: orq %r10, %r14
1977+
; AVX2-NEXT: orq %r9, %r10
20211978
; AVX2-NEXT: cmovneq %rbx, %rax
2022-
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
2023-
; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
2024-
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
2025-
; AVX2-NEXT: movq %r15, -{{[0-9]+}}(%rsp)
2026-
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
2027-
; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
2028-
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
1979+
; AVX2-NEXT: addq $256, %rax # imm = 0x100
1980+
; AVX2-NEXT: orq %r8, %rsi
1981+
; AVX2-NEXT: orq %rdx, %rcx
1982+
; AVX2-NEXT: orq %rsi, %rcx
1983+
; AVX2-NEXT: cmovneq %r11, %rax
1984+
; AVX2-NEXT: movl $-2, %edx
1985+
; AVX2-NEXT: movl %eax, %ecx
1986+
; AVX2-NEXT: roll %cl, %edx
20291987
; AVX2-NEXT: movl %eax, %ecx
2030-
; AVX2-NEXT: andl $32, %ecx
2031-
; AVX2-NEXT: movl %eax, %edx
2032-
; AVX2-NEXT: andl $480, %edx # imm = 0x1E0
2033-
; AVX2-NEXT: shrl $3, %edx
2034-
; AVX2-NEXT: movl %edx, %esi
2035-
; AVX2-NEXT: andl $-8, %esi
2036-
; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %r8
2037-
; AVX2-NEXT: notl %ecx
2038-
; AVX2-NEXT: movl -120(%rsp,%rsi), %esi
2039-
; AVX2-NEXT: addl %esi, %esi
2040-
; AVX2-NEXT: shlxq %rcx, %rsi, %rcx
2041-
; AVX2-NEXT: orl %r8d, %ecx
2042-
; AVX2-NEXT: btrl %eax, %ecx
2043-
; AVX2-NEXT: movl %ecx, (%rdi,%rdx)
1988+
; AVX2-NEXT: shrl $3, %ecx
1989+
; AVX2-NEXT: andl $60, %ecx
1990+
; AVX2-NEXT: andl %edx, (%rdi,%rcx)
20441991
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
20451992
; AVX2-NEXT: popq %rbx
2046-
; AVX2-NEXT: popq %r12
2047-
; AVX2-NEXT: popq %r13
20481993
; AVX2-NEXT: popq %r14
20491994
; AVX2-NEXT: popq %r15
2050-
; AVX2-NEXT: vzeroupper
20511995
; AVX2-NEXT: retq
20521996
;
20531997
; AVX512-LABEL: blsr_u512:
20541998
; AVX512: # %bb.0:
2055-
; AVX512-NEXT: pushq %rax
2056-
; AVX512-NEXT: vmovups (%rdi), %ymm0
2057-
; AVX512-NEXT: vmovups 32(%rdi), %ymm1
2058-
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm2
2059-
; AVX512-NEXT: vpternlogd {{.*#+}} zmm3 = -1
2060-
; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm3
2061-
; AVX512-NEXT: vpandnq %zmm3, %zmm2, %zmm3
2062-
; AVX512-NEXT: vplzcntq %zmm3, %zmm3
2063-
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [64,128,192,256,320,384,448,512]
2064-
; AVX512-NEXT: vpsubq %zmm3, %zmm4, %zmm3
2065-
; AVX512-NEXT: vptestmq %zmm2, %zmm2, %k1
2066-
; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [512,512,512,512,512,512,512,512]
2067-
; AVX512-NEXT: vpcompressq %zmm3, %zmm2 {%k1}
2068-
; AVX512-NEXT: vmovq %xmm2, %rax
2069-
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
2070-
; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp)
2071-
; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp)
2072-
; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
2073-
; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
1999+
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
2000+
; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1
2001+
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm1
2002+
; AVX512-NEXT: vpandnq %zmm1, %zmm0, %zmm1
2003+
; AVX512-NEXT: vplzcntq %zmm1, %zmm1
2004+
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [64,128,192,256,320,384,448,512]
2005+
; AVX512-NEXT: vpsubq %zmm1, %zmm2, %zmm1
2006+
; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1
2007+
; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [512,512,512,512,512,512,512,512]
2008+
; AVX512-NEXT: vpcompressq %zmm1, %zmm0 {%k1}
2009+
; AVX512-NEXT: vmovq %xmm0, %rax
2010+
; AVX512-NEXT: movl $-2, %edx
2011+
; AVX512-NEXT: movl %eax, %ecx
2012+
; AVX512-NEXT: roll %cl, %edx
20742013
; AVX512-NEXT: movl %eax, %ecx
2075-
; AVX512-NEXT: andl $32, %ecx
2076-
; AVX512-NEXT: movl %ecx, %edx
2077-
; AVX512-NEXT: notl %edx
2078-
; AVX512-NEXT: movl %eax, %esi
2079-
; AVX512-NEXT: shrl $3, %esi
2080-
; AVX512-NEXT: movl %esi, %r8d
2081-
; AVX512-NEXT: andl $56, %r8d
2082-
; AVX512-NEXT: movl -120(%rsp,%r8), %r9d
2083-
; AVX512-NEXT: addl %r9d, %r9d
2084-
; AVX512-NEXT: shlxq %rdx, %r9, %rdx
20852014
; AVX512-NEXT: shrl $3, %ecx
2086-
; AVX512-NEXT: addq %rsp, %r8
2087-
; AVX512-NEXT: addq $-128, %r8
2088-
; AVX512-NEXT: orl (%rcx,%r8), %edx
2089-
; AVX512-NEXT: btrl %eax, %edx
2090-
; AVX512-NEXT: andl $60, %esi
2091-
; AVX512-NEXT: movl %edx, (%rdi,%rsi)
2015+
; AVX512-NEXT: andl $60, %ecx
2016+
; AVX512-NEXT: andl %edx, (%rdi,%rcx)
20922017
; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
2093-
; AVX512-NEXT: popq %rcx
20942018
; AVX512-NEXT: vzeroupper
20952019
; AVX512-NEXT: retq
20962020
%ld = load i512, ptr %word

0 commit comments

Comments
 (0)