@@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
16761676 %cmp = icmp ne i4096 %test , 0
16771677 ret i1 %cmp
16781678}
1679+
1680+ ; Special Cases
1681+
1682+ ; Multiple uses of the stored value
; The xor result %res has two uses — it is both stored back to %word and
; compared against zero — so the backend must materialize the full shifted
; 128-bit mask rather than collapsing the sequence into a single bit-twiddle.
; NOTE(review): the CHECK lines below look autogenerated
; (update_llc_test_checks.py style — regex'd stack offsets, spill/reload
; comments); regenerate them with the script instead of hand-editing.
1683+ define i1 @complement_cmpz_i128 (ptr %word , i32 %position ) nounwind {
1684+ ; X86-LABEL: complement_cmpz_i128:
1685+ ; X86: # %bb.0:
1686+ ; X86-NEXT: pushl %ebp
1687+ ; X86-NEXT: movl %esp, %ebp
1688+ ; X86-NEXT: pushl %ebx
1689+ ; X86-NEXT: pushl %edi
1690+ ; X86-NEXT: pushl %esi
1691+ ; X86-NEXT: andl $-16, %esp
1692+ ; X86-NEXT: subl $64, %esp
1693+ ; X86-NEXT: movzbl 12(%ebp), %ecx
1694+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1695+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1696+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1697+ ; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
1698+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1699+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1700+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1701+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1702+ ; X86-NEXT: movl %ecx, %eax
1703+ ; X86-NEXT: shrb $3, %al
1704+ ; X86-NEXT: andb $12, %al
1705+ ; X86-NEXT: negb %al
1706+ ; X86-NEXT: movsbl %al, %esi
1707+ ; X86-NEXT: movl 36(%esp,%esi), %eax
1708+ ; X86-NEXT: movl 40(%esp,%esi), %edi
1709+ ; X86-NEXT: movl %edi, %edx
1710+ ; X86-NEXT: shldl %cl, %eax, %edx
1711+ ; X86-NEXT: movl 32(%esp,%esi), %ebx
1712+ ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1713+ ; X86-NEXT: movl 44(%esp,%esi), %esi
1714+ ; X86-NEXT: shldl %cl, %edi, %esi
1715+ ; X86-NEXT: movl %ebx, %edi
1716+ ; X86-NEXT: shll %cl, %edi
1717+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1718+ ; X86-NEXT: shldl %cl, %ebx, %eax
1719+ ; X86-NEXT: movl 8(%ebp), %ecx
1720+ ; X86-NEXT: xorl 12(%ecx), %esi
1721+ ; X86-NEXT: xorl 8(%ecx), %edx
1722+ ; X86-NEXT: xorl 4(%ecx), %eax
1723+ ; X86-NEXT: xorl (%ecx), %edi
1724+ ; X86-NEXT: movl %edx, 8(%ecx)
1725+ ; X86-NEXT: movl %esi, 12(%ecx)
1726+ ; X86-NEXT: movl %edi, (%ecx)
1727+ ; X86-NEXT: movl %eax, 4(%ecx)
1728+ ; X86-NEXT: orl %esi, %eax
1729+ ; X86-NEXT: orl %edx, %edi
1730+ ; X86-NEXT: orl %eax, %edi
1731+ ; X86-NEXT: setne %al
1732+ ; X86-NEXT: leal -12(%ebp), %esp
1733+ ; X86-NEXT: popl %esi
1734+ ; X86-NEXT: popl %edi
1735+ ; X86-NEXT: popl %ebx
1736+ ; X86-NEXT: popl %ebp
1737+ ; X86-NEXT: retl
1738+ ;
1739+ ; SSE-LABEL: complement_cmpz_i128:
1740+ ; SSE: # %bb.0:
1741+ ; SSE-NEXT: movl %esi, %ecx
1742+ ; SSE-NEXT: movl $1, %eax
1743+ ; SSE-NEXT: xorl %edx, %edx
1744+ ; SSE-NEXT: shldq %cl, %rax, %rdx
1745+ ; SSE-NEXT: shlq %cl, %rax
1746+ ; SSE-NEXT: xorl %esi, %esi
1747+ ; SSE-NEXT: testb $64, %cl
1748+ ; SSE-NEXT: cmovneq %rax, %rdx
1749+ ; SSE-NEXT: cmovneq %rsi, %rax
1750+ ; SSE-NEXT: xorq 8(%rdi), %rdx
1751+ ; SSE-NEXT: xorq (%rdi), %rax
1752+ ; SSE-NEXT: movq %rax, (%rdi)
1753+ ; SSE-NEXT: movq %rdx, 8(%rdi)
1754+ ; SSE-NEXT: orq %rdx, %rax
1755+ ; SSE-NEXT: setne %al
1756+ ; SSE-NEXT: retq
1757+ ;
1758+ ; AVX2-LABEL: complement_cmpz_i128:
1759+ ; AVX2: # %bb.0:
1760+ ; AVX2-NEXT: movl %esi, %ecx
1761+ ; AVX2-NEXT: movl $1, %eax
1762+ ; AVX2-NEXT: xorl %edx, %edx
1763+ ; AVX2-NEXT: shldq %cl, %rax, %rdx
1764+ ; AVX2-NEXT: xorl %esi, %esi
1765+ ; AVX2-NEXT: shlxq %rcx, %rax, %rax
1766+ ; AVX2-NEXT: testb $64, %cl
1767+ ; AVX2-NEXT: cmovneq %rax, %rdx
1768+ ; AVX2-NEXT: cmovneq %rsi, %rax
1769+ ; AVX2-NEXT: xorq 8(%rdi), %rdx
1770+ ; AVX2-NEXT: xorq (%rdi), %rax
1771+ ; AVX2-NEXT: movq %rax, (%rdi)
1772+ ; AVX2-NEXT: movq %rdx, 8(%rdi)
1773+ ; AVX2-NEXT: orq %rdx, %rax
1774+ ; AVX2-NEXT: setne %al
1775+ ; AVX2-NEXT: retq
1776+ ;
1777+ ; AVX512-LABEL: complement_cmpz_i128:
1778+ ; AVX512: # %bb.0:
1779+ ; AVX512-NEXT: movl %esi, %ecx
1780+ ; AVX512-NEXT: xorl %eax, %eax
1781+ ; AVX512-NEXT: movl $1, %edx
1782+ ; AVX512-NEXT: xorl %esi, %esi
1783+ ; AVX512-NEXT: shldq %cl, %rdx, %rsi
1784+ ; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
1785+ ; AVX512-NEXT: testb $64, %cl
1786+ ; AVX512-NEXT: cmovneq %rdx, %rsi
1787+ ; AVX512-NEXT: cmovneq %rax, %rdx
1788+ ; AVX512-NEXT: xorq 8(%rdi), %rsi
1789+ ; AVX512-NEXT: xorq (%rdi), %rdx
1790+ ; AVX512-NEXT: movq %rdx, (%rdi)
1791+ ; AVX512-NEXT: movq %rsi, 8(%rdi)
1792+ ; AVX512-NEXT: orq %rsi, %rdx
1793+ ; AVX512-NEXT: setne %al
1794+ ; AVX512-NEXT: retq
; IR under test: *%word ^= (1 << (%position & 127)); return (*%word != 0)
1795+ %rem = and i32 %position , 127
1796+ %ofs = zext nneg i32 %rem to i128
1797+ %bit = shl nuw i128 1 , %ofs
1798+ %ld = load i128 , ptr %word
1799+ %res = xor i128 %ld , %bit
1800+ store i128 %res , ptr %word
1801+ %cmp = icmp ne i128 %res , 0
1802+ ret i1 %cmp
1803+ }
1804+
1805+ ; Multiple loads in store chain
; An extra load (%sel from %p) sits in the chain between the i128 load and
; store of %word, exercising store-chain handling with multiple loads: the
; bit is cleared in *%word, and the function returns %sel when the tested
; bit was clear, 0 when it was set.
; NOTE(review): CHECK lines appear autogenerated (update_llc_test_checks.py
; style); regenerate with the script rather than hand-editing.
1806+ define i32 @reset_multiload_i128 (ptr %word , i32 %position , ptr %p ) nounwind {
1807+ ; X86-LABEL: reset_multiload_i128:
1808+ ; X86: # %bb.0:
1809+ ; X86-NEXT: pushl %ebp
1810+ ; X86-NEXT: movl %esp, %ebp
1811+ ; X86-NEXT: pushl %ebx
1812+ ; X86-NEXT: pushl %edi
1813+ ; X86-NEXT: pushl %esi
1814+ ; X86-NEXT: andl $-16, %esp
1815+ ; X86-NEXT: subl $64, %esp
1816+ ; X86-NEXT: movl 12(%ebp), %ecx
1817+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1818+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1819+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1820+ ; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
1821+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1822+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1823+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1824+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1825+ ; X86-NEXT: movl %ecx, %eax
1826+ ; X86-NEXT: shrb $3, %al
1827+ ; X86-NEXT: andb $12, %al
1828+ ; X86-NEXT: negb %al
1829+ ; X86-NEXT: movsbl %al, %edi
1830+ ; X86-NEXT: movl 36(%esp,%edi), %edx
1831+ ; X86-NEXT: movl 40(%esp,%edi), %ebx
1832+ ; X86-NEXT: movl %ebx, %esi
1833+ ; X86-NEXT: shldl %cl, %edx, %esi
1834+ ; X86-NEXT: movl 32(%esp,%edi), %eax
1835+ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1836+ ; X86-NEXT: movl 44(%esp,%edi), %edi
1837+ ; X86-NEXT: shldl %cl, %ebx, %edi
1838+ ; X86-NEXT: movl %eax, %ebx
1839+ ; X86-NEXT: # kill: def $cl killed $cl killed $ecx
1840+ ; X86-NEXT: shll %cl, %ebx
1841+ ; X86-NEXT: notl %ebx
1842+ ; X86-NEXT: movl 16(%ebp), %eax
1843+ ; X86-NEXT: movl (%eax), %eax
1844+ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1845+ ; X86-NEXT: movl 12(%ebp), %eax
1846+ ; X86-NEXT: andl $96, %eax
1847+ ; X86-NEXT: shrl $3, %eax
1848+ ; X86-NEXT: movl 8(%ebp), %ecx
1849+ ; X86-NEXT: movl (%ecx,%eax), %eax
1850+ ; X86-NEXT: andl %ebx, (%ecx)
1851+ ; X86-NEXT: movl 12(%ebp), %ecx
1852+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1853+ ; X86-NEXT: shldl %cl, %ebx, %edx
1854+ ; X86-NEXT: notl %edx
1855+ ; X86-NEXT: movl 8(%ebp), %ebx
1856+ ; X86-NEXT: andl %edx, 4(%ebx)
1857+ ; X86-NEXT: notl %esi
1858+ ; X86-NEXT: andl %esi, 8(%ebx)
1859+ ; X86-NEXT: notl %edi
1860+ ; X86-NEXT: andl %edi, 12(%ebx)
1861+ ; X86-NEXT: btl %ecx, %eax
1862+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1863+ ; X86-NEXT: jae .LBB22_2
1864+ ; X86-NEXT: # %bb.1:
1865+ ; X86-NEXT: xorl %eax, %eax
1866+ ; X86-NEXT: .LBB22_2:
1867+ ; X86-NEXT: leal -12(%ebp), %esp
1868+ ; X86-NEXT: popl %esi
1869+ ; X86-NEXT: popl %edi
1870+ ; X86-NEXT: popl %ebx
1871+ ; X86-NEXT: popl %ebp
1872+ ; X86-NEXT: retl
1873+ ;
1874+ ; SSE-LABEL: reset_multiload_i128:
1875+ ; SSE: # %bb.0:
1876+ ; SSE-NEXT: movl %esi, %ecx
1877+ ; SSE-NEXT: movl $1, %esi
1878+ ; SSE-NEXT: xorl %r8d, %r8d
1879+ ; SSE-NEXT: shldq %cl, %rsi, %r8
1880+ ; SSE-NEXT: xorl %eax, %eax
1881+ ; SSE-NEXT: shlq %cl, %rsi
1882+ ; SSE-NEXT: testb $64, %cl
1883+ ; SSE-NEXT: cmovneq %rsi, %r8
1884+ ; SSE-NEXT: cmovneq %rax, %rsi
1885+ ; SSE-NEXT: notq %r8
1886+ ; SSE-NEXT: notq %rsi
1887+ ; SSE-NEXT: movl %ecx, %r9d
1888+ ; SSE-NEXT: andl $96, %r9d
1889+ ; SSE-NEXT: shrl $3, %r9d
1890+ ; SSE-NEXT: movl (%rdi,%r9), %r9d
1891+ ; SSE-NEXT: btl %ecx, %r9d
1892+ ; SSE-NEXT: jb .LBB22_2
1893+ ; SSE-NEXT: # %bb.1:
1894+ ; SSE-NEXT: movl (%rdx), %eax
1895+ ; SSE-NEXT: .LBB22_2:
1896+ ; SSE-NEXT: andq %r8, 8(%rdi)
1897+ ; SSE-NEXT: andq %rsi, (%rdi)
1898+ ; SSE-NEXT: # kill: def $eax killed $eax killed $rax
1899+ ; SSE-NEXT: retq
1900+ ;
1901+ ; AVX2-LABEL: reset_multiload_i128:
1902+ ; AVX2: # %bb.0:
1903+ ; AVX2-NEXT: movl %esi, %ecx
1904+ ; AVX2-NEXT: xorl %eax, %eax
1905+ ; AVX2-NEXT: movl $1, %r8d
1906+ ; AVX2-NEXT: xorl %esi, %esi
1907+ ; AVX2-NEXT: shldq %cl, %r8, %rsi
1908+ ; AVX2-NEXT: shlxq %rcx, %r8, %r8
1909+ ; AVX2-NEXT: testb $64, %cl
1910+ ; AVX2-NEXT: cmovneq %r8, %rsi
1911+ ; AVX2-NEXT: cmovneq %rax, %r8
1912+ ; AVX2-NEXT: notq %rsi
1913+ ; AVX2-NEXT: notq %r8
1914+ ; AVX2-NEXT: movl %ecx, %r9d
1915+ ; AVX2-NEXT: andl $96, %r9d
1916+ ; AVX2-NEXT: shrl $3, %r9d
1917+ ; AVX2-NEXT: movl (%rdi,%r9), %r9d
1918+ ; AVX2-NEXT: btl %ecx, %r9d
1919+ ; AVX2-NEXT: jb .LBB22_2
1920+ ; AVX2-NEXT: # %bb.1:
1921+ ; AVX2-NEXT: movl (%rdx), %eax
1922+ ; AVX2-NEXT: .LBB22_2:
1923+ ; AVX2-NEXT: andq %rsi, 8(%rdi)
1924+ ; AVX2-NEXT: andq %r8, (%rdi)
1925+ ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
1926+ ; AVX2-NEXT: retq
1927+ ;
1928+ ; AVX512-LABEL: reset_multiload_i128:
1929+ ; AVX512: # %bb.0:
1930+ ; AVX512-NEXT: movl %esi, %ecx
1931+ ; AVX512-NEXT: movl $1, %r8d
1932+ ; AVX512-NEXT: xorl %esi, %esi
1933+ ; AVX512-NEXT: shldq %cl, %r8, %rsi
1934+ ; AVX512-NEXT: xorl %eax, %eax
1935+ ; AVX512-NEXT: shlxq %rcx, %r8, %r8
1936+ ; AVX512-NEXT: testb $64, %cl
1937+ ; AVX512-NEXT: cmovneq %r8, %rsi
1938+ ; AVX512-NEXT: cmovneq %rax, %r8
1939+ ; AVX512-NEXT: notq %rsi
1940+ ; AVX512-NEXT: notq %r8
1941+ ; AVX512-NEXT: movl %ecx, %r9d
1942+ ; AVX512-NEXT: andl $96, %r9d
1943+ ; AVX512-NEXT: shrl $3, %r9d
1944+ ; AVX512-NEXT: movl (%rdi,%r9), %r9d
1945+ ; AVX512-NEXT: btl %ecx, %r9d
1946+ ; AVX512-NEXT: jb .LBB22_2
1947+ ; AVX512-NEXT: # %bb.1:
1948+ ; AVX512-NEXT: movl (%rdx), %eax
1949+ ; AVX512-NEXT: .LBB22_2:
1950+ ; AVX512-NEXT: andq %rsi, 8(%rdi)
1951+ ; AVX512-NEXT: andq %r8, (%rdi)
1952+ ; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
1953+ ; AVX512-NEXT: retq
; IR under test: mask = ~(1 << (%position & 127)); the %sel load from %p is
; independent of %word, and the select picks %sel only when (%ld & %bit) == 0.
1954+ %rem = and i32 %position , 127
1955+ %ofs = zext nneg i32 %rem to i128
1956+ %bit = shl nuw i128 1 , %ofs
1957+ %mask = xor i128 %bit , -1
1958+ %ld = load i128 , ptr %word
1959+ %sel = load i32 , ptr %p
1960+ %test = and i128 %ld , %bit
1961+ %res = and i128 %ld , %mask
1962+ %cmp = icmp eq i128 %test , 0
1963+ store i128 %res , ptr %word
1964+ %ret = select i1 %cmp , i32 %sel , i32 0
1965+ ret i32 %ret
1966+ }