@@ -1881,6 +1881,141 @@ define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
18811881 ret i65 %result
18821882}
18831883
; Arithmetic shift right of <4 x i2> in a regular function (no explicit calling
; convention, operands not inreg); the checks show the operands in v0-v7.
; Expected per element: the shift amount is masked to its low two bits
; (v_and_b32 with 3), the value is sign-extended from 2 bits
; (v_bfe_i32 ..., 0, 2), and v_ashrrev performs the shift with the amount as
; its first source operand.
; The GFX6 checks use the 32-bit shift; the GFX8, GFX9 and GFX10PLUS checks use
; the 16-bit shift. The GFX10PLUS checks group all masks and extends ahead of
; the four shifts and keep a separate masked amount per element (v4-v7).
; NOTE(review): the check lines look script-generated (presumably by
; update_llc_test_checks.py); regenerate rather than hand-edit -- confirm
; against the file header.
1884+ define <4 x i2 > @v_ashr_v4i2 (<4 x i2 > %value , <4 x i2 > %amount ) {
1885+ ; GFX6-LABEL: v_ashr_v4i2:
1886+ ; GFX6: ; %bb.0:
1887+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1888+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v4
1889+ ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 2
1890+ ; GFX6-NEXT: v_ashrrev_i32_e32 v0, v4, v0
1891+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v5
1892+ ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 2
1893+ ; GFX6-NEXT: v_ashrrev_i32_e32 v1, v4, v1
1894+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v6
1895+ ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 2
1896+ ; GFX6-NEXT: v_ashrrev_i32_e32 v2, v4, v2
1897+ ; GFX6-NEXT: v_and_b32_e32 v4, 3, v7
1898+ ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 2
1899+ ; GFX6-NEXT: v_ashrrev_i32_e32 v3, v4, v3
1900+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
1901+ ;
1902+ ; GFX8-LABEL: v_ashr_v4i2:
1903+ ; GFX8: ; %bb.0:
1904+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1905+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v4
1906+ ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 2
1907+ ; GFX8-NEXT: v_ashrrev_i16_e32 v0, v4, v0
1908+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v5
1909+ ; GFX8-NEXT: v_bfe_i32 v1, v1, 0, 2
1910+ ; GFX8-NEXT: v_ashrrev_i16_e32 v1, v4, v1
1911+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v6
1912+ ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 2
1913+ ; GFX8-NEXT: v_ashrrev_i16_e32 v2, v4, v2
1914+ ; GFX8-NEXT: v_and_b32_e32 v4, 3, v7
1915+ ; GFX8-NEXT: v_bfe_i32 v3, v3, 0, 2
1916+ ; GFX8-NEXT: v_ashrrev_i16_e32 v3, v4, v3
1917+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
1918+ ;
1919+ ; GFX9-LABEL: v_ashr_v4i2:
1920+ ; GFX9: ; %bb.0:
1921+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1922+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v4
1923+ ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 2
1924+ ; GFX9-NEXT: v_ashrrev_i16_e32 v0, v4, v0
1925+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v5
1926+ ; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 2
1927+ ; GFX9-NEXT: v_ashrrev_i16_e32 v1, v4, v1
1928+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v6
1929+ ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 2
1930+ ; GFX9-NEXT: v_ashrrev_i16_e32 v2, v4, v2
1931+ ; GFX9-NEXT: v_and_b32_e32 v4, 3, v7
1932+ ; GFX9-NEXT: v_bfe_i32 v3, v3, 0, 2
1933+ ; GFX9-NEXT: v_ashrrev_i16_e32 v3, v4, v3
1934+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
1935+ ;
1936+ ; GFX10PLUS-LABEL: v_ashr_v4i2:
1937+ ; GFX10PLUS: ; %bb.0:
1938+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, 3, v4
1940+ ; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 2
1941+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, 3, v5
1942+ ; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 2
1943+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, 3, v6
1944+ ; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 2
1945+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, 3, v7
1946+ ; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 2
1947+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v4, v0
1948+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v1, v5, v1
1949+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v2, v6, v2
1950+ ; GFX10PLUS-NEXT: v_ashrrev_i16 v3, v7, v3
1951+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1952+ %result = ashr <4 x i2 > %value , %amount
1953+ ret <4 x i2 > %result
1954+ }
1955+
1956+ ; Arithmetic shifts of an i1 are identity operations: the only shift amount that does not yield poison is 0, so no shift instructions are expected.
; amdgpu_ps variant with inreg operands: ashr of <4 x i1>.
; Both check prefixes expect an empty body -- no instruction precedes the
; "return to shader part epilog" marker.
1957+ define amdgpu_ps <4 x i1 > @s_ashr_v4i1 (<4 x i1 > inreg %value , <4 x i1 > inreg %amount ) {
1958+ ; GCN-LABEL: s_ashr_v4i1:
1959+ ; GCN: ; %bb.0:
1960+ ; GCN-NEXT: ; return to shader part epilog
1961+ ;
1962+ ; GFX10PLUS-LABEL: s_ashr_v4i1:
1963+ ; GFX10PLUS: ; %bb.0:
1964+ ; GFX10PLUS-NEXT: ; return to shader part epilog
1965+ %result = ashr <4 x i1 > %value , %amount
1966+ ret <4 x i1 > %result
1967+ }
1968+
; Regular-function variant (operands not inreg): ashr of <4 x i1>.
; Both check prefixes expect only the entry s_waitcnt followed by the
; s_setpc_b64 return; no shift instructions are emitted.
1969+ define <4 x i1 > @v_ashr_v4i1 (<4 x i1 > %value , <4 x i1 > %amount ) {
1970+ ; GCN-LABEL: v_ashr_v4i1:
1971+ ; GCN: ; %bb.0:
1972+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1973+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1974+ ;
1975+ ; GFX10PLUS-LABEL: v_ashr_v4i1:
1976+ ; GFX10PLUS: ; %bb.0:
1977+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1978+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1979+ %result = ashr <4 x i1 > %value , %amount
1980+ ret <4 x i1 > %result
1981+ }
1982+
; amdgpu_ps variant with inreg operands: ashr of <4 x i2>; the checks show the
; operands in s0-s7.
; Expected per element: s_and_b32 with 3 masks the shift amount to its low two
; bits, s_bfe_i32 with 0x20000 (width 2 in bits 22:16, offset 0) sign-extends
; the value, and s_ashr_i32 performs the shift.
; The GCN sequence handles one element at a time and reuses s4 for every masked
; amount; the GFX10PLUS sequence does the same operations two elements at a
; time, using s4 and s5 for the masked amounts.
1983+ define amdgpu_ps <4 x i2 > @s_ashr_v4i2 (<4 x i2 > inreg %value , <4 x i2 > inreg %amount ) {
1984+ ; GCN-LABEL: s_ashr_v4i2:
1985+ ; GCN: ; %bb.0:
1986+ ; GCN-NEXT: s_and_b32 s4, s4, 3
1987+ ; GCN-NEXT: s_bfe_i32 s0, s0, 0x20000
1988+ ; GCN-NEXT: s_ashr_i32 s0, s0, s4
1989+ ; GCN-NEXT: s_and_b32 s4, s5, 3
1990+ ; GCN-NEXT: s_bfe_i32 s1, s1, 0x20000
1991+ ; GCN-NEXT: s_ashr_i32 s1, s1, s4
1992+ ; GCN-NEXT: s_and_b32 s4, s6, 3
1993+ ; GCN-NEXT: s_bfe_i32 s2, s2, 0x20000
1994+ ; GCN-NEXT: s_ashr_i32 s2, s2, s4
1995+ ; GCN-NEXT: s_and_b32 s4, s7, 3
1996+ ; GCN-NEXT: s_bfe_i32 s3, s3, 0x20000
1997+ ; GCN-NEXT: s_ashr_i32 s3, s3, s4
1998+ ; GCN-NEXT: ; return to shader part epilog
1999+ ;
2000+ ; GFX10PLUS-LABEL: s_ashr_v4i2:
2001+ ; GFX10PLUS: ; %bb.0:
2002+ ; GFX10PLUS-NEXT: s_and_b32 s4, s4, 3
2003+ ; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x20000
2004+ ; GFX10PLUS-NEXT: s_and_b32 s5, s5, 3
2005+ ; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x20000
2006+ ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s4
2007+ ; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s5
2008+ ; GFX10PLUS-NEXT: s_and_b32 s4, s6, 3
2009+ ; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x20000
2010+ ; GFX10PLUS-NEXT: s_and_b32 s5, s7, 3
2011+ ; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x20000
2012+ ; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s4
2013+ ; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s5
2014+ ; GFX10PLUS-NEXT: ; return to shader part epilog
2015+ %result = ashr <4 x i2 > %value , %amount
2016+ ret <4 x i2 > %result
2017+ }
2018+
18842019; FIXME: Argument lowering asserts
18852020; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) {
18862021; %result = ashr <2 x i65> %value, %amount
0 commit comments