@@ -1632,6 +1632,7 @@ entry:
16321632 ret i8 %res
16331633}
16341634
1635+ ; FIXME: With -new-reg-bank-select, selecting v_alignbit_b32 instead of v_lshrrev_b32 is a regression. Need a pattern that looks through COPY.
16351636define i16 @test_vector_reduce_smax_v2i16 (<2 x i16 > %v ) {
16361637; GFX7-SDAG-LABEL: test_vector_reduce_smax_v2i16:
16371638; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1678,7 +1679,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
16781679; GFX9-GISEL-LABEL: test_vector_reduce_smax_v2i16:
16791680; GFX9-GISEL: ; %bb.0: ; %entry
16801681; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
1682+ ; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
16821683; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
16831684; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
16841685;
@@ -1692,7 +1693,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
16921693; GFX10-GISEL-LABEL: test_vector_reduce_smax_v2i16:
16931694; GFX10-GISEL: ; %bb.0: ; %entry
16941695; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695- ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
1696+ ; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4 , v0, 16
16961697; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
16971698; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
16981699;
@@ -1713,7 +1714,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
17131714; GFX11-GISEL-LABEL: test_vector_reduce_smax_v2i16:
17141715; GFX11-GISEL: ; %bb.0: ; %entry
17151716; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1716- ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
1717+ ; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
17171718; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
17181719; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
17191720; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1747,7 +1748,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
17471748; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
17481749; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
17491750; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1750- ; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
1751+ ; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
17511752; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
17521753; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
17531754; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1900,6 +1901,7 @@ entry:
19001901 ret i16 %res
19011902}
19021903
1904+ ; FIXME: With -new-reg-bank-select, selecting v_alignbit_b32 instead of v_lshrrev_b32 is a regression. Need a pattern that looks through COPY.
19031905define i16 @test_vector_reduce_smax_v4i16 (<4 x i16 > %v ) {
19041906; GFX7-SDAG-LABEL: test_vector_reduce_smax_v4i16:
19051907; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1961,7 +1963,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
19611963; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19621964; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
19631965; GFX9-GISEL-NEXT: s_nop 0
1964- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
1966+ ; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
19651967; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
19661968; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
19671969;
@@ -1977,7 +1979,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
19771979; GFX10-GISEL: ; %bb.0: ; %entry
19781980; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19791981; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
1980- ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
1982+ ; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4 , v0, 16
19811983; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
19821984; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
19831985;
@@ -2003,7 +2005,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
20032005; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20042006; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20052007; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2006- ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2008+ ; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
20072009; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20082010; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
20092011;
@@ -2041,14 +2043,15 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
20412043; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
20422044; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20432045; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2044- ; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2046+ ; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
20452047; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20462048; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
20472049entry:
20482050 %res = call i16 @llvm.vector.reduce.smax.v4i16 (<4 x i16 > %v )
20492051 ret i16 %res
20502052}
20512053
2054+ ; FIXME: With -new-reg-bank-select, selecting v_alignbit_b32 instead of v_lshrrev_b32 is a regression. Need a pattern that looks through COPY.
20522055define i16 @test_vector_reduce_smax_v8i16 (<8 x i16 > %v ) {
20532056; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16:
20542057; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2139,7 +2142,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
21392142; GFX9-GISEL-NEXT: s_nop 0
21402143; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21412144; GFX9-GISEL-NEXT: s_nop 0
2142- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2145+ ; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
21432146; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21442147; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
21452148;
@@ -2159,7 +2162,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
21592162; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v2
21602163; GFX10-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
21612164; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2162- ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2165+ ; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4 , v0, 16
21632166; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21642167; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
21652168;
@@ -2192,7 +2195,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
21922195; GFX11-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
21932196; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
21942197; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2195- ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2198+ ; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
21962199; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
21972200; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21982201; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2238,7 +2241,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
22382241; GFX12-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
22392242; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
22402243; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2241- ; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2244+ ; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
22422245; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
22432246; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
22442247; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2247,6 +2250,7 @@ entry:
22472250 ret i16 %res
22482251}
22492252
2253+ ; FIXME: With -new-reg-bank-select, selecting v_alignbit_b32 instead of v_lshrrev_b32 is a regression. Need a pattern that looks through COPY.
22502254define i16 @test_vector_reduce_smax_v16i16 (<16 x i16 > %v ) {
22512255; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16:
22522256; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2391,7 +2395,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
23912395; GFX9-GISEL-NEXT: s_nop 0
23922396; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
23932397; GFX9-GISEL-NEXT: s_nop 0
2394- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2398+ ; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
23952399; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
23962400; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23972401;
@@ -2419,7 +2423,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
24192423; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v2
24202424; GFX10-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
24212425; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2422- ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2426+ ; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4 , v0, 16
24232427; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
24242428; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
24252429;
@@ -2467,7 +2471,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
24672471; GFX11-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
24682472; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
24692473; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2470- ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2474+ ; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
24712475; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
24722476; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
24732477; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2528,7 +2532,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
25282532; GFX12-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
25292533; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
25302534; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2531- ; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16 , v0
2535+ ; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0 , v0, 16
25322536; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
25332537; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
25342538; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
0 commit comments