Skip to content

Commit 3db1bb1

Browse files
committed
[AMDGPU][GlobalISel] Add register bank legalization for G_SMIN/G_SMAX/G_UMIN/G_UMAX - revert back to G_BUILD_VECTOR issue
1 parent 833dbf4 commit 3db1bb1

File tree

5 files changed

+80
-71
lines changed

5 files changed

+80
-71
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,13 +1363,7 @@ void RegBankLegalizeHelper::applyMappingTrivial(MachineInstr &MI) {
13631363
B.setInstr(MI);
13641364
for (unsigned i = NumDefs; i < NumOperands; ++i) {
13651365
Register Reg = MI.getOperand(i).getReg();
1366-
// Helper to check if a register should be skipped for VGPR conversion
1367-
auto shouldSkipVGPRConversion = [&](Register Reg) {
1368-
MachineInstr *DefMI = MRI.getVRegDef(Reg);
1369-
// Skip if defining instruction is implicit_def
1370-
return DefMI && DefMI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
1371-
};
1372-
if (MRI.getRegBank(Reg) != RB && !shouldSkipVGPRConversion(Reg)) {
1366+
if (MRI.getRegBank(Reg) != RB) {
13731367
auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
13741368
MI.getOperand(i).setReg(Copy.getReg(0));
13751369
}

llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,6 +1632,7 @@ entry:
16321632
ret i8 %res
16331633
}
16341634

1635+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
16351636
define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
16361637
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v2i16:
16371638
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1678,7 +1679,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
16781679
; GFX9-GISEL-LABEL: test_vector_reduce_smax_v2i16:
16791680
; GFX9-GISEL: ; %bb.0: ; %entry
16801681
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1682+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
16821683
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
16831684
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
16841685
;
@@ -1692,7 +1693,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
16921693
; GFX10-GISEL-LABEL: test_vector_reduce_smax_v2i16:
16931694
; GFX10-GISEL: ; %bb.0: ; %entry
16941695
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1696+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
16961697
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
16971698
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
16981699
;
@@ -1713,7 +1714,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
17131714
; GFX11-GISEL-LABEL: test_vector_reduce_smax_v2i16:
17141715
; GFX11-GISEL: ; %bb.0: ; %entry
17151716
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1716-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1717+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
17171718
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
17181719
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
17191720
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1747,7 +1748,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
17471748
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
17481749
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
17491750
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1750-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1751+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
17511752
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
17521753
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
17531754
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1900,6 +1901,7 @@ entry:
19001901
ret i16 %res
19011902
}
19021903

1904+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
19031905
define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
19041906
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v4i16:
19051907
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1961,7 +1963,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
19611963
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19621964
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
19631965
; GFX9-GISEL-NEXT: s_nop 0
1964-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1966+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
19651967
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
19661968
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
19671969
;
@@ -1977,7 +1979,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
19771979
; GFX10-GISEL: ; %bb.0: ; %entry
19781980
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19791981
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
1980-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1982+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
19811983
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
19821984
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
19831985
;
@@ -2003,7 +2005,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
20032005
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20042006
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20052007
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2006-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2008+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
20072009
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20082010
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
20092011
;
@@ -2041,14 +2043,15 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
20412043
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
20422044
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20432045
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2044-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2046+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
20452047
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
20462048
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
20472049
entry:
20482050
%res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
20492051
ret i16 %res
20502052
}
20512053

2054+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
20522055
define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
20532056
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16:
20542057
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2139,7 +2142,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
21392142
; GFX9-GISEL-NEXT: s_nop 0
21402143
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21412144
; GFX9-GISEL-NEXT: s_nop 0
2142-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2145+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
21432146
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21442147
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
21452148
;
@@ -2159,7 +2162,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
21592162
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v2
21602163
; GFX10-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
21612164
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2162-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2165+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
21632166
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21642167
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
21652168
;
@@ -2192,7 +2195,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
21922195
; GFX11-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
21932196
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
21942197
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2195-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2198+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
21962199
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
21972200
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
21982201
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2238,7 +2241,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
22382241
; GFX12-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
22392242
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
22402243
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2241-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2244+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
22422245
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
22432246
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
22442247
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2247,6 +2250,7 @@ entry:
22472250
ret i16 %res
22482251
}
22492252

2253+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
22502254
define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
22512255
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16:
22522256
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2391,7 +2395,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
23912395
; GFX9-GISEL-NEXT: s_nop 0
23922396
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
23932397
; GFX9-GISEL-NEXT: s_nop 0
2394-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2398+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
23952399
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
23962400
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23972401
;
@@ -2419,7 +2423,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
24192423
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v2
24202424
; GFX10-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
24212425
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2422-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2426+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
24232427
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
24242428
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
24252429
;
@@ -2467,7 +2471,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
24672471
; GFX11-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
24682472
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
24692473
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2470-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2474+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
24712475
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
24722476
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
24732477
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2528,7 +2532,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
25282532
; GFX12-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
25292533
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
25302534
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
2531-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2535+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
25322536
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
25332537
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
25342538
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,6 +1632,7 @@ entry:
16321632
ret i8 %res
16331633
}
16341634

1635+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
16351636
define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
16361637
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v2i16:
16371638
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1678,7 +1679,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
16781679
; GFX9-GISEL-LABEL: test_vector_reduce_smin_v2i16:
16791680
; GFX9-GISEL: ; %bb.0: ; %entry
16801681
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1682+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
16821683
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
16831684
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
16841685
;
@@ -1692,7 +1693,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
16921693
; GFX10-GISEL-LABEL: test_vector_reduce_smin_v2i16:
16931694
; GFX10-GISEL: ; %bb.0: ; %entry
16941695
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1696+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
16961697
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
16971698
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
16981699
;
@@ -1713,7 +1714,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
17131714
; GFX11-GISEL-LABEL: test_vector_reduce_smin_v2i16:
17141715
; GFX11-GISEL: ; %bb.0: ; %entry
17151716
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1716-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1717+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
17171718
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
17181719
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
17191720
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1747,7 +1748,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
17471748
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
17481749
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
17491750
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1750-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1751+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
17511752
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
17521753
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
17531754
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1900,6 +1901,7 @@ entry:
19001901
ret i16 %res
19011902
}
19021903

1904+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
19031905
define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
19041906
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v4i16:
19051907
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1961,7 +1963,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
19611963
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19621964
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
19631965
; GFX9-GISEL-NEXT: s_nop 0
1964-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1966+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
19651967
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
19661968
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
19671969
;
@@ -1977,7 +1979,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
19771979
; GFX10-GISEL: ; %bb.0: ; %entry
19781980
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19791981
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
1980-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1982+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
19811983
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
19821984
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
19831985
;
@@ -2003,7 +2005,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
20032005
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20042006
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
20052007
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2006-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2008+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
20072009
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
20082010
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
20092011
;
@@ -2041,14 +2043,15 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
20412043
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
20422044
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
20432045
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2044-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2046+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
20452047
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
20462048
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
20472049
entry:
20482050
%res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
20492051
ret i16 %res
20502052
}
20512053

2054+
; FIXME: With -new-reg-bank-select, v_alignbit_b32 is a regression. Need a pattern to look through COPY.
20522055
define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
20532056
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16:
20542057
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2139,7 +2142,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
21392142
; GFX9-GISEL-NEXT: s_nop 0
21402143
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
21412144
; GFX9-GISEL-NEXT: s_nop 0
2142-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2145+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
21432146
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
21442147
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
21452148
;
@@ -2159,7 +2162,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
21592162
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v2
21602163
; GFX10-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
21612164
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
2162-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2165+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
21632166
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
21642167
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
21652168
;
@@ -2192,7 +2195,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
21922195
; GFX11-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
21932196
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
21942197
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
2195-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2198+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
21962199
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
21972200
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
21982201
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2238,7 +2241,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
22382241
; GFX12-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
22392242
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
22402243
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
2241-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2244+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
22422245
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
22432246
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
22442247
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2391,7 +2394,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
23912394
; GFX9-GISEL-NEXT: s_nop 0
23922395
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
23932396
; GFX9-GISEL-NEXT: s_nop 0
2394-
; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2397+
; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
23952398
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
23962399
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23972400
;
@@ -2419,7 +2422,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
24192422
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v2
24202423
; GFX10-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
24212424
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
2422-
; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2425+
; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
24232426
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
24242427
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
24252428
;
@@ -2467,7 +2470,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
24672470
; GFX11-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
24682471
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
24692472
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
2470-
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2473+
; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
24712474
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
24722475
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
24732476
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2528,7 +2531,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
25282531
; GFX12-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
25292532
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
25302533
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
2531-
; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2534+
; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
25322535
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
25332536
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
25342537
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)