Skip to content

Commit 167c00e

Browse files
authored
[AMDGPU][GlobalISel] Add register bank legalization for G_SMIN/G_SMAX/G_UMIN/G_UMAX (llvm#159821)
[AMDGPU][GlobalISel] Add register bank legalization for G_SMIN/G_SMAX/G_UMIN/G_UMAX This patch adds register bank legalization support for min/max operations in the AMDGPU GlobalISel pipeline. - Add support S16, S32, and V2S16 types - For V2S16 uniform operations implements UnpackMinMax lowering
1 parent 97031a6 commit 167c00e

15 files changed

+193
-136
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,38 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
468468
MI.eraseFromParent();
469469
}
470470

471+
void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
472+
Register Lo, Hi;
473+
switch (MI.getOpcode()) {
474+
case AMDGPU::G_SMIN:
475+
case AMDGPU::G_SMAX: {
476+
// For signed operations, use sign extension
477+
auto [Val0_Lo, Val0_Hi] = unpackSExt(MI.getOperand(1).getReg());
478+
auto [Val1_Lo, Val1_Hi] = unpackSExt(MI.getOperand(2).getReg());
479+
Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
480+
.getReg(0);
481+
Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
482+
.getReg(0);
483+
break;
484+
}
485+
case AMDGPU::G_UMIN:
486+
case AMDGPU::G_UMAX: {
487+
// For unsigned operations, use zero extension
488+
auto [Val0_Lo, Val0_Hi] = unpackZExt(MI.getOperand(1).getReg());
489+
auto [Val1_Lo, Val1_Hi] = unpackZExt(MI.getOperand(2).getReg());
490+
Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
491+
.getReg(0);
492+
Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
493+
.getReg(0);
494+
break;
495+
}
496+
default:
497+
llvm_unreachable("Unpack min/max lowering not implemented");
498+
}
499+
B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
500+
MI.eraseFromParent();
501+
}
502+
471503
static bool isSignedBFE(MachineInstr &MI) {
472504
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
473505
return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -654,6 +686,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
654686
}
655687
case UnpackBitShift:
656688
return lowerUnpackBitShift(MI);
689+
case UnpackMinMax:
690+
return lowerUnpackMinMax(MI);
657691
case Ext32To64: {
658692
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
659693
MachineInstrBuilder Hi;

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ class RegBankLegalizeHelper {
123123
void lowerSplitTo32(MachineInstr &MI);
124124
void lowerSplitTo32Select(MachineInstr &MI);
125125
void lowerSplitTo32SExtInReg(MachineInstr &MI);
126+
void lowerUnpackMinMax(MachineInstr &MI);
126127
};
127128

128129
} // end namespace AMDGPU

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,22 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
522522
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
523523
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
524524

525+
addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
526+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
527+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
528+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
529+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
530+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
531+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
532+
533+
addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
534+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
535+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
536+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
537+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
538+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
539+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
540+
525541
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
526542
// and G_FREEZE here, rest is trivially regbankselected earlier
527543
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ enum LoweringMethodID {
212212
VccExtToSel,
213213
UniExtToSel,
214214
UnpackBitShift,
215+
UnpackMinMax,
215216
S_BFE,
216217
V_BFE,
217218
VgprToVccCopy,

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
43

54
---
65
name: smax_s32_ss
@@ -188,8 +187,7 @@ body: |
188187
; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
189188
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
190189
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
191-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
192-
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
190+
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C]](s32)
193191
; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]]
194192
; CHECK-NEXT: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]]
195193
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
43

54
---
65
name: smin_s32_ss
@@ -191,8 +190,7 @@ body: |
191190
; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
192191
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
193192
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
194-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
195-
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
193+
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C]](s32)
196194
; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]]
197195
; CHECK-NEXT: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]]
198196
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
43

54
---
65
name: umax_s32_ss
@@ -186,15 +185,13 @@ body: |
186185
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
187186
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
188187
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
189-
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
190-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
191-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
192-
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
188+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
189+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C]]
190+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
191+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
193192
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
194-
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
195-
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
196-
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
197-
; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
193+
; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C]]
194+
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
198195
; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]]
199196
; CHECK-NEXT: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]]
200197
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
43

54
---
65
name: umin_s32_ss
@@ -190,15 +189,13 @@ body: |
190189
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
191190
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
192191
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
193-
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
194-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
195-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
196-
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
192+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
193+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C]]
194+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
195+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
197196
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
198-
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
199-
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
200-
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
201-
; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
197+
; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C]]
198+
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
202199
; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[AND]], [[AND1]]
203200
; CHECK-NEXT: [[UMIN1:%[0-9]+]]:sgpr(s32) = G_UMIN [[LSHR]], [[LSHR1]]
204201
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](s32), [[UMIN1]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
4-
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
55

66
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
77
; GFX89-LABEL: test_min_max_ValK0_K1_i32:

llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
4-
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
55

66
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
77
; GFX89-LABEL: test_min_max_ValK0_K1_u32:

0 commit comments

Comments
 (0)