Skip to content

Commit 5dafcb9

Browse files
committed
AMDGPU/GlobalISel: Use and instead of BFE with inline immediate
Zext from s1 is the only case where this should do anything with the current legal extensions. llvm-svn: 364760
1 parent 01bb075 commit 5dafcb9

File tree

4 files changed

+148
-10
lines changed

4 files changed

+148
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,13 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
626626
return true;
627627
}
628628

629+
/// \returns true if a bitmask for \p Size bits will be an inline immediate.
630+
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
631+
Mask = maskTrailingOnes<unsigned>(Size);
632+
int SignedMask = static_cast<int>(Mask);
633+
return SignedMask >= -16 && SignedMask <= 64;
634+
}
635+
629636
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
630637
bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
631638
const DebugLoc &DL = I.getDebugLoc();
@@ -688,9 +695,17 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
688695

689696
if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
690697
// 64-bit should have been split up in RegBankSelect
691-
//
692-
// TODO: USE V_AND_B32 when the constant mask is an inline immediate for
693-
// unsigned for smaller code size.
698+
699+
// Try to use an and with a mask if it will save code size.
700+
unsigned Mask;
701+
if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
702+
MachineInstr *ExtI =
703+
BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
704+
.addImm(Mask)
705+
.addReg(SrcReg);
706+
return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
707+
}
708+
694709
const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
695710
MachineInstr *ExtI =
696711
BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
@@ -736,9 +751,17 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
736751
return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
737752
}
738753

739-
BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
740-
.addReg(SrcReg)
741-
.addImm(SrcSize << 16);
754+
unsigned Mask;
755+
if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
756+
BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
757+
.addReg(SrcReg)
758+
.addImm(Mask);
759+
} else {
760+
BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
761+
.addReg(SrcReg)
762+
.addImm(SrcSize << 16);
763+
}
764+
742765
return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
743766
}
744767

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,24 @@ body: |
8181

8282
---
8383

84+
name: anyext_sgpr_s8_to_sgpr_s32
85+
legalized: true
86+
regBankSelected: true
87+
body: |
88+
bb.0:
89+
liveins: $sgpr0
90+
91+
; GCN-LABEL: name: anyext_sgpr_s8_to_sgpr_s32
92+
; GCN: $sgpr0 = COPY %2:sreg_32_xm0
93+
%0:sgpr(s32) = COPY $sgpr0
94+
%1:sgpr(s8) = G_TRUNC %0
95+
%2:sgpr(s32) = G_ANYEXT %1
96+
$sgpr0 = COPY %2
97+
98+
...
99+
100+
---
101+
84102
name: anyext_sgpr_s16_to_sgpr_s32
85103
legalized: true
86104
regBankSelected: true
@@ -154,6 +172,24 @@ body: |
154172

155173
---
156174

175+
name: anyext_vgpr_s8_to_vgpr_s32
176+
legalized: true
177+
regBankSelected: true
178+
body: |
179+
bb.0:
180+
liveins: $vgpr0
181+
182+
; GCN-LABEL: name: anyext_vgpr_s8_to_vgpr_s32
183+
; GCN: $vgpr0 = COPY %2:vgpr_32
184+
%0:vgpr(s32) = COPY $vgpr0
185+
%1:vgpr(s8) = G_TRUNC %0
186+
%2:vgpr(s32) = G_ANYEXT %1
187+
$vgpr0 = COPY %2
188+
189+
...
190+
191+
---
192+
157193
name: anyext_vgpr_s16_to_vgpr_s32
158194
legalized: true
159195
regBankSelected: true

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,25 @@ body: |
8787

8888
---
8989

90+
name: sext_sgpr_s8_to_sgpr_s32
91+
legalized: true
92+
regBankSelected: true
93+
body: |
94+
bb.0:
95+
liveins: $sgpr0
96+
97+
; GCN-LABEL: name: sext_sgpr_s8_to_sgpr_s32
98+
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
99+
; GCN: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32_xm0 = S_SEXT_I32_I8 [[COPY]]
100+
; GCN: $sgpr0 = COPY [[S_SEXT_I32_I8_]]
101+
%0:sgpr(s32) = COPY $sgpr0
102+
%1:sgpr(s8) = G_TRUNC %0
103+
%2:sgpr(s32) = G_SEXT %1
104+
$sgpr0 = COPY %2
105+
...
106+
107+
---
108+
90109
name: sext_sgpr_s16_to_sgpr_s32
91110
legalized: true
92111
regBankSelected: true
@@ -168,6 +187,26 @@ body: |
168187

169188
---
170189

190+
name: sext_vgpr_s8_to_vgpr_s32
191+
legalized: true
192+
regBankSelected: true
193+
body: |
194+
bb.0:
195+
liveins: $vgpr0
196+
197+
; GCN-LABEL: name: sext_vgpr_s8_to_vgpr_s32
198+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
199+
; GCN: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[COPY]], 0, 8, implicit $exec
200+
; GCN: $vgpr0 = COPY [[V_BFE_I32_]]
201+
%0:vgpr(s32) = COPY $vgpr0
202+
%1:vgpr(s8) = G_TRUNC %0
203+
%2:vgpr(s32) = G_SEXT %1
204+
$vgpr0 = COPY %2
205+
206+
...
207+
208+
---
209+
171210
name: sext_vgpr_s16_to_vgpr_s32
172211
legalized: true
173212
regBankSelected: true

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ body: |
5656
5757
; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32
5858
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
59-
; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_U32 [[COPY]], 65536, implicit-def $scc
60-
; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
59+
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], 1, implicit-def $scc
60+
; GCN: $sgpr0 = COPY [[S_AND_B32_]]
6161
%0:sgpr(s32) = COPY $sgpr0
6262
%1:sgpr(s1) = G_TRUNC %0
6363
%2:sgpr(s32) = G_ZEXT %1
@@ -87,6 +87,26 @@ body: |
8787

8888
---
8989

90+
name: zext_sgpr_s8_to_sgpr_s32
91+
legalized: true
92+
regBankSelected: true
93+
body: |
94+
bb.0:
95+
liveins: $sgpr0
96+
97+
; GCN-LABEL: name: zext_sgpr_s8_to_sgpr_s32
98+
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
99+
; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32_xm0 = S_BFE_U32 [[COPY]], 524288, implicit-def $scc
100+
; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
101+
%0:sgpr(s32) = COPY $sgpr0
102+
%1:sgpr(s8) = G_TRUNC %0
103+
%2:sgpr(s32) = G_ZEXT %1
104+
$sgpr0 = COPY %2
105+
106+
...
107+
108+
---
109+
90110
name: zext_sgpr_s16_to_sgpr_s32
91111
legalized: true
92112
regBankSelected: true
@@ -158,8 +178,8 @@ body: |
158178
159179
; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32
160180
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
161-
; GCN: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[COPY]], 0, 1, implicit $exec
162-
; GCN: $vgpr0 = COPY [[V_BFE_U32_]]
181+
; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
182+
; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]]
163183
%0:vgpr(s32) = COPY $vgpr0
164184
%1:vgpr(s1) = G_TRUNC %0
165185
%2:vgpr(s32) = G_ZEXT %1
@@ -168,6 +188,26 @@ body: |
168188

169189
---
170190

191+
name: zext_vgpr_s8_to_vgpr_s32
192+
legalized: true
193+
regBankSelected: true
194+
body: |
195+
bb.0:
196+
liveins: $vgpr0
197+
198+
; GCN-LABEL: name: zext_vgpr_s8_to_vgpr_s32
199+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
200+
; GCN: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[COPY]], 0, 8, implicit $exec
201+
; GCN: $vgpr0 = COPY [[V_BFE_U32_]]
202+
%0:vgpr(s32) = COPY $vgpr0
203+
%1:vgpr(s8) = G_TRUNC %0
204+
%2:vgpr(s32) = G_ZEXT %1
205+
$vgpr0 = COPY %2
206+
207+
...
208+
209+
---
210+
171211
name: zext_vgpr_s16_to_vgpr_s32
172212
legalized: true
173213
regBankSelected: true

0 commit comments

Comments
 (0)