Skip to content

Commit 7d52b72

Browse files
authored
[AMDGPU] Compute GISel KnownBits for S_BFE instructions (llvm#141588)
Next patches in the stack will emit them in the RegBankCombiner. With this, S_BFE instructions will hopefully interfere less with optimizations.
1 parent 4aa85cc commit 7d52b72

File tree

2 files changed

+304
-0
lines changed

2 files changed

+304
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16910,12 +16910,63 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
1691016910
Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
1691116911
}
1691216912

16913+
static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
16914+
KnownBits &Known, const APInt &DemandedElts,
16915+
unsigned BFEWidth, bool SExt, unsigned Depth) {
16916+
const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
16917+
const MachineOperand &Src1 = MI.getOperand(2);
16918+
16919+
unsigned Src1Cst = 0;
16920+
if (Src1.isImm()) {
16921+
Src1Cst = Src1.getImm();
16922+
} else if (Src1.isReg()) {
16923+
auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
16924+
if (!Cst)
16925+
return;
16926+
Src1Cst = Cst->Value.getZExtValue();
16927+
} else {
16928+
return;
16929+
}
16930+
16931+
// Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit.
16932+
// Width is always [22:16].
16933+
const unsigned Offset =
16934+
Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6);
16935+
const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(6);
16936+
16937+
if (Width >= BFEWidth) // Ill-formed.
16938+
return;
16939+
16940+
VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
16941+
Depth + 1);
16942+
16943+
Known = Known.extractBits(Width, Offset);
16944+
16945+
if (SExt)
16946+
Known = Known.sext(BFEWidth);
16947+
else
16948+
Known = Known.zext(BFEWidth);
16949+
}
16950+
1691316951
void SITargetLowering::computeKnownBitsForTargetInstr(
1691416952
GISelValueTracking &VT, Register R, KnownBits &Known,
1691516953
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
1691616954
unsigned Depth) const {
16955+
Known.resetAll();
1691716956
const MachineInstr *MI = MRI.getVRegDef(R);
1691816957
switch (MI->getOpcode()) {
16958+
case AMDGPU::S_BFE_I32:
16959+
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
16960+
/*SExt=*/true, Depth);
16961+
case AMDGPU::S_BFE_U32:
16962+
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
16963+
/*SExt=*/false, Depth);
16964+
case AMDGPU::S_BFE_I64:
16965+
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
16966+
/*SExt=*/true, Depth);
16967+
case AMDGPU::S_BFE_U64:
16968+
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
16969+
/*SExt=*/false, Depth);
1691916970
case AMDGPU::G_INTRINSIC:
1692016971
case AMDGPU::G_INTRINSIC_CONVERGENT: {
1692116972
Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print<gisel-value-tracking>' %s -filetype=null 2>&1 | FileCheck %s
3+
4+
---
5+
name: test_s_bfe_u32_constants
6+
body: |
7+
bb.0:
8+
; Extract [12:16)
9+
; CHECK-LABEL: name: @test_s_bfe_u32_constants
10+
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
11+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
12+
%cst:sgpr_32(s32) = G_CONSTANT i32 65535
13+
%bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc
14+
$sgpr0 = COPY %bfe
15+
...
16+
---
17+
name: test_s_bfe_i32_constants
18+
body: |
19+
bb.0:
20+
; Extract [12:16)
21+
; CHECK-LABEL: name: @test_s_bfe_i32_constants
22+
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
23+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
24+
%cst:sgpr_32(s32) = G_CONSTANT i32 65535
25+
%bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc
26+
$sgpr0 = COPY %bfe
27+
...
28+
---
29+
name: test_s_bfe_u64_constants
30+
body: |
31+
bb.0:
32+
; Extract [12:16)
33+
; CHECK-LABEL: name: @test_s_bfe_u64_constants
34+
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
35+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
36+
%cst:sgpr_64(s64) = G_CONSTANT i64 65535
37+
%bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc
38+
$sgpr0_sgpr1 = COPY %bfe
39+
...
40+
---
41+
name: test_s_bfe_i64_constants
42+
body: |
43+
bb.0:
44+
; Extract [12:16)
45+
; CHECK-LABEL: name: @test_s_bfe_i64_constants
46+
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
47+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
48+
%cst:sgpr_64(s64) = G_CONSTANT i64 65535
49+
%bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc
50+
$sgpr0_sgpr1 = COPY %bfe
51+
...
52+
---
53+
name: test_s_bfe_u32_middle_bits_unknown
54+
body: |
55+
bb.0:
56+
; Extract [8:16) but the middle 4 bits are ????
57+
liveins: $sgpr0
58+
59+
; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown
60+
; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
61+
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
62+
; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
63+
; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
64+
; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
65+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24
66+
%input:sgpr_32(s32) = COPY $sgpr0
67+
%cst:sgpr_32(s32) = G_CONSTANT i32 50175
68+
%mask:sgpr_32(s32) = G_CONSTANT i32 15360
69+
%masked_input:sgpr_32(s32) = G_AND %input, %mask
70+
%merged:sgpr_32(s32) = G_OR %masked_input, %cst
71+
%bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc
72+
$sgpr0 = COPY %bfe
73+
...
74+
---
75+
name: test_s_bfe_i32_middle_bits_unknown
76+
body: |
77+
bb.0:
78+
; Extract [8:16) but the middle 4 bits are ????
79+
liveins: $sgpr0
80+
81+
; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown
82+
; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
83+
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
84+
; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
85+
; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
86+
; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
87+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26
88+
%input:sgpr_32(s32) = COPY $sgpr0
89+
%cst:sgpr_32(s32) = G_CONSTANT i32 50175
90+
%mask:sgpr_32(s32) = G_CONSTANT i32 15360
91+
%masked_input:sgpr_32(s32) = G_AND %input, %mask
92+
%merged:sgpr_32(s32) = G_OR %masked_input, %cst
93+
%bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc
94+
$sgpr0 = COPY %bfe
95+
...
96+
---
97+
name: test_s_bfe_u64_middle_bits_unknown
98+
body: |
99+
bb.0:
100+
; Extract [8:16) but the middle 4 bits are ????
101+
liveins: $sgpr0_sgpr1
102+
103+
; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown
104+
; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
105+
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
106+
; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
107+
; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
108+
; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
109+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56
110+
%input:sgpr_64(s64) = COPY $sgpr0_sgpr1
111+
%cst:sgpr_64(s64) = G_CONSTANT i64 50175
112+
%mask:sgpr_64(s64) = G_CONSTANT i64 15360
113+
%masked_input:sgpr_64(s64) = G_AND %input, %mask
114+
%merged:sgpr_64(s64) = G_OR %masked_input, %cst
115+
%bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc
116+
$sgpr0_sgpr1 = COPY %bfe
117+
...
118+
---
119+
name: test_s_bfe_i64_middle_bits_unknown
120+
body: |
121+
bb.0:
122+
; Extract [8:16) but the middle 4 bits are ????
123+
liveins: $sgpr0_sgpr1
124+
125+
; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown
126+
; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
127+
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
128+
; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
129+
; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
130+
; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
131+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58
132+
%input:sgpr_64(s64) = COPY $sgpr0_sgpr1
133+
%cst:sgpr_64(s64) = G_CONSTANT i64 50175
134+
%mask:sgpr_64(s64) = G_CONSTANT i64 15360
135+
%masked_input:sgpr_64(s64) = G_AND %input, %mask
136+
%merged:sgpr_64(s64) = G_OR %masked_input, %cst
137+
%bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
138+
$sgpr0_sgpr1 = COPY %bfe
139+
...
140+
---
141+
name: test_s_bfe_i32_g_constants
142+
body: |
143+
bb.0:
144+
; Extract [12:16)
145+
; CHECK-LABEL: name: @test_s_bfe_i32_g_constants
146+
; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
147+
; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
148+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
149+
%src0:sgpr_32(s32) = G_CONSTANT i32 65535
150+
%src1:sgpr_32(s32) = G_CONSTANT i32 262156
151+
%bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc
152+
$sgpr0 = COPY %bfe
153+
...
154+
---
155+
name: test_s_bfe_u64_g_constants
156+
body: |
157+
bb.0:
158+
; Extract [12:16)
159+
; CHECK-LABEL: name: @test_s_bfe_u64_g_constants
160+
; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
161+
; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
162+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
163+
%src0:sgpr_64(s64) = G_CONSTANT i64 65535
164+
%src1:sgpr_32(s32) = G_CONSTANT i32 262156
165+
%bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
166+
$sgpr0_sgpr1 = COPY %bfe
167+
...
168+
---
169+
name: test_s_bfe_i32_g_constants_lookthrough
170+
body: |
171+
bb.0:
172+
; Extract [12:16)
173+
; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough
174+
; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
175+
; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
176+
; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
177+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
178+
%src0:sgpr_32(s32) = G_CONSTANT i32 65535
179+
%src1:sgpr_32(s24) = G_CONSTANT i24 262156
180+
%src1_ext:sgpr_32(s32) = G_ZEXT %src1
181+
%bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc
182+
$sgpr0 = COPY %bfe
183+
...
184+
---
name: test_s_bfe_u64_g_constants_lookthrough
body: |
  bb.0:
    ; Extract [12:16)
    ; src1 of the BFE is %src1_ext, a G_ZEXT of a narrower G_CONSTANT, so the
    ; packed offset/width is only found by looking through the extension.
    ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough
    ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
    ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
    ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1_ext, implicit-def $scc
    $sgpr0_sgpr1 = COPY %bfe
...
200+
---
201+
name: test_s_bfe_u32_trash_bits
202+
body: |
203+
bb.0:
204+
; Extract [12:16)
205+
; Check that the 6th bit is ignored for u32. The lower 6 bits are
206+
; 101100 but we should mask out the first 1 for the 32 bit version.
207+
; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits
208+
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
209+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
210+
%cst:sgpr_32(s32) = G_CONSTANT i32 65535
211+
%bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc
212+
$sgpr0 = COPY %bfe
213+
...
214+
---
215+
name: test_s_bfe_i32_trash_bits
216+
body: |
217+
bb.0:
218+
; Extract [12:16)
219+
; Check that the 6th bit is ignored for i32. The lower 6 bits are
220+
; 101100 but we should mask out the first 1 for the 32 bit version.
221+
; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits
222+
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
223+
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
224+
%cst:sgpr_32(s32) = G_CONSTANT i32 65535
225+
%bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc
226+
$sgpr0 = COPY %bfe
227+
...
228+
---
229+
name: test_s_bfe_u64_constants_sixth_bit
230+
body: |
231+
bb.0:
232+
; Extract [32:48)
233+
; Check we correctly read 6 bits for the offset on 64 bit BFEs.
234+
; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit
235+
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
236+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
237+
%cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
238+
%bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc
239+
$sgpr0_sgpr1 = COPY %bfe
240+
...
241+
---
242+
name: test_s_bfe_i64_constants_sixth_bit
243+
body: |
244+
bb.0:
245+
; Extract [32:48)
246+
; Check we correctly read 6 bits for the offset on 64 bit BFEs.
247+
; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit
248+
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
249+
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
250+
%cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
251+
%bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc
252+
$sgpr0_sgpr1 = COPY %bfe
253+
...

0 commit comments

Comments
 (0)