Skip to content

Commit c64e666

Browse files
committed
[AMDGPU] Add regbankselect rules for G_FSHR
1 parent 77cd34e commit c64e666

File tree

3 files changed

+184
-22
lines changed

3 files changed

+184
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -461,6 +461,16 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
461461
Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
462462
break;
463463
}
464+
case AMDGPU::G_FSHR: {
465+
auto [X0, X1] = unpackAExt(MI.getOperand(1).getReg());
466+
auto [Y0, Y1] = unpackAExt(MI.getOperand(2).getReg());
467+
auto [S0, S1] = unpackZExt(MI.getOperand(3).getReg());
468+
469+
const RegisterBank *DstRB = MRI.getRegBank(MI.getOperand(0).getReg());
470+
Lo = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X0, Y0, S0}).getReg(0);
471+
Hi = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X1, Y1, S1}).getReg(0);
472+
break;
473+
}
464474
default:
465475
llvm_unreachable("Unpack lowering not implemented");
466476
}

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -526,6 +526,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
526526
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
527527
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
528528

529+
addRulesForGOpcs({G_FSHR}, Standard)
530+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt, Sgpr32ZExt}})
531+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
532+
.Uni(V2S16,
533+
{{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, UnpackBitShift})
534+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
535+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}})
536+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
537+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64, Sgpr32}})
538+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}});
539+
529540
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
530541

531542
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir

Lines changed: 163 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,37 +1,77 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -verify-machineinstrs -o - %s | FileCheck %s
43

54
---
6-
name: fshr_sss
5+
name: fshr_s16_sss
76
legalized: true
87

98
body: |
109
bb.0:
1110
liveins: $sgpr0, $sgpr1, $sgpr2
12-
; CHECK-LABEL: name: fshr_sss
11+
; CHECK-LABEL: name: fshr_s16_sss
12+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s16) = COPY $sgpr0
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s16) = COPY $sgpr1
16+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s16) = COPY $sgpr2
17+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[COPY]](s16)
18+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[COPY1]](s16)
19+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[COPY2]](s16)
20+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[ANYEXT]], [[ANYEXT1]], [[ZEXT]](s32)
21+
%0:_(s16) = COPY $sgpr0
22+
%1:_(s16) = COPY $sgpr1
23+
%2:_(s16) = COPY $sgpr2
24+
%3:_(s16) = G_FSHR %0, %1, %2
25+
...
26+
27+
---
28+
name: fshr_s16_vvv
29+
legalized: true
30+
31+
body: |
32+
bb.0:
33+
liveins: $vgpr0, $vgpr1, $vgpr2
34+
; CHECK-LABEL: name: fshr_s16_vvv
35+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
36+
; CHECK-NEXT: {{ $}}
37+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s16) = COPY $vgpr0
38+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY $vgpr1
39+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY $vgpr2
40+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16)
41+
%0:_(s16) = COPY $vgpr0
42+
%1:_(s16) = COPY $vgpr1
43+
%2:_(s16) = COPY $vgpr2
44+
%3:_(s16) = G_FSHR %0, %1, %2
45+
...
46+
47+
---
48+
name: fshr_s32_sss
49+
legalized: true
50+
51+
body: |
52+
bb.0:
53+
liveins: $sgpr0, $sgpr1, $sgpr2
54+
; CHECK-LABEL: name: fshr_s32_sss
1355
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
1456
; CHECK-NEXT: {{ $}}
1557
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1658
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
1759
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
18-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
19-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
20-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
21-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32)
60+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
2261
%0:_(s32) = COPY $sgpr0
2362
%1:_(s32) = COPY $sgpr1
2463
%2:_(s32) = COPY $sgpr2
2564
%3:_(s32) = G_FSHR %0, %1, %2
2665
...
66+
2767
---
28-
name: fshr_vss
68+
name: fshr_s32_vss
2969
legalized: true
3070

3171
body: |
3272
bb.0:
3373
liveins: $vgpr0, $sgpr0, $sgpr1
34-
; CHECK-LABEL: name: fshr_vss
74+
; CHECK-LABEL: name: fshr_s32_vss
3575
; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1
3676
; CHECK-NEXT: {{ $}}
3777
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -46,13 +86,13 @@ body: |
4686
%3:_(s32) = G_FSHR %0, %1, %2
4787
...
4888
---
49-
name: fshr_svs
89+
name: fshr_s32_svs
5090
legalized: true
5191

5292
body: |
5393
bb.0:
5494
liveins: $sgpr0, $vgpr0, $sgpr1
55-
; CHECK-LABEL: name: fshr_svs
95+
; CHECK-LABEL: name: fshr_s32_svs
5696
; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1
5797
; CHECK-NEXT: {{ $}}
5898
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
@@ -67,13 +107,13 @@ body: |
67107
%3:_(s32) = G_FSHR %0, %1, %2
68108
...
69109
---
70-
name: fshr_ssv
110+
name: fshr_s32_ssv
71111
legalized: true
72112

73113
body: |
74114
bb.0:
75115
liveins: $sgpr0, $sgpr1, $vgpr0
76-
; CHECK-LABEL: name: fshr_ssv
116+
; CHECK-LABEL: name: fshr_s32_ssv
77117
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0
78118
; CHECK-NEXT: {{ $}}
79119
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
@@ -88,13 +128,13 @@ body: |
88128
%3:_(s32) = G_FSHR %0, %1, %2
89129
...
90130
---
91-
name: fshr_vvs
131+
name: fshr_s32_vvs
92132
legalized: true
93133

94134
body: |
95135
bb.0:
96136
liveins: $vgpr0, $vgpr1, $sgpr0
97-
; CHECK-LABEL: name: fshr_vvs
137+
; CHECK-LABEL: name: fshr_s32_vvs
98138
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0
99139
; CHECK-NEXT: {{ $}}
100140
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -108,13 +148,13 @@ body: |
108148
%3:_(s32) = G_FSHR %0, %1, %2
109149
...
110150
---
111-
name: fshr_vsv
151+
name: fshr_s32_vsv
112152
legalized: true
113153

114154
body: |
115155
bb.0:
116156
liveins: $vgpr0, $sgpr0, $vgpr1
117-
; CHECK-LABEL: name: fshr_vsv
157+
; CHECK-LABEL: name: fshr_s32_vsv
118158
; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1
119159
; CHECK-NEXT: {{ $}}
120160
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -128,13 +168,13 @@ body: |
128168
%3:_(s32) = G_FSHR %0, %1, %2
129169
...
130170
---
131-
name: fshr_svv
171+
name: fshr_s32_svv
132172
legalized: true
133173

134174
body: |
135175
bb.0:
136176
liveins: $sgpr0, $vgpr0, $vgpr1
137-
; CHECK-LABEL: name: fshr_svv
177+
; CHECK-LABEL: name: fshr_s32_svv
138178
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
139179
; CHECK-NEXT: {{ $}}
140180
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
@@ -148,13 +188,13 @@ body: |
148188
%3:_(s32) = G_FSHR %0, %1, %2
149189
...
150190
---
151-
name: fshr_vvv
191+
name: fshr_s32_vvv
152192
legalized: true
153193

154194
body: |
155195
bb.0:
156196
liveins: $vgpr0, $vgpr1, $vgpr2
157-
; CHECK-LABEL: name: fshr_vvv
197+
; CHECK-LABEL: name: fshr_s32_vvv
158198
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
159199
; CHECK-NEXT: {{ $}}
160200
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -166,3 +206,104 @@ body: |
166206
%2:_(s32) = COPY $vgpr2
167207
%3:_(s32) = G_FSHR %0, %1, %2
168208
...
209+
210+
---
211+
name: fshr_v2s16_sss
212+
legalized: true
213+
214+
body: |
215+
bb.0:
216+
liveins: $sgpr0, $sgpr1, $sgpr2
217+
; CHECK-LABEL: name: fshr_v2s16_sss
218+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
219+
; CHECK-NEXT: {{ $}}
220+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
221+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
222+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2
223+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
224+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
225+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
226+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
227+
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
228+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY2]](<2 x s16>)
229+
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
230+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST2]], [[C1]]
231+
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
232+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[BITCAST]], [[BITCAST1]], [[AND]](s32)
233+
; CHECK-NEXT: [[FSHR1:%[0-9]+]]:sgpr(s32) = G_FSHR [[LSHR]], [[LSHR1]], [[LSHR2]](s32)
234+
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[FSHR]](s32), [[FSHR1]](s32)
235+
%0:_(<2 x s16>) = COPY $sgpr0
236+
%1:_(<2 x s16>) = COPY $sgpr1
237+
%2:_(<2 x s16>) = COPY $sgpr2
238+
%3:_(<2 x s16>) = G_FSHR %0, %1, %2
239+
...
240+
241+
---
242+
name: fshr_v2s16_vvv
243+
legalized: true
244+
245+
body: |
246+
bb.0:
247+
liveins: $vgpr0, $vgpr1, $vgpr2
248+
; CHECK-LABEL: name: fshr_v2s16_vvv
249+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
250+
; CHECK-NEXT: {{ $}}
251+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
252+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
253+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
254+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>)
255+
%0:_(<2 x s16>) = COPY $vgpr0
256+
%1:_(<2 x s16>) = COPY $vgpr1
257+
%2:_(<2 x s16>) = COPY $vgpr2
258+
%3:_(<2 x s16>) = G_FSHR %0, %1, %2
259+
...
260+
261+
---
262+
name: fshr_s64_sss
263+
legalized: true
264+
265+
body: |
266+
bb.0:
267+
liveins: $sgpr0, $sgpr1, $sgpr2
268+
; CHECK-LABEL: name: fshr_s64_sss
269+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
270+
; CHECK-NEXT: {{ $}}
271+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
272+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
273+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
274+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
275+
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
276+
; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
277+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s64) = G_FSHR [[MV]], [[MV1]], [[COPY2]](s32)
278+
%0:_(s32) = COPY $sgpr0
279+
%1:_(s32) = COPY $sgpr1
280+
%2:_(s32) = COPY $sgpr2
281+
%3:_(s64) = G_ZEXT %0
282+
%4:_(s64) = G_ZEXT %1
283+
%5:_(s64) = G_FSHR %3, %4, %2
284+
...
285+
286+
---
287+
name: fshr_s64_vvv
288+
legalized: true
289+
290+
body: |
291+
bb.0:
292+
liveins: $vgpr0, $vgpr1, $vgpr2
293+
; CHECK-LABEL: name: fshr_s64_vvv
294+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
295+
; CHECK-NEXT: {{ $}}
296+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
297+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
298+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
299+
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
300+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
301+
; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
302+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s64) = G_FSHR [[MV]], [[MV1]], [[COPY2]](s32)
303+
%0:_(s32) = COPY $vgpr0
304+
%1:_(s32) = COPY $vgpr1
305+
%2:_(s32) = COPY $vgpr2
306+
%3:_(s64) = G_ZEXT %0
307+
%4:_(s64) = G_ZEXT %1
308+
%5:_(s64) = G_FSHR %3, %4, %2
309+
...

0 commit comments

Comments
 (0)