Skip to content

Commit 21a65ff

Browse files
committed
[AMDGPU] Add regbankselect rules for G_FSHR
1 parent d57aa48 commit 21a65ff

File tree

3 files changed

+158
-21
lines changed

3 files changed

+158
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -461,6 +461,16 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
461461
Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
462462
break;
463463
}
464+
case AMDGPU::G_FSHR: {
465+
auto [X0, X1] = unpackAExt(MI.getOperand(1).getReg());
466+
auto [Y0, Y1] = unpackAExt(MI.getOperand(2).getReg());
467+
auto [S0, S1] = unpackZExt(MI.getOperand(3).getReg());
468+
469+
const RegisterBank *DstRB = MRI.getRegBank(MI.getOperand(0).getReg());
470+
Lo = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X0, Y0, S0}).getReg(0);
471+
Hi = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X1, Y1, S1}).getReg(0);
472+
break;
473+
}
464474
default:
465475
llvm_unreachable("Unpack lowering not implemented");
466476
}

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
514514
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
515515
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
516516

517+
addRulesForGOpcs({G_FSHR}, Standard)
518+
.Uni(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
519+
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
520+
.Uni(V2S16,
521+
{{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}, UnpackBitShift})
522+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
523+
.Uni(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
524+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
525+
.Uni(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}})
526+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}});
527+
517528
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
518529

519530
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir

Lines changed: 137 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,46 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
4+
5+
---
6+
name: fshr_s16_sss
7+
legalized: true
8+
9+
body: |
10+
bb.0:
11+
liveins: $sgpr0, $sgpr1, $sgpr2
12+
; CHECK-LABEL: name: fshr_s16_sss
13+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s16) = COPY $sgpr0
16+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s16) = COPY $sgpr1
17+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s16) = COPY $sgpr2
18+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16)
19+
%0:_(s16) = COPY $sgpr0
20+
%1:_(s16) = COPY $sgpr1
21+
%2:_(s16) = COPY $sgpr2
22+
%3:_(s16) = G_FSHR %0, %1, %2
23+
...
24+
25+
---
26+
name: fshr_s16_vvv
27+
legalized: true
28+
29+
body: |
30+
bb.0:
31+
liveins: $vgpr0, $vgpr1, $vgpr2
32+
; CHECK-LABEL: name: fshr_s16_vvv
33+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
34+
; CHECK-NEXT: {{ $}}
35+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s16) = COPY $vgpr0
36+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY $vgpr1
37+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY $vgpr2
38+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16)
39+
%0:_(s16) = COPY $vgpr0
40+
%1:_(s16) = COPY $vgpr1
41+
%2:_(s16) = COPY $vgpr2
42+
%3:_(s16) = G_FSHR %0, %1, %2
43+
...
444

545
---
646
name: fshr_sss
@@ -15,10 +55,7 @@ body: |
1555
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1656
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
1757
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
18-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
19-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
20-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
21-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32)
58+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
2259
%0:_(s32) = COPY $sgpr0
2360
%1:_(s32) = COPY $sgpr1
2461
%2:_(s32) = COPY $sgpr2
@@ -37,9 +74,7 @@ body: |
3774
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
3875
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
3976
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
40-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
41-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
42-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](s32)
77+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
4378
%0:_(s32) = COPY $vgpr0
4479
%1:_(s32) = COPY $sgpr0
4580
%2:_(s32) = COPY $sgpr1
@@ -58,9 +93,7 @@ body: |
5893
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
5994
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
6095
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
61-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
62-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
63-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](s32)
96+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
6497
%0:_(s32) = COPY $sgpr0
6598
%1:_(s32) = COPY $vgpr0
6699
%2:_(s32) = COPY $sgpr1
@@ -79,9 +112,7 @@ body: |
79112
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
80113
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
81114
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
82-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
83-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
84-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](s32)
115+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
85116
%0:_(s32) = COPY $sgpr0
86117
%1:_(s32) = COPY $sgpr1
87118
%2:_(s32) = COPY $vgpr0
@@ -100,8 +131,7 @@ body: |
100131
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
101132
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
102133
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
103-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
104-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](s32)
134+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
105135
%0:_(s32) = COPY $vgpr0
106136
%1:_(s32) = COPY $vgpr1
107137
%2:_(s32) = COPY $sgpr0
@@ -120,8 +150,7 @@ body: |
120150
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
121151
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
122152
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
123-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
124-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](s32)
153+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
125154
%0:_(s32) = COPY $vgpr0
126155
%1:_(s32) = COPY $sgpr1
127156
%2:_(s32) = COPY $vgpr1
@@ -140,8 +169,7 @@ body: |
140169
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
141170
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
142171
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
143-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
144-
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](s32)
172+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
145173
%0:_(s32) = COPY $sgpr0
146174
%1:_(s32) = COPY $vgpr0
147175
%2:_(s32) = COPY $vgpr1
@@ -166,3 +194,91 @@ body: |
166194
%2:_(s32) = COPY $vgpr2
167195
%3:_(s32) = G_FSHR %0, %1, %2
168196
...
197+
198+
---
199+
name: fshr_v2s16_sss
200+
legalized: true
201+
202+
body: |
203+
bb.0:
204+
liveins: $sgpr0, $sgpr1, $sgpr2
205+
; CHECK-LABEL: name: fshr_v2s16_sss
206+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
207+
; CHECK-NEXT: {{ $}}
208+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
209+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
210+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2
211+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>)
212+
%0:_(<2 x s16>) = COPY $sgpr0
213+
%1:_(<2 x s16>) = COPY $sgpr1
214+
%2:_(<2 x s16>) = COPY $sgpr2
215+
%3:_(<2 x s16>) = G_FSHR %0, %1, %2
216+
...
217+
218+
---
219+
name: fshr_v2s16_vvv
220+
legalized: true
221+
222+
body: |
223+
bb.0:
224+
liveins: $vgpr0, $vgpr1, $vgpr2
225+
; CHECK-LABEL: name: fshr_v2s16_vvv
226+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
227+
; CHECK-NEXT: {{ $}}
228+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
229+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
230+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
231+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>)
232+
%0:_(<2 x s16>) = COPY $vgpr0
233+
%1:_(<2 x s16>) = COPY $vgpr1
234+
%2:_(<2 x s16>) = COPY $vgpr2
235+
%3:_(<2 x s16>) = G_FSHR %0, %1, %2
236+
...
237+
238+
---
239+
name: fshr_s64_sss
240+
legalized: true
241+
242+
body: |
243+
bb.0:
244+
liveins: $sgpr0, $sgpr1, $sgpr2
245+
; CHECK-LABEL: name: fshr_s64_sss
246+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
247+
; CHECK-NEXT: {{ $}}
248+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
249+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
250+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
251+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY]](s32)
252+
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY1]](s32)
253+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s64) = G_FSHR [[ZEXT]], [[ZEXT1]], [[COPY2]](s32)
254+
%0:_(s32) = COPY $sgpr0
255+
%1:_(s32) = COPY $sgpr1
256+
%2:_(s32) = COPY $sgpr2
257+
%3:_(s64) = G_ZEXT %0
258+
%4:_(s64) = G_ZEXT %1
259+
%5:_(s64) = G_FSHR %3, %4, %2
260+
...
261+
262+
---
263+
name: fshr_s64_vvv
264+
legalized: true
265+
266+
body: |
267+
bb.0:
268+
liveins: $vgpr0, $vgpr1, $vgpr2
269+
; CHECK-LABEL: name: fshr_s64_vvv
270+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
271+
; CHECK-NEXT: {{ $}}
272+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
273+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
274+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
275+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
276+
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY1]](s32)
277+
; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s64) = G_FSHR [[ZEXT]], [[ZEXT1]], [[COPY2]](s32)
278+
%0:_(s32) = COPY $vgpr0
279+
%1:_(s32) = COPY $vgpr1
280+
%2:_(s32) = COPY $vgpr2
281+
%3:_(s64) = G_ZEXT %0
282+
%4:_(s64) = G_ZEXT %1
283+
%5:_(s64) = G_FSHR %3, %4, %2
284+
...

0 commit comments

Comments
 (0)