Skip to content

Commit be9eb91

Browse files
committed
[AMDGPU] Add regbankselect rules for G_ADD/SUB and variants
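Introduce add/sub support for S64 and V2S16 types. Additionally, add rules for G_UADDO, G_USUBO, G_UADDE, and G_USUBE, which are needed because S64 addition/subtraction is split into two 32-bit operations: a low half that produces a carry and a high half that consumes it.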
1 parent d2ba0da commit be9eb91

File tree

7 files changed

+513
-16
lines changed

7 files changed

+513
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,16 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
470470
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
471471
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
472472
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
473-
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
473+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
474+
// Split 64-bit add/sub into two 32-bit ops on VGPRs
475+
.Uni(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, SplitTo32})
476+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, SplitTo32})
477+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}})
478+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
479+
480+
addRulesForGOpcs({G_UADDO, G_USUBO, G_UADDE, G_USUBE}, Standard)
481+
.Uni(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}})
482+
.Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
474483

475484
addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
476485

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -new-reg-bank-select < %s -o - | FileCheck %s
3+
4+
define i16 @add_i16(i16 %a, i16 %b) {
5+
; CHECK-LABEL: add_i16:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: v_add_u16_e32 v0, v0, v1
9+
; CHECK-NEXT: s_setpc_b64 s[30:31]
10+
%c = add i16 %a, %b
11+
ret i16 %c
12+
}
13+
14+
define i32 @add_i32(i32 %a, i32 %b) {
15+
; CHECK-LABEL: add_i32:
16+
; CHECK: ; %bb.0:
17+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18+
; CHECK-NEXT: v_add_u32_e32 v0, v0, v1
19+
; CHECK-NEXT: s_setpc_b64 s[30:31]
20+
%c = add i32 %a, %b
21+
ret i32 %c
22+
}
23+
24+
define <2 x i16> @add_v2i16(<2 x i16> %a, <2 x i16> %b) {
25+
; CHECK-LABEL: add_v2i16:
26+
; CHECK: ; %bb.0:
27+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28+
; CHECK-NEXT: v_pk_add_u16 v0, v0, v1
29+
; CHECK-NEXT: s_setpc_b64 s[30:31]
30+
%c = add <2 x i16> %a, %b
31+
ret <2 x i16> %c
32+
}
33+
34+
define i64 @add_i64(i64 %a, i64 %b) {
35+
; CHECK-LABEL: add_i64:
36+
; CHECK: ; %bb.0:
37+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38+
; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
39+
; CHECK-NEXT: s_nop 1
40+
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
41+
; CHECK-NEXT: s_setpc_b64 s[30:31]
42+
%c = add i64 %a, %b
43+
ret i64 %c
44+
}
45+
46+
define i16 @sub_i16(i16 %a, i16 %b) {
47+
; CHECK-LABEL: sub_i16:
48+
; CHECK: ; %bb.0:
49+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50+
; CHECK-NEXT: v_sub_u16_e32 v0, v0, v1
51+
; CHECK-NEXT: s_setpc_b64 s[30:31]
52+
%c = sub i16 %a, %b
53+
ret i16 %c
54+
}
55+
56+
define i32 @sub_i32(i32 %a, i32 %b) {
57+
; CHECK-LABEL: sub_i32:
58+
; CHECK: ; %bb.0:
59+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60+
; CHECK-NEXT: v_sub_u32_e32 v0, v0, v1
61+
; CHECK-NEXT: s_setpc_b64 s[30:31]
62+
%c = sub i32 %a, %b
63+
ret i32 %c
64+
}
65+
66+
define <2 x i16> @sub_v2i16(<2 x i16> %a, <2 x i16> %b) {
67+
; CHECK-LABEL: sub_v2i16:
68+
; CHECK: ; %bb.0:
69+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70+
; CHECK-NEXT: v_pk_sub_i16 v0, v0, v1
71+
; CHECK-NEXT: s_setpc_b64 s[30:31]
72+
%c = sub <2 x i16> %a, %b
73+
ret <2 x i16> %c
74+
}
75+
76+
define i64 @sub_i64(i64 %a, i64 %b) {
77+
; CHECK-LABEL: sub_i64:
78+
; CHECK: ; %bb.0:
79+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80+
; CHECK-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
81+
; CHECK-NEXT: s_nop 1
82+
; CHECK-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
83+
; CHECK-NEXT: s_setpc_b64 s[30:31]
84+
%c = sub i64 %a, %b
85+
ret i64 %c
86+
}

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
44
---
55
name: add_s16_ss
66
legalized: true

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,46 @@ body: |
7474
%1:_(s32) = COPY $vgpr1
7575
%2:_(s32) = G_ADD %0, %1
7676
...
77+
78+
---
79+
name: uaddo_s32_vv
80+
legalized: true
81+
82+
body: |
83+
bb.0:
84+
liveins: $vgpr0, $vgpr1
85+
; CHECK-LABEL: name: uaddo_s32_vv
86+
; CHECK: liveins: $vgpr0, $vgpr1
87+
; CHECK-NEXT: {{ $}}
88+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
89+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
90+
; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]]
91+
%0:_(s32) = COPY $vgpr0
92+
%1:_(s32) = COPY $vgpr1
93+
%2:_(s32), %3:_(s1) = G_UADDO %0, %1
94+
...
95+
96+
---
97+
name: uadde_s32_vv
98+
legalized: true
99+
100+
body: |
101+
bb.0:
102+
liveins: $vgpr0, $vgpr1, $vgpr2
103+
; CHECK-LABEL: name: uadde_s32_vv
104+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
105+
; CHECK-NEXT: {{ $}}
106+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
107+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
108+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
109+
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
110+
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
111+
; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
112+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
113+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]]
114+
%0:_(s32) = COPY $vgpr0
115+
%1:_(s32) = COPY $vgpr1
116+
%2:_(s32) = COPY $vgpr2
117+
%3:_(s1) = G_TRUNC %2
118+
%4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
119+
...
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=amdgpu-regbankselect %s -o - | FileCheck %s
3+
4+
---
5+
name: add_s64_ss
6+
legalized: true
7+
8+
body: |
9+
bb.0:
10+
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
11+
; CHECK-LABEL: name: add_s64_ss
12+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
16+
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]]
17+
%0:_(s64) = COPY $sgpr0_sgpr1
18+
%1:_(s64) = COPY $sgpr2_sgpr3
19+
%2:_(s64) = G_ADD %0, %1
20+
...
21+
22+
---
23+
name: add_s64_sv
24+
legalized: true
25+
26+
body: |
27+
bb.0:
28+
liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
29+
; CHECK-LABEL: name: add_s64_sv
30+
; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
33+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
34+
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]]
35+
%0:_(s64) = COPY $sgpr0_sgpr1
36+
%1:_(s64) = COPY $vgpr0_vgpr1
37+
%2:_(s64) = G_ADD %0, %1
38+
...
39+
40+
---
41+
name: add_s64_vs
42+
legalized: true
43+
44+
body: |
45+
bb.0:
46+
liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
47+
; CHECK-LABEL: name: add_s64_vs
48+
; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
49+
; CHECK-NEXT: {{ $}}
50+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
51+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
52+
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]]
53+
%0:_(s64) = COPY $vgpr0_vgpr1
54+
%1:_(s64) = COPY $sgpr0_sgpr1
55+
%2:_(s64) = G_ADD %0, %1
56+
...
57+
58+
---
59+
name: add_s64_vv
60+
legalized: true
61+
62+
body: |
63+
bb.0:
64+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
65+
; CHECK-LABEL: name: add_s64_vv
66+
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
69+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
70+
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]]
71+
%0:_(s64) = COPY $vgpr0_vgpr1
72+
%1:_(s64) = COPY $vgpr2_vgpr3
73+
%2:_(s64) = G_ADD %0, %1
74+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
44

55
---
66
name: add_v2s16_ss
@@ -14,16 +14,8 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
17-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
18-
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
19-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
20-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
21-
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
22-
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
23-
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]
24-
; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]]
25-
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32)
26-
; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
17+
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]]
18+
; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
2719
%0:_(<2 x s16>) = COPY $sgpr0
2820
%1:_(<2 x s16>) = COPY $sgpr1
2921
%2:_(<2 x s16>) = G_ADD %0, %1

0 commit comments

Comments
 (0)