Skip to content

Commit 5963817

Browse files
committed
Add more rules for G_GLOBAL_VALUE and add .ll test for G_GLOBAL_VALUE
1 parent 3e62f02 commit 5963817

File tree

4 files changed

+122
-1
lines changed

4 files changed

+122
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
856856
case SgprP5:
857857
case VgprP5:
858858
return LLT::pointer(5, 32);
859+
case SgprP8:
860+
return LLT::pointer(8, 128);
859861
case SgprV2S16:
860862
case VgprV2S16:
861863
case UniInVgprV2S16:
@@ -946,6 +948,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
946948
case SgprP3:
947949
case SgprP4:
948950
case SgprP5:
951+
case SgprP8:
949952
case SgprPtr32:
950953
case SgprPtr64:
951954
case SgprPtr128:
@@ -1029,6 +1032,7 @@ void RegBankLegalizeHelper::applyMappingDst(
10291032
case SgprP3:
10301033
case SgprP4:
10311034
case SgprP5:
1035+
case SgprP8:
10321036
case SgprV2S16:
10331037
case SgprV2S32:
10341038
case SgprV4S32:
@@ -1171,6 +1175,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
11711175
case SgprP3:
11721176
case SgprP4:
11731177
case SgprP5:
1178+
case SgprP8:
11741179
case SgprV2S16:
11751180
case SgprV2S32:
11761181
case SgprV4S32: {

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
6666
return MRI.getType(Reg) == LLT::pointer(4, 64);
6767
case P5:
6868
return MRI.getType(Reg) == LLT::pointer(5, 32);
69+
case P8:
70+
return MRI.getType(Reg) == LLT::pointer(8, 128);
6971
case Ptr32:
7072
return isAnyPtr(MRI.getType(Reg), 32);
7173
case Ptr64:
@@ -108,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
108110
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
109111
case UniP5:
110112
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
113+
case UniP8:
114+
return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
111115
case UniPtr32:
112116
return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
113117
case UniPtr64:
@@ -905,7 +909,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
905909

906910
addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
907911

908-
addRulesForGOpcs({G_GLOBAL_VALUE}).Any({{UniP3}, {{SgprP3}, {}}});
912+
addRulesForGOpcs({G_GLOBAL_VALUE})
913+
.Any({{UniP0}, {{SgprP0}, {}}})
914+
.Any({{UniP1}, {{SgprP1}, {}}})
915+
.Any({{UniP3}, {{SgprP3}, {}}})
916+
.Any({{UniP4}, {{SgprP4}, {}}})
917+
.Any({{UniP8}, {{SgprP8}, {}}});
909918

910919
bool hasSALUFloat = ST->hasSALUFloatInsts();
911920

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ enum UniformityLLTOpPredicateID {
6363
P3,
6464
P4,
6565
P5,
66+
P8,
6667
Ptr32,
6768
Ptr64,
6869
Ptr128,
@@ -72,6 +73,7 @@ enum UniformityLLTOpPredicateID {
7273
UniP3,
7374
UniP4,
7475
UniP5,
76+
UniP8,
7577
UniPtr32,
7678
UniPtr64,
7779
UniPtr128,
@@ -139,6 +141,7 @@ enum RegBankLLTMappingApplyID {
139141
SgprP3,
140142
SgprP4,
141143
SgprP5,
144+
SgprP8,
142145
SgprPtr32,
143146
SgprPtr64,
144147
SgprPtr128,
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3+
4+
@flat = external global i32, align 4
5+
@global = external addrspace(1) global i32, align 4
6+
@lds = addrspace(3) global i32 poison, align 4
7+
@constant = external addrspace(4) constant i32, align 4
8+
@buf = external addrspace(8) global i8
9+
10+
define ptr @global_value_as0_external() {
11+
; GCN-LABEL: global_value_as0_external:
12+
; GCN: ; %bb.0:
13+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14+
; GCN-NEXT: s_getpc_b64 s[4:5]
15+
; GCN-NEXT: s_add_u32 s4, s4, flat@gotpcrel32@lo+4
16+
; GCN-NEXT: s_addc_u32 s5, s5, flat@gotpcrel32@hi+12
17+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
18+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
19+
; GCN-NEXT: v_mov_b32_e32 v0, s4
20+
; GCN-NEXT: v_mov_b32_e32 v1, s5
21+
; GCN-NEXT: s_setpc_b64 s[30:31]
22+
ret ptr @flat
23+
}
24+
25+
define ptr addrspace(1) @global_value_as1_external() {
26+
; GCN-LABEL: global_value_as1_external:
27+
; GCN: ; %bb.0:
28+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GCN-NEXT: s_getpc_b64 s[4:5]
30+
; GCN-NEXT: s_add_u32 s4, s4, global@gotpcrel32@lo+4
31+
; GCN-NEXT: s_addc_u32 s5, s5, global@gotpcrel32@hi+12
32+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
33+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
34+
; GCN-NEXT: v_mov_b32_e32 v0, s4
35+
; GCN-NEXT: v_mov_b32_e32 v1, s5
36+
; GCN-NEXT: s_setpc_b64 s[30:31]
37+
ret ptr addrspace(1) @global
38+
}
39+
40+
define ptr addrspace(4) @global_value_as4_external() {
41+
; GCN-LABEL: global_value_as4_external:
42+
; GCN: ; %bb.0:
43+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44+
; GCN-NEXT: s_getpc_b64 s[4:5]
45+
; GCN-NEXT: s_add_u32 s4, s4, constant@gotpcrel32@lo+4
46+
; GCN-NEXT: s_addc_u32 s5, s5, constant@gotpcrel32@hi+12
47+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
48+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
49+
; GCN-NEXT: v_mov_b32_e32 v0, s4
50+
; GCN-NEXT: v_mov_b32_e32 v1, s5
51+
; GCN-NEXT: s_setpc_b64 s[30:31]
52+
ret ptr addrspace(4) @constant
53+
}
54+
55+
define amdgpu_kernel void @global_value_as3_lds_kernel(ptr addrspace(1) %out) {
56+
; GCN-LABEL: global_value_as3_lds_kernel:
57+
; GCN: ; %bb.0:
58+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
59+
; GCN-NEXT: v_mov_b32_e32 v0, 0
60+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
61+
; GCN-NEXT: global_store_dword v0, v0, s[0:1]
62+
; GCN-NEXT: s_endpgm
63+
%addr = ptrtoint ptr addrspace(3) @lds to i32
64+
store i32 %addr, ptr addrspace(1) %out
65+
ret void
66+
}
67+
68+
define void @global_value_as8_buffer_store(i32 %val) {
69+
; GCN-LABEL: global_value_as8_buffer_store:
70+
; GCN: ; %bb.0:
71+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GCN-NEXT: s_getpc_b64 s[8:9]
73+
; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
74+
; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
75+
; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
76+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
77+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
78+
; GCN-NEXT: s_waitcnt vmcnt(0)
79+
; GCN-NEXT: s_setpc_b64 s[30:31]
80+
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %val, ptr addrspace(8) @buf, i32 0, i32 0, i32 0)
81+
ret void
82+
}
83+
84+
define i32 @global_value_as8_buffer_load(i32 %offset) {
85+
; GCN-LABEL: global_value_as8_buffer_load:
86+
; GCN: ; %bb.0:
87+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+
; GCN-NEXT: s_getpc_b64 s[8:9]
89+
; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
90+
; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
91+
; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
92+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
93+
; GCN-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
94+
; GCN-NEXT: s_waitcnt vmcnt(0)
95+
; GCN-NEXT: s_setpc_b64 s[30:31]
96+
%val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) @buf, i32 %offset, i32 0, i32 0)
97+
ret i32 %val
98+
}
99+
100+
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) #0
101+
declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg) #1
102+
103+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
104+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }

0 commit comments

Comments
 (0)