Skip to content

Commit b07ac8a

Browse files
SC llvm team
authored and committed
Merge 2885cbd and 2e93c88 into github-emu/amd-gfx-npi-sanitizable using ifdef guards
Note: this is an automatically generated branch and should NOT be used for development. This is provided for build purposes only.
3 parents 1758d6b + 2885cbd + 2e93c88 commit b07ac8a

13 files changed

+2674
-1
lines changed

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -882,7 +882,7 @@ multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32> {
882882
#endif /* LLPC_BUILD_NPI */
883883
defm _TFE_VBUFFER : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 1, 1>;
884884
#ifdef LLPC_BUILD_NPI
885-
defm _TFE_VBUFFER_indexed : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 0, 1, RsrcReg128Op>;
885+
defm _TFE_VBUFFER_indexed : MUBUF_Pseudo_Stores_Helper<opName, store_vt, 1, 1, RsrcReg128Op>;
886886
#endif /* LLPC_BUILD_NPI */
887887
}
888888

@@ -3271,6 +3271,7 @@ class Mnem_gfx13 <string mnemonic, string real_name> :
32713271
multiclass MUBUF_Real_AllAddr_gfx13<bits<8> op, string real_name = get_BUF_ps<NAME>.Mnemonic> {
32723272
foreach mode = ["_BOTHEN", "_IDXEN", "_OFFEN", "_OFFSET"] in {
32733273
defm _VBUFFER_indexed # mode : VBUFFER_MUBUF_Real_gfx13<op, real_name>;
3274+
defm _TFE_VBUFFER_indexed # mode : VBUFFER_MUBUF_Real_gfx13<op, real_name>;
32743275
}
32753276
defvar ps = get_BUF_ps<NAME>;
32763277
if !ne(ps.Mnemonic, real_name) then

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.indexed.ll

Lines changed: 280 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.indexed.nonuniform.ll

Lines changed: 280 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll

Lines changed: 239 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.indexed.ll

Lines changed: 300 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.indexed.nonuniform.ll

Lines changed: 300 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.tfe.ll

Lines changed: 259 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 196 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,196 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mcpu=gfx1300 -mtriple=amdgcn-- < %s | FileCheck %s -check-prefixes=GFX13
3+
4+
; Raw buffer load returning { i8, i32 }. Element 0 of the result is stored as
; i8 to %data_addr and element 1 (named %tfe below) is stored as i32 to
; %tfe_addr. The intrinsic is called with %rsrc and three constant-0 operands.
; The GFX13 checks expect v4 and v5 to be zeroed first, then one
; `buffer_load_u8 v[4:5] ... tfe`, followed by a b8 store of v4 and a b32
; store of v5.
define amdgpu_ps void @raw_buffer_load_i8_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
5+
; GFX13-LABEL: raw_buffer_load_i8_tfe:
6+
; GFX13: ; %bb.0:
7+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
8+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
9+
; GFX13-NEXT: v_mov_b32_e32 v5, v4
10+
; GFX13-NEXT: buffer_load_u8 v[4:5], off, s0, null tfe
11+
; GFX13-NEXT: s_wait_loadcnt 0x0
12+
; GFX13-NEXT: global_store_b8 v[0:1], v4, off scope:SCOPE_SE
13+
; GFX13-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SE
14+
; GFX13-NEXT: s_endpgm
15+
%res = call { i8, i32 } @llvm.amdgcn.raw.buffer.load.sl_i8i32s(i32 %rsrc, i32 0, i32 0, i32 0)
16+
%data = extractvalue { i8, i32 } %res, 0
17+
store i8 %data, ptr addrspace(1) %data_addr
18+
%tfe = extractvalue { i8, i32 } %res, 1
19+
store i32 %tfe, ptr addrspace(1) %tfe_addr
20+
ret void
21+
}
22+
23+
; Raw buffer load returning { i16, i32 }. Element 0 of the result is stored as
; i16 to %data_addr and element 1 (named %tfe below) is stored as i32 to
; %tfe_addr. The intrinsic is called with %rsrc and three constant-0 operands.
; The GFX13 checks expect v4 and v5 to be zeroed first, then one
; `buffer_load_u16 v[4:5] ... tfe`, followed by a b16 store of v4 and a b32
; store of v5.
define amdgpu_ps void @raw_buffer_load_i16_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
24+
; GFX13-LABEL: raw_buffer_load_i16_tfe:
25+
; GFX13: ; %bb.0:
26+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
27+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
28+
; GFX13-NEXT: v_mov_b32_e32 v5, v4
29+
; GFX13-NEXT: buffer_load_u16 v[4:5], off, s0, null tfe
30+
; GFX13-NEXT: s_wait_loadcnt 0x0
31+
; GFX13-NEXT: global_store_b16 v[0:1], v4, off scope:SCOPE_SE
32+
; GFX13-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SE
33+
; GFX13-NEXT: s_endpgm
34+
%res = call { i16, i32 } @llvm.amdgcn.raw.buffer.load.sl_i16i32s(i32 %rsrc, i32 0, i32 0, i32 0)
35+
%data = extractvalue { i16, i32 } %res, 0
36+
store i16 %data, ptr addrspace(1) %data_addr
37+
%tfe = extractvalue { i16, i32 } %res, 1
38+
store i32 %tfe, ptr addrspace(1) %tfe_addr
39+
ret void
40+
}
41+
42+
; Raw buffer load returning { half, i32 }. Element 0 of the result is stored as
; half to %data_addr and element 1 (named %tfe below) is stored as i32 to
; %tfe_addr. The intrinsic is called with %rsrc and three constant-0 operands.
; The GFX13 checks expect the same sequence as for a 16-bit integer load:
; v4 and v5 zeroed, one `buffer_load_u16 v[4:5] ... tfe`, then a b16 store of
; v4 and a b32 store of v5.
define amdgpu_ps void @raw_buffer_load_f16_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
43+
; GFX13-LABEL: raw_buffer_load_f16_tfe:
44+
; GFX13: ; %bb.0:
45+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
46+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
47+
; GFX13-NEXT: v_mov_b32_e32 v5, v4
48+
; GFX13-NEXT: buffer_load_u16 v[4:5], off, s0, null tfe
49+
; GFX13-NEXT: s_wait_loadcnt 0x0
50+
; GFX13-NEXT: global_store_b16 v[0:1], v4, off scope:SCOPE_SE
51+
; GFX13-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SE
52+
; GFX13-NEXT: s_endpgm
53+
%res = call { half, i32 } @llvm.amdgcn.raw.buffer.load.sl_f16i32s(i32 %rsrc, i32 0, i32 0, i32 0)
54+
%data = extractvalue { half, i32 } %res, 0
55+
store half %data, ptr addrspace(1) %data_addr
56+
%tfe = extractvalue { half, i32 } %res, 1
57+
store i32 %tfe, ptr addrspace(1) %tfe_addr
58+
ret void
59+
}
60+
61+
; Raw buffer load returning { i32, i32 }. Element 0 of the result is stored as
; i32 to %data_addr and element 1 (named %tfe below) is stored as i32 to
; %tfe_addr. The intrinsic is called with %rsrc and three constant-0 operands.
; The GFX13 checks expect v4 and v5 to be zeroed first, then one
; `buffer_load_b32 v[4:5] ... tfe`, followed by b32 stores of v4 and v5.
define amdgpu_ps void @raw_buffer_load_i32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
62+
; GFX13-LABEL: raw_buffer_load_i32_tfe:
63+
; GFX13: ; %bb.0:
64+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
65+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
66+
; GFX13-NEXT: v_mov_b32_e32 v5, v4
67+
; GFX13-NEXT: buffer_load_b32 v[4:5], off, s0, null tfe
68+
; GFX13-NEXT: s_wait_loadcnt 0x0
69+
; GFX13-NEXT: global_store_b32 v[0:1], v4, off scope:SCOPE_SE
70+
; GFX13-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SE
71+
; GFX13-NEXT: s_endpgm
72+
%res = call { i32, i32 } @llvm.amdgcn.raw.buffer.load.sl_i32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
73+
%data = extractvalue { i32, i32 } %res, 0
74+
store i32 %data, ptr addrspace(1) %data_addr
75+
%tfe = extractvalue { i32, i32 } %res, 1
76+
store i32 %tfe, ptr addrspace(1) %tfe_addr
77+
ret void
78+
}
79+
80+
; Raw buffer load returning { <2 x i32>, i32 }. Element 0 of the result is
; stored as <2 x i32> to %data_addr and element 1 (named %tfe below) is stored
; as i32 to %tfe_addr. The intrinsic is called with %rsrc and three constant-0
; operands.
; The GFX13 checks expect v4..v6 to be zeroed first, then one
; `buffer_load_b64 v[4:6] ... tfe`, followed by a b64 store of v[4:5] and a
; b32 store of v6.
define amdgpu_ps void @raw_buffer_load_v2i32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
81+
; GFX13-LABEL: raw_buffer_load_v2i32_tfe:
82+
; GFX13: ; %bb.0:
83+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
84+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
85+
; GFX13-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
86+
; GFX13-NEXT: buffer_load_b64 v[4:6], off, s0, null tfe
87+
; GFX13-NEXT: s_wait_loadcnt 0x0
88+
; GFX13-NEXT: global_store_b64 v[0:1], v[4:5], off scope:SCOPE_SE
89+
; GFX13-NEXT: global_store_b32 v[2:3], v6, off scope:SCOPE_SE
90+
; GFX13-NEXT: s_endpgm
91+
%res = call { <2 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2i32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
92+
%data = extractvalue { <2 x i32>, i32 } %res, 0
93+
store <2 x i32> %data, ptr addrspace(1) %data_addr
94+
%tfe = extractvalue { <2 x i32>, i32 } %res, 1
95+
store i32 %tfe, ptr addrspace(1) %tfe_addr
96+
ret void
97+
}
98+
99+
; Raw buffer load returning { <2 x float>, i32 }. Element 0 of the result is
; stored as <2 x float> to %data_addr and element 1 (named %tfe below) is
; stored as i32 to %tfe_addr. The intrinsic is called with %rsrc and three
; constant-0 operands.
; The GFX13 checks expect v4..v6 to be zeroed first, then one
; `buffer_load_b64 v[4:6] ... tfe`, followed by a b64 store of v[4:5] and a
; b32 store of v6.
define amdgpu_ps void @raw_buffer_load_v2f32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
100+
; GFX13-LABEL: raw_buffer_load_v2f32_tfe:
101+
; GFX13: ; %bb.0:
102+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
103+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
104+
; GFX13-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
105+
; GFX13-NEXT: buffer_load_b64 v[4:6], off, s0, null tfe
106+
; GFX13-NEXT: s_wait_loadcnt 0x0
107+
; GFX13-NEXT: global_store_b64 v[0:1], v[4:5], off scope:SCOPE_SE
108+
; GFX13-NEXT: global_store_b32 v[2:3], v6, off scope:SCOPE_SE
109+
; GFX13-NEXT: s_endpgm
110+
%res = call { <2 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v2f32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
111+
%data = extractvalue { <2 x float>, i32 } %res, 0
112+
store <2 x float> %data, ptr addrspace(1) %data_addr
113+
%tfe = extractvalue { <2 x float>, i32 } %res, 1
114+
store i32 %tfe, ptr addrspace(1) %tfe_addr
115+
ret void
116+
}
117+
118+
; Raw buffer load returning { <3 x i32>, i32 }. Element 0 of the result is
; stored as <3 x i32> to %data_addr and element 1 (named %tfe below) is stored
; as i32 to %tfe_addr. The intrinsic is called with %rsrc and three constant-0
; operands.
; The GFX13 checks expect v4..v7 to be zeroed first, then one
; `buffer_load_b96 v[4:7] ... tfe`, followed by a b96 store of v[4:6] and a
; b32 store of v7.
define amdgpu_ps void @raw_buffer_load_v3i32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
119+
; GFX13-LABEL: raw_buffer_load_v3i32_tfe:
120+
; GFX13: ; %bb.0:
121+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
122+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
123+
; GFX13-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
124+
; GFX13-NEXT: v_mov_b32_e32 v7, v4
125+
; GFX13-NEXT: buffer_load_b96 v[4:7], off, s0, null tfe
126+
; GFX13-NEXT: s_wait_loadcnt 0x0
127+
; GFX13-NEXT: global_store_b96 v[0:1], v[4:6], off scope:SCOPE_SE
128+
; GFX13-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SE
129+
; GFX13-NEXT: s_endpgm
130+
%res = call { <3 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3i32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
131+
%data = extractvalue { <3 x i32>, i32 } %res, 0
132+
store <3 x i32> %data, ptr addrspace(1) %data_addr
133+
%tfe = extractvalue { <3 x i32>, i32 } %res, 1
134+
store i32 %tfe, ptr addrspace(1) %tfe_addr
135+
ret void
136+
}
137+
138+
; Raw buffer load returning { <3 x float>, i32 }. Element 0 of the result is
; stored as <3 x float> to %data_addr and element 1 (named %tfe below) is
; stored as i32 to %tfe_addr. The intrinsic is called with %rsrc and three
; constant-0 operands.
; The GFX13 checks expect v4..v7 to be zeroed first, then one
; `buffer_load_b96 v[4:7] ... tfe`, followed by a b96 store of v[4:6] and a
; b32 store of v7.
define amdgpu_ps void @raw_buffer_load_v3f32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
139+
; GFX13-LABEL: raw_buffer_load_v3f32_tfe:
140+
; GFX13: ; %bb.0:
141+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
142+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
143+
; GFX13-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
144+
; GFX13-NEXT: v_mov_b32_e32 v7, v4
145+
; GFX13-NEXT: buffer_load_b96 v[4:7], off, s0, null tfe
146+
; GFX13-NEXT: s_wait_loadcnt 0x0
147+
; GFX13-NEXT: global_store_b96 v[0:1], v[4:6], off scope:SCOPE_SE
148+
; GFX13-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SE
149+
; GFX13-NEXT: s_endpgm
150+
%res = call { <3 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v3f32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
151+
%data = extractvalue { <3 x float>, i32 } %res, 0
152+
store <3 x float> %data, ptr addrspace(1) %data_addr
153+
%tfe = extractvalue { <3 x float>, i32 } %res, 1
154+
store i32 %tfe, ptr addrspace(1) %tfe_addr
155+
ret void
156+
}
157+
158+
; Raw buffer load returning { <4 x i32>, i32 }. Element 0 of the result is
; stored as <4 x i32> to %data_addr and element 1 (named %tfe below) is stored
; as i32 to %tfe_addr. The intrinsic is called with %rsrc and three constant-0
; operands.
; The GFX13 checks expect v4..v8 to be zeroed first, then one
; `buffer_load_b128 v[4:8] ... tfe`, followed by a b128 store of v[4:7] and a
; b32 store of v8.
define amdgpu_ps void @raw_buffer_load_v4i32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
159+
; GFX13-LABEL: raw_buffer_load_v4i32_tfe:
160+
; GFX13: ; %bb.0:
161+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
162+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
163+
; GFX13-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
164+
; GFX13-NEXT: v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
165+
; GFX13-NEXT: buffer_load_b128 v[4:8], off, s0, null tfe
166+
; GFX13-NEXT: s_wait_loadcnt 0x0
167+
; GFX13-NEXT: global_store_b128 v[0:1], v[4:7], off scope:SCOPE_SE
168+
; GFX13-NEXT: global_store_b32 v[2:3], v8, off scope:SCOPE_SE
169+
; GFX13-NEXT: s_endpgm
170+
%res = call { <4 x i32>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4i32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
171+
%data = extractvalue { <4 x i32>, i32 } %res, 0
172+
store <4 x i32> %data, ptr addrspace(1) %data_addr
173+
%tfe = extractvalue { <4 x i32>, i32 } %res, 1
174+
store i32 %tfe, ptr addrspace(1) %tfe_addr
175+
ret void
176+
}
177+
178+
; Raw buffer load returning { <4 x float>, i32 }. Element 0 of the result is
; stored as <4 x float> to %data_addr and element 1 (named %tfe below) is
; stored as i32 to %tfe_addr. The intrinsic is called with %rsrc and three
; constant-0 operands.
; The GFX13 checks expect v4..v8 to be zeroed first, then one
; `buffer_load_b128 v[4:8] ... tfe`, followed by a b128 store of v[4:7] and a
; b32 store of v8.
define amdgpu_ps void @raw_buffer_load_v4f32_tfe(i32 inreg %rsrc, ptr addrspace(1) %data_addr, ptr addrspace(1) %tfe_addr) {
179+
; GFX13-LABEL: raw_buffer_load_v4f32_tfe:
180+
; GFX13: ; %bb.0:
181+
; GFX13-NEXT: v_mov_b32_e32 v4, 0
182+
; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1)
183+
; GFX13-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
184+
; GFX13-NEXT: v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
185+
; GFX13-NEXT: buffer_load_b128 v[4:8], off, s0, null tfe
186+
; GFX13-NEXT: s_wait_loadcnt 0x0
187+
; GFX13-NEXT: global_store_b128 v[0:1], v[4:7], off scope:SCOPE_SE
188+
; GFX13-NEXT: global_store_b32 v[2:3], v8, off scope:SCOPE_SE
189+
; GFX13-NEXT: s_endpgm
190+
%res = call { <4 x float>, i32 } @llvm.amdgcn.raw.buffer.load.sl_v4f32i32s(i32 %rsrc, i32 0, i32 0, i32 0)
191+
%data = extractvalue { <4 x float>, i32 } %res, 0
192+
store <4 x float> %data, ptr addrspace(1) %data_addr
193+
%tfe = extractvalue { <4 x float>, i32 } %res, 1
194+
store i32 %tfe, ptr addrspace(1) %tfe_addr
195+
ret void
196+
}

0 commit comments

Comments
 (0)