Skip to content

Commit 22cdc02

Browse files
committed
[AMDGPU] Support alloca in AS0
This PR lowers an alloca in AS0 to an alloca in AS5 followed by an addrspacecast back to AS0.
1 parent 85b35a9 commit 22cdc02

File tree

7 files changed

+276
-19
lines changed

7 files changed

+276
-19
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
385385

386386
setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
387387

388+
setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
389+
388390
// For R600, this is totally unsupported, just custom lower to produce an
389391
// error.
390392
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
393+
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
391394

392395
// Library functions. These default to Expand, but we have instructions
393396
// for them.

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -912,12 +912,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
912912
.widenScalarToNextPow2(0, 32)
913913
.clampMaxNumElements(0, S32, 16);
914914

915-
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({PrivatePtr});
915+
getActionDefinitionsBuilder(G_FRAME_INDEX)
916+
.legalFor({PrivatePtr})
917+
.customFor({FlatPtr});
916918

917919
// If the amount is divergent, we have to do a wave reduction to get the
918920
// maximum value, so this is expanded during RegBankSelect.
919921
getActionDefinitionsBuilder(G_DYN_STACKALLOC)
920-
.legalFor({{PrivatePtr, S32}});
922+
.legalFor({{PrivatePtr, S32}})
923+
.customFor({FlatPtr, S32});
921924

922925
getActionDefinitionsBuilder(G_STACKSAVE)
923926
.customFor({PrivatePtr});
@@ -2221,6 +2224,10 @@ bool AMDGPULegalizerInfo::legalizeCustom(
22212224
return legalizeTrap(MI, MRI, B);
22222225
case TargetOpcode::G_DEBUGTRAP:
22232226
return legalizeDebugTrap(MI, MRI, B);
2227+
case TargetOpcode::G_FRAME_INDEX:
2228+
return legalizeFrameIndex(MI, MRI, B);
2229+
case TargetOpcode::G_DYN_STACKALLOC:
2230+
return legalizeDynStackAlloc(MI, MRI, B);
22242231
default:
22252232
return false;
22262233
}
@@ -7668,3 +7675,25 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
76687675

76697676
return true;
76707677
}
7678+
7679+
/// Custom legalization for a G_FRAME_INDEX whose result is not a private
/// (scratch) pointer.
///
/// The frame index is re-materialized as a 32-bit PRIVATE_ADDRESS pointer and
/// then address-space-cast into the original destination register, after which
/// the original instruction is removed.
///
/// \param MI  The G_FRAME_INDEX being legalized; operand 0 is the destination
///            register and operand 1 is the frame index.
/// \param MRI Unused.
/// \param B   Builder used to emit the replacement instructions.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeFrameIndex(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {
  // Read everything we need from MI up front; it is erased below.
  const auto DstReg = MI.getOperand(0).getReg();
  const auto FrameIdx = MI.getOperand(1).getIndex();

  // Same stack object, but addressed through a 32-bit private pointer.
  const LLT PrivatePtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
  auto PrivateFI = B.buildFrameIndex(PrivatePtrTy, FrameIdx);

  // Convert the private pointer to the pointer type the user asked for.
  B.buildAddrSpaceCast(DstReg, PrivateFI);

  MI.eraseFromParent();
  return true;
}
7688+
7689+
/// Custom legalization for a G_DYN_STACKALLOC whose result is not a private
/// (scratch) pointer.
///
/// The allocation is re-emitted as a G_DYN_STACKALLOC producing a 32-bit
/// PRIVATE_ADDRESS pointer with a 32-bit size, and the result is then
/// address-space-cast into the original destination register. The original
/// instruction is removed.
///
/// \param MI  The G_DYN_STACKALLOC being legalized; operand 0 is the
///            destination register, operand 1 the allocation size and
///            operand 2 the alignment immediate.
/// \param MRI Unused.
/// \param B   Builder used to emit the replacement instructions.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeDynStackAlloc(MachineInstr &MI,
                                                MachineRegisterInfo &MRI,
                                                MachineIRBuilder &B) const {
  // Read everything we need from MI up front; it is erased below.
  const auto DstReg = MI.getOperand(0).getReg();

  // The private-pointer form takes a 32-bit size. The incoming size is not
  // guaranteed to be wider than 32 bits, so a plain G_TRUNC could be malformed;
  // resize in whichever direction is required instead. This mirrors the
  // getZExtOrTrunc used by the SelectionDAG lowering.
  auto Size = B.buildZExtOrTrunc(S32, MI.getOperand(1));

  // An alignment immediate of 0 means "no particular alignment"; Align's
  // constructor rejects 0, so map it to 1 via assumeAligned.
  const Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  const LLT PrivatePtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
  auto PrivateAlloc = B.buildDynStackAlloc(PrivatePtrTy, Size, Alignment);

  // Convert the private pointer to the pointer type the user asked for.
  B.buildAddrSpaceCast(DstReg, PrivateAlloc);

  MI.eraseFromParent();
  return true;
}

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
246246

247247
bool legalizeIntrinsic(LegalizerHelper &Helper,
248248
MachineInstr &MI) const override;
249+
250+
bool legalizeFrameIndex(MachineInstr &MI, MachineRegisterInfo &MRI,
251+
MachineIRBuilder &B) const;
252+
bool legalizeDynStackAlloc(MachineInstr &MI, MachineRegisterInfo &MRI,
253+
MachineIRBuilder &B) const;
249254
};
250255
} // End llvm namespace.
251256
#endif

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4117,6 +4117,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
41174117
InVals, /*IsThisReturn=*/false, SDValue());
41184118
}
41194119

4120+
/// Custom lowering for an ISD::FrameIndex node.
///
/// The SelectionDAG node carries no address-space information, so this
/// lowering assumes the frame index originates from an alloca in address
/// space 0. It rebuilds the frame index as an i32 value and casts it from
/// PRIVATE_ADDRESS to FLAT_ADDRESS, yielding a value of the node's original
/// type.
///
/// \param Op  The FrameIndex node to lower.
/// \param DAG The DAG in which replacement nodes are created.
/// \returns the address-space cast of the i32 frame index.
SDValue SITargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Re-create the same stack slot reference as a 32-bit private address.
  const int Index = cast<FrameIndexSDNode>(Op)->getIndex();
  SDValue PrivateFI = DAG.getFrameIndex(Index, MVT::i32);

  // Widen it to the requested type by casting private -> flat.
  const auto ResultVT = Op.getValueType();
  return DAG.getAddrSpaceCast(DL, ResultVT, PrivateFI,
                              AMDGPUAS::PRIVATE_ADDRESS,
                              AMDGPUAS::FLAT_ADDRESS);
}
4130+
41204131
// This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
41214132
// except for:
41224133
// 1. Stack growth direction(default: downwards, AMDGPU: upwards), and
@@ -4129,13 +4140,27 @@ SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
41294140
SDLoc dl(Op);
41304141
EVT VT = Op.getValueType();
41314142
SDValue Chain = Op.getOperand(0);
4143+
SDValue Size = Op.getOperand(1);
4144+
4145+
// Since address space information is lost here, we assume that an i64 dynamic
4146+
// alloca comes from an alloca in AS0.
4147+
if (VT == MVT::i64) {
4148+
SDValue Align = Op.getOperand(2);
4149+
Size = DAG.getZExtOrTrunc(Size, dl, MVT::i32);
4150+
SDValue Ops[] = {Chain, Size, Align};
4151+
SDValue DynAlloc =
4152+
DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, {MVT::i32, MVT::Other}, Ops);
4153+
SDValue Cast = DAG.getAddrSpaceCast(
4154+
dl, VT, DynAlloc, AMDGPUAS::PRIVATE_ADDRESS, AMDGPUAS::FLAT_ADDRESS);
4155+
return DAG.getMergeValues({Cast, DynAlloc.getValue(1)}, dl);
4156+
}
4157+
41324158
Register SPReg = Info->getStackPtrOffsetReg();
41334159

41344160
// Chain the dynamic stack allocation so that it doesn't modify the stack
41354161
// pointer when other instructions are using the stack.
41364162
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
41374163

4138-
SDValue Size = Op.getOperand(1);
41394164
SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
41404165
Align Alignment = cast<ConstantSDNode>(Op.getOperand(2))->getAlignValue();
41414166

@@ -6087,6 +6112,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
60876112
case ISD::SMUL_LOHI:
60886113
case ISD::UMUL_LOHI:
60896114
return lowerXMUL_LOHI(Op, DAG);
6115+
case ISD::FrameIndex:
6116+
return lowerFrameIndex(Op, DAG);
60906117
case ISD::DYNAMIC_STACKALLOC:
60916118
return LowerDYNAMIC_STACKALLOC(Op, DAG);
60926119
case ISD::STACKSAVE:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
428428
SDValue LowerCall(CallLoweringInfo &CLI,
429429
SmallVectorImpl<SDValue> &InVals) const override;
430430

431+
SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
431432
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
432433
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
433434
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o - | FileCheck %s --check-prefix=ISEL
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel %s -o - | FileCheck %s --check-prefix=GI
4+
5+
declare void @bar(ptr)
6+
7+
define i32 @static_alloca() {
8+
; ISEL-LABEL: static_alloca:
9+
; ISEL: ; %bb.0:
10+
; ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; ISEL-NEXT: s_mov_b32 s16, s33
12+
; ISEL-NEXT: s_mov_b32 s33, s32
13+
; ISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
14+
; ISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
15+
; ISEL-NEXT: s_mov_b64 exec, s[18:19]
16+
; ISEL-NEXT: s_addk_i32 s32, 0x400
17+
; ISEL-NEXT: v_writelane_b32 v40, s16, 4
18+
; ISEL-NEXT: s_getpc_b64 s[16:17]
19+
; ISEL-NEXT: s_add_u32 s16, s16, bar@gotpcrel32@lo+4
20+
; ISEL-NEXT: s_addc_u32 s17, s17, bar@gotpcrel32@hi+12
21+
; ISEL-NEXT: v_writelane_b32 v40, s30, 0
22+
; ISEL-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
23+
; ISEL-NEXT: s_lshr_b32 s18, s33, 6
24+
; ISEL-NEXT: v_lshrrev_b32_e64 v0, 6, s33
25+
; ISEL-NEXT: v_writelane_b32 v40, s31, 1
26+
; ISEL-NEXT: s_cmp_lg_u32 s18, -1
27+
; ISEL-NEXT: v_readfirstlane_b32 s18, v0
28+
; ISEL-NEXT: v_writelane_b32 v40, s34, 2
29+
; ISEL-NEXT: s_cselect_b32 s34, s18, 0
30+
; ISEL-NEXT: s_mov_b64 s[18:19], src_private_base
31+
; ISEL-NEXT: v_writelane_b32 v40, s35, 3
32+
; ISEL-NEXT: s_cselect_b32 s35, s19, 0
33+
; ISEL-NEXT: v_mov_b32_e32 v0, s34
34+
; ISEL-NEXT: v_mov_b32_e32 v1, s35
35+
; ISEL-NEXT: s_waitcnt lgkmcnt(0)
36+
; ISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
37+
; ISEL-NEXT: v_mov_b32_e32 v0, s34
38+
; ISEL-NEXT: v_mov_b32_e32 v1, s35
39+
; ISEL-NEXT: flat_load_dword v0, v[0:1]
40+
; ISEL-NEXT: v_readlane_b32 s35, v40, 3
41+
; ISEL-NEXT: v_readlane_b32 s34, v40, 2
42+
; ISEL-NEXT: v_readlane_b32 s31, v40, 1
43+
; ISEL-NEXT: v_readlane_b32 s30, v40, 0
44+
; ISEL-NEXT: s_mov_b32 s32, s33
45+
; ISEL-NEXT: v_readlane_b32 s4, v40, 4
46+
; ISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
47+
; ISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
48+
; ISEL-NEXT: s_mov_b64 exec, s[6:7]
49+
; ISEL-NEXT: s_mov_b32 s33, s4
50+
; ISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
51+
; ISEL-NEXT: s_setpc_b64 s[30:31]
52+
;
53+
; GI-LABEL: static_alloca:
54+
; GI: ; %bb.0:
55+
; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56+
; GI-NEXT: s_mov_b32 s16, s33
57+
; GI-NEXT: s_mov_b32 s33, s32
58+
; GI-NEXT: s_or_saveexec_b64 s[18:19], -1
59+
; GI-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
60+
; GI-NEXT: s_mov_b64 exec, s[18:19]
61+
; GI-NEXT: v_writelane_b32 v40, s16, 4
62+
; GI-NEXT: v_writelane_b32 v40, s30, 0
63+
; GI-NEXT: v_writelane_b32 v40, s31, 1
64+
; GI-NEXT: s_addk_i32 s32, 0x400
65+
; GI-NEXT: v_writelane_b32 v40, s34, 2
66+
; GI-NEXT: s_lshr_b32 s34, s33, 6
67+
; GI-NEXT: s_getpc_b64 s[18:19]
68+
; GI-NEXT: s_add_u32 s18, s18, bar@gotpcrel32@lo+4
69+
; GI-NEXT: s_addc_u32 s19, s19, bar@gotpcrel32@hi+12
70+
; GI-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
71+
; GI-NEXT: s_mov_b64 s[16:17], src_private_base
72+
; GI-NEXT: v_lshrrev_b32_e64 v0, 6, s33
73+
; GI-NEXT: v_mov_b32_e32 v1, s17
74+
; GI-NEXT: v_writelane_b32 v40, s35, 3
75+
; GI-NEXT: s_mov_b32 s35, s17
76+
; GI-NEXT: s_waitcnt lgkmcnt(0)
77+
; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
78+
; GI-NEXT: v_mov_b32_e32 v0, s34
79+
; GI-NEXT: v_mov_b32_e32 v1, s35
80+
; GI-NEXT: flat_load_dword v0, v[0:1]
81+
; GI-NEXT: v_readlane_b32 s35, v40, 3
82+
; GI-NEXT: v_readlane_b32 s34, v40, 2
83+
; GI-NEXT: v_readlane_b32 s31, v40, 1
84+
; GI-NEXT: v_readlane_b32 s30, v40, 0
85+
; GI-NEXT: s_mov_b32 s32, s33
86+
; GI-NEXT: v_readlane_b32 s4, v40, 4
87+
; GI-NEXT: s_or_saveexec_b64 s[6:7], -1
88+
; GI-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
89+
; GI-NEXT: s_mov_b64 exec, s[6:7]
90+
; GI-NEXT: s_mov_b32 s33, s4
91+
; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
92+
; GI-NEXT: s_setpc_b64 s[30:31]
93+
%alloca = alloca i32, align 4
94+
call void @bar(ptr %alloca)
95+
%load = load i32, ptr %alloca
96+
ret i32 %load
97+
}
98+
99+
define i32 @dynamic_alloca(i32 %n) {
100+
; ISEL-LABEL: dynamic_alloca:
101+
; ISEL: ; %bb.0:
102+
; ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103+
; ISEL-NEXT: s_mov_b32 s16, s33
104+
; ISEL-NEXT: s_mov_b32 s33, s32
105+
; ISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
106+
; ISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
107+
; ISEL-NEXT: s_mov_b64 exec, s[18:19]
108+
; ISEL-NEXT: v_writelane_b32 v40, s16, 2
109+
; ISEL-NEXT: v_mad_u64_u32 v[0:1], s[16:17], v0, 4, 15
110+
; ISEL-NEXT: v_writelane_b32 v40, s30, 0
111+
; ISEL-NEXT: s_mov_b32 s18, 0
112+
; ISEL-NEXT: v_and_b32_e32 v0, -16, v0
113+
; ISEL-NEXT: s_mov_b64 s[16:17], exec
114+
; ISEL-NEXT: s_addk_i32 s32, 0x400
115+
; ISEL-NEXT: v_writelane_b32 v40, s31, 1
116+
; ISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
117+
; ISEL-NEXT: s_ff1_i32_b64 s19, s[16:17]
118+
; ISEL-NEXT: v_readlane_b32 s20, v0, s19
119+
; ISEL-NEXT: s_bitset0_b64 s[16:17], s19
120+
; ISEL-NEXT: s_max_u32 s18, s18, s20
121+
; ISEL-NEXT: s_cmp_lg_u64 s[16:17], 0
122+
; ISEL-NEXT: s_cbranch_scc1 .LBB1_1
123+
; ISEL-NEXT: ; %bb.2:
124+
; ISEL-NEXT: s_cmp_lg_u32 s32, -1
125+
; ISEL-NEXT: s_mov_b64 s[16:17], src_private_base
126+
; ISEL-NEXT: s_cselect_b32 s19, s32, 0
127+
; ISEL-NEXT: s_cselect_b32 s20, s17, 0
128+
; ISEL-NEXT: s_getpc_b64 s[16:17]
129+
; ISEL-NEXT: s_add_u32 s16, s16, bar@gotpcrel32@lo+4
130+
; ISEL-NEXT: s_addc_u32 s17, s17, bar@gotpcrel32@hi+12
131+
; ISEL-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
132+
; ISEL-NEXT: v_mov_b32_e32 v0, s32
133+
; ISEL-NEXT: v_lshl_add_u32 v0, s18, 6, v0
134+
; ISEL-NEXT: v_readfirstlane_b32 s18, v0
135+
; ISEL-NEXT: v_mov_b32_e32 v0, s19
136+
; ISEL-NEXT: v_mov_b32_e32 v1, s20
137+
; ISEL-NEXT: s_mov_b32 s32, s18
138+
; ISEL-NEXT: s_waitcnt lgkmcnt(0)
139+
; ISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
140+
; ISEL-NEXT: v_mov_b32_e32 v0, 0
141+
; ISEL-NEXT: v_readlane_b32 s31, v40, 1
142+
; ISEL-NEXT: v_readlane_b32 s30, v40, 0
143+
; ISEL-NEXT: s_mov_b32 s32, s33
144+
; ISEL-NEXT: v_readlane_b32 s4, v40, 2
145+
; ISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
146+
; ISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
147+
; ISEL-NEXT: s_mov_b64 exec, s[6:7]
148+
; ISEL-NEXT: s_mov_b32 s33, s4
149+
; ISEL-NEXT: s_waitcnt vmcnt(0)
150+
; ISEL-NEXT: s_setpc_b64 s[30:31]
151+
;
152+
; GI-LABEL: dynamic_alloca:
153+
; GI: ; %bb.0:
154+
; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155+
; GI-NEXT: s_mov_b32 s16, s33
156+
; GI-NEXT: s_mov_b32 s33, s32
157+
; GI-NEXT: s_or_saveexec_b64 s[18:19], -1
158+
; GI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
159+
; GI-NEXT: s_mov_b64 exec, s[18:19]
160+
; GI-NEXT: v_mov_b32_e32 v1, 0
161+
; GI-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
162+
; GI-NEXT: v_writelane_b32 v40, s16, 2
163+
; GI-NEXT: v_add_co_u32_e32 v0, vcc, 15, v0
164+
; GI-NEXT: v_writelane_b32 v40, s30, 0
165+
; GI-NEXT: s_mov_b32 s18, 0
166+
; GI-NEXT: v_and_b32_e32 v0, -16, v0
167+
; GI-NEXT: s_mov_b64 s[16:17], exec
168+
; GI-NEXT: s_addk_i32 s32, 0x400
169+
; GI-NEXT: v_writelane_b32 v40, s31, 1
170+
; GI-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
171+
; GI-NEXT: s_ff1_i32_b64 s19, s[16:17]
172+
; GI-NEXT: v_readlane_b32 s20, v0, s19
173+
; GI-NEXT: s_bitset0_b64 s[16:17], s19
174+
; GI-NEXT: s_max_u32 s18, s18, s20
175+
; GI-NEXT: s_cmp_lg_u64 s[16:17], 0
176+
; GI-NEXT: s_cbranch_scc1 .LBB1_1
177+
; GI-NEXT: ; %bb.2:
178+
; GI-NEXT: s_mov_b32 s16, s32
179+
; GI-NEXT: s_lshl_b32 s17, s18, 6
180+
; GI-NEXT: s_add_u32 s32, s16, s17
181+
; GI-NEXT: s_mov_b64 s[18:19], src_private_base
182+
; GI-NEXT: s_mov_b32 s17, s19
183+
; GI-NEXT: s_cmp_lg_u32 s16, -1
184+
; GI-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
185+
; GI-NEXT: s_getpc_b64 s[18:19]
186+
; GI-NEXT: s_add_u32 s18, s18, bar@gotpcrel32@lo+4
187+
; GI-NEXT: s_addc_u32 s19, s19, bar@gotpcrel32@hi+12
188+
; GI-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
189+
; GI-NEXT: v_mov_b32_e32 v0, s16
190+
; GI-NEXT: v_mov_b32_e32 v1, s17
191+
; GI-NEXT: s_waitcnt lgkmcnt(0)
192+
; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
193+
; GI-NEXT: v_mov_b32_e32 v0, 0
194+
; GI-NEXT: v_readlane_b32 s31, v40, 1
195+
; GI-NEXT: v_readlane_b32 s30, v40, 0
196+
; GI-NEXT: s_mov_b32 s32, s33
197+
; GI-NEXT: v_readlane_b32 s4, v40, 2
198+
; GI-NEXT: s_or_saveexec_b64 s[6:7], -1
199+
; GI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
200+
; GI-NEXT: s_mov_b64 exec, s[6:7]
201+
; GI-NEXT: s_mov_b32 s33, s4
202+
; GI-NEXT: s_waitcnt vmcnt(0)
203+
; GI-NEXT: s_setpc_b64 s[30:31]
204+
%alloca = alloca i32, i32 %n, align 4
205+
call void @bar(ptr %alloca)
206+
%load = load i32, ptr %alloca
207+
ret i32 0
208+
}

llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)