-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Handle MachineOperandType global address in SIFoldOperands. #135424
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1161,6 +1161,10 @@ void SIFoldOperandsImpl::foldOperand( | |
|
|
||
| if (OpToFold.isImm()) | ||
| UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm()); | ||
| else if (OpToFold.isGlobal()) | ||
| UseMI->getOperand(1).ChangeToGA(OpToFold.getGlobal(), | ||
| OpToFold.getOffset(), | ||
| OpToFold.getTargetFlags()); | ||
| else | ||
| UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex()); | ||
|
||
| UseMI->removeOperand(2); // Remove exec read (or src1 for readlane) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s | ||
isakhilesh marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| define void @test_load_zext() { | ||
| ; CHECK-LABEL: test_load_zext: | ||
| ; CHECK: ; %bb.0: ; %.entry | ||
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
| ; CHECK-NEXT: s_mov_b32 s0, s33 | ||
| ; CHECK-NEXT: s_mov_b32 s33, s32 | ||
| ; CHECK-NEXT: s_or_saveexec_b64 s[2:3], -1 | ||
| ; CHECK-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill | ||
| ; CHECK-NEXT: s_mov_b64 exec, s[2:3] | ||
| ; CHECK-NEXT: s_add_i32 s32, s32, 16 | ||
| ; CHECK-NEXT: v_writelane_b32 v40, s0, 2 | ||
| ; CHECK-NEXT: s_getpc_b64 s[0:1] | ||
| ; CHECK-NEXT: s_add_u32 s0, s0, has_spgr_args@gotpcrel32@lo+4 | ||
| ; CHECK-NEXT: s_addc_u32 s1, s1, has_spgr_args@gotpcrel32@hi+12 | ||
| ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 | ||
| ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 | ||
| ; CHECK-NEXT: s_mov_b32 s0, DescriptorBuffer@abs32@lo | ||
| ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 | ||
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) | ||
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[2:3] | ||
| ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 | ||
| ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 | ||
| ; CHECK-NEXT: s_mov_b32 s32, s33 | ||
| ; CHECK-NEXT: v_readlane_b32 s0, v40, 2 | ||
| ; CHECK-NEXT: s_or_saveexec_b64 s[2:3], -1 | ||
| ; CHECK-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload | ||
| ; CHECK-NEXT: s_mov_b64 exec, s[2:3] | ||
| ; CHECK-NEXT: s_mov_b32 s33, s0 | ||
| ; CHECK-NEXT: s_waitcnt vmcnt(0) | ||
| ; CHECK-NEXT: s_setpc_b64 s[30:31] | ||
| .entry: | ||
| %reloc = call i32 @llvm.amdgcn.reloc.constant(metadata !0) | ||
| call void @has_spgr_args(i32 %reloc) | ||
| ret void | ||
| } | ||
|
|
||
| declare void @has_spgr_args(i32 inreg) | ||
|
|
||
| declare i32 @llvm.amdgcn.reloc.constant(metadata) #0 | ||
|
|
||
| attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } | ||
|
|
||
| !0 = !{!"DescriptorBuffer", i32 4, i32 8, i32 0, i32 0} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we really need these attributes?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have removed the attribute code on line 44 but have kept the code on line 46, because without it there was an error asking for !0. |
||
Uh oh!
There was an error while loading. Please reload this page.