-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Fix SIFixSGPRCopies To Handle Physical Registers #149859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -256,17 +256,18 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage( | |
|
|
||
| const Function *Callee = getCalleeFunction(*CalleeOp); | ||
|
|
||
| auto isSameFunction = [](const MachineFunction &MF, const Function *F) { | ||
| return F == &MF.getFunction(); | ||
| }; | ||
|
|
||
| // Avoid crashing on undefined behavior with an illegal call to a | ||
| // kernel. If a callsite's calling convention doesn't match the | ||
| // function's, it's undefined behavior. If the callsite calling | ||
| // convention does match, that would have errored earlier. | ||
| if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv())) | ||
| if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()) && | ||
| !isSameFunction(MF, Callee)) | ||
| report_fatal_error("invalid call to entry function"); | ||
|
Comment on lines
+267
to
269
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I may be missing something but why is this error message being constrained?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The added testcase, from a fuzzer, uses self-recursion. If that's not allowed, please let me know so the fuzzer can be updated.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAIU, the |
||
|
|
||
| auto isSameFunction = [](const MachineFunction &MF, const Function *F) { | ||
| return F == &MF.getFunction(); | ||
| }; | ||
|
|
||
| if (Callee && !isSameFunction(MF, Callee)) | ||
| Info.Callees.push_back(Callee); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -898,12 +898,13 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI, | |
| TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg) | ||
| .add(MI.getOperand(1)); | ||
| MI.getOperand(1).setReg(TmpReg); | ||
| return true; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No else after return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Never mind I understand now. |
||
| } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(), | ||
| MI, MI.getDebugLoc())) { | ||
| I = std::next(I); | ||
| MI.eraseFromParent(); | ||
| return true; | ||
| } | ||
| return true; | ||
| } | ||
| if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) { | ||
| SIInstrWorklist worklist; | ||
|
|
@@ -929,7 +930,9 @@ void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) { | |
| if (PHISources.contains(MI)) | ||
| return; | ||
| Register DstReg = MI->getOperand(0).getReg(); | ||
| const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); | ||
| const TargetRegisterClass *DstRC = DstReg.isVirtual() | ||
| ? MRI->getRegClass(DstReg) | ||
| : TRI->getPhysRegBaseClass(DstReg); | ||
|
|
||
| V2SCopyInfo Info(getNextVGPRToSGPRCopyId(), MI, | ||
| TRI->getRegSizeInBits(*DstRC)); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O1 < %s | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needs to be piped to I think this test file could use some cleanup (manually or automated through |
||
| source_filename = "i1-copy-from-loop.ll" | ||
|
|
||
| @G = global ptr addrspace(8) poison | ||
|
|
||
| define amdgpu_ps void @i1_copy_from_loop(ptr addrspace(8) inreg %rsrc, i32 %tid) { | ||
| entry: | ||
| br label %for.body | ||
|
|
||
| for.body: ; preds = %end.loop, %entry | ||
| %i = phi i32 [ 0, %entry ], [ %i.inc, %end.loop ] | ||
| %LGV = load ptr addrspace(8), ptr @G, align 8 | ||
| %cc = icmp ult i32 %i, 4 | ||
| call void @i1_copy_from_loop(ptr addrspace(8) %LGV, i32 -2147483648) | ||
| br i1 %cc, label %mid.loop, label %for.end | ||
|
|
||
| mid.loop: ; preds = %for.body | ||
| %v = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %tid, i32 %i, i32 0, i32 0) | ||
| %cc2 = fcmp oge float %v, 0.000000e+00 | ||
| br i1 %cc2, label %end.loop, label %for.end | ||
|
|
||
| end.loop: ; preds = %mid.loop | ||
| %i.inc = add i32 %i, 1 | ||
| br label %for.body | ||
|
|
||
| for.end: ; preds = %mid.loop, %for.body | ||
| br i1 %cc, label %if, label %end | ||
|
|
||
| if: ; preds = %for.end | ||
| call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float undef, float undef, float undef, i1 true, i1 true) | ||
| br label %end | ||
|
|
||
| end: ; preds = %if, %for.end | ||
| ret void | ||
| } | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read) | ||
| declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) nocapture readonly, i32, i32, i32, i32 immarg) #0 | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) | ||
| declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1 | ||
|
|
||
| attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } | ||
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | ||
This file was deleted.
This file was deleted.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,3 @@ | ||
| ; XFAIL: * | ||
| ; REQUIRES: asserts | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't require asserts, and also needs to check what is produced |
||
| ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't belong in this PR (and we should also delete LiveVariables)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code was necessary to get the added testcase to pass; otherwise it would crash in LiveVariables.