Skip to content

Conversation

@PankajDwivedi-25
Copy link
Contributor

@PankajDwivedi-25 PankajDwivedi-25 commented Mar 7, 2025

Avoid spilling and fold the offset into FIOp when the scavenger fails to find a free reg during frame index elimination.

Fixes #130120

@llvmbot
Copy link
Member

llvmbot commented Mar 7, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Pankaj Dwivedi (PankajDwivedi-25)

Changes

Frame index elimination uses an invalid scavenged register.
Refer issue: #130120


Full diff: https://github.com/llvm/llvm-project/pull/130287.diff

1 Files Affected:

  • (added) llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll (+63)
diff --git a/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll b/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
new file mode 100644
index 0000000000000..15f2d16caeb5a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
@@ -0,0 +1,63 @@
+; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs 2>&1 %s | FileCheck %s --check-prefix=ASSERTION
+
+; This test case hit the assertion below, when register scavenger is unable to find a valid register.
+; ASSERTION: Assertion `getReg().isPhysical() && "setIsRenamable should only be called on physical registers
+
+define amdgpu_gfx [13 x i32] @_sect_5() {
+bb:
+  %i = alloca [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], i32 0, align 16, addrspace(5)
+  %i1 = getelementptr [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], ptr addrspace(5) %i, i32 0, i32 0, i32 20
+  %i2 = getelementptr [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], ptr addrspace(5) %i, i32 0, i32 6, i32 20
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb
+  %i4 = phi i32 [ 1, %bb ], [ 0, %bb3 ]
+  %i5 = icmp eq i32 %i4, 0
+  %i6 = select i1 %i5, ptr addrspace(5) %i2, ptr addrspace(5) %i1
+  store i32 0, ptr addrspace(5) %i6, align 16
+  %i7 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 1
+  %i8 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i7
+  store float 0.000000e+00, ptr addrspace(5) %i8, align 4
+  %i9 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 2
+  %i10 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i9
+  store i32 0, ptr addrspace(5) %i10, align 8
+  %i11 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 3
+  %i12 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i11
+  store i32 0, ptr addrspace(5) %i12, align 4
+  %i13 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 4
+  %i14 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i13
+  store i32 0, ptr addrspace(5) %i14, align 16
+  %i15 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 5
+  %i16 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i15
+  store i32 0, ptr addrspace(5) %i16, align 4
+  %i17 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 6
+  %i18 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i17
+  store <2 x float> zeroinitializer, ptr addrspace(5) %i18, align 8
+  %i19 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 7
+  %i20 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i19
+  store i32 0, ptr addrspace(5) %i20, align 16
+  %i21 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 8
+  %i22 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i21
+  store <3 x float> zeroinitializer, ptr addrspace(5) %i22, align 16
+  %i23 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i1
+  store <3 x float> zeroinitializer, ptr addrspace(5) %i23, align 16
+  %i24 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 1
+  %i25 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i24
+  store i32 0, ptr addrspace(5) %i25, align 4
+  %i26 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 2
+  %i27 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i26
+  store i32 0, ptr addrspace(5) %i27, align 8
+  %i28 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 3
+  %i29 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i28
+  store i32 0, ptr addrspace(5) %i29, align 4
+  %i30 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 4
+  %i31 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i30
+  store i32 0, ptr addrspace(5) %i31, align 16
+  %i32 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 5
+  %i33 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i32
+  store i32 0, ptr addrspace(5) %i33, align 4
+  %i34 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 6
+  %i35 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i34
+  store i32 0, ptr addrspace(5) %i35, align 8
+  br label %bb3
+}

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're actually going to work on the issue and not just leave it for tracking, it's usually best to submit the failing test with the PR. If it's just for tracking, test filename should be the issue name (or at least the test name should include the issue number)

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add a synthetic case that hits this from eliminate-frame-index-*.mir tests?

@PankajDwivedi-25
Copy link
Contributor Author

Frame index elimination for s_add_i32 still has additional cases that need to be addressed.
I am deferring them for now as a separate task.

@PankajDwivedi-25 PankajDwivedi-25 changed the title [WIP][AMDGPU] frame index elimination hit assertion for scavenged nonreg [AMDGPU] frame index elimination hit assertion for scavenged nonreg Mar 10, 2025
@PankajDwivedi-25 PankajDwivedi-25 force-pushed the users/pkd-25/fix_s_add_i32_fi_elimination branch from 3f99f29 to 9eac68f Compare March 12, 2025 08:01
OtherOp.setIsKill(true);
OtherOp.setIsRenamable(true);
// Avoid clobbering framereg if scavenger could not find a free sgpr.
if (DstReg) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DstReg shouldn't be unset. If anything is conditional it should be on TmpReg

.add(OtherOp);
if (DeadSCC)
AddI32.setOperandDead(3);
if (TmpReg) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition here still doesn't make sense. The TmpReg is not used below. I'm confused by the whole flow now, it would make more sense to check MaterializedReg

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, here we have reached at the point, where we update the original DstReg with TmpReg.
so now if tmp is valid it will work perfectly. In the case if it is not we have to make condition on DstReg or TmpReg.
Materializedreg is FrameReg at this point making check on that, I don't think make sense?

obviously TmpReg is not getting used directly but we update here DstReg with TmpReg.

@PankajDwivedi-25
Copy link
Contributor Author

PankajDwivedi-25 commented Mar 17, 2025

merging it now.

@PankajDwivedi-25 PankajDwivedi-25 merged commit 1c3a9a8 into llvm:main Mar 17, 2025
11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[AMDGPU] Assertion "setIsRenamable should only be called on physical registers" failed in PEI

4 participants