Skip to content

Commit 0ee704b

Browse files
macurtis-amddavid-salinas
authored andcommitted
AMDGPU: Fix assert when multi operands to update after folding imm (llvm#148205)
In the original motivating test case, [FoldList](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1764) had entries: ``` #0: UseMI: %224:sreg_32 = S_OR_B32 %219.sub0:sreg_64, %219.sub1:sreg_64, implicit-def dead $scc UseOpNo: 1 #1: UseMI: %224:sreg_32 = S_OR_B32 %219.sub0:sreg_64, %219.sub1:sreg_64, implicit-def dead $scc UseOpNo: 2 ``` After calling [updateOperand(#0)](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1773), [tryConstantFoldOp(#0.UseMI)](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1786) removed operand 1, and entry #&llvm#8203;1.UseOpNo was no longer valid, resulting in an [assert](https://github.com/llvm/llvm-project/blob/4a35214bddbb67f9597a500d48ab8c4fb25af150/llvm/include/llvm/ADT/ArrayRef.h#L452). This change defers constant folding until all operands have been updated so that UseOpNo values remain stable.
1 parent cc3d141 commit 0ee704b

File tree

10 files changed

+45
-21
lines changed

10 files changed

+45
-21
lines changed

amd/comgr/test-lit/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ if (NOT DEFINED LLVM_LIT_PATH)
2121
endif()
2222
message("-- LLVM_LIT_PATH: ${LLVM_LIT_PATH}")
2323

24-
add_custom_target(test-lit COMMAND "${LLVM_LIT_PATH}"
24+
# TODO: Re-enable target once nPSDB issue with llvm-lit is fixed
25+
#add_custom_target(test-lit COMMAND "${LLVM_LIT_PATH}"
26+
# "${CMAKE_CURRENT_BINARY_DIR}" -v)
27+
add_custom_target(test-lit COMMAND echo "${LLVM_LIT_PATH}"
2528
"${CMAKE_CURRENT_BINARY_DIR}" -v)
2629

2730
macro(add_comgr_lit_binary name lang)

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,6 +1765,7 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
17651765
for (MachineInstr *Copy : CopiesToReplace)
17661766
Copy->addImplicitDefUseOperands(*MF);
17671767

1768+
SetVector<MachineInstr *> ConstantFoldCandidates;
17681769
for (FoldCandidate &Fold : FoldList) {
17691770
assert(!Fold.isReg() || Fold.Def.OpToFold);
17701771
if (Fold.isReg() && Fold.getReg().isVirtual()) {
@@ -1786,11 +1787,22 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
17861787
LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
17871788
<< static_cast<int>(Fold.UseOpNo) << " of "
17881789
<< *Fold.UseMI);
1790+
1791+
if (Fold.isImm())
1792+
ConstantFoldCandidates.insert(Fold.UseMI);
1793+
17891794
} else if (Fold.Commuted) {
17901795
// Restoring instruction's original operand order if fold has failed.
17911796
TII->commuteInstruction(*Fold.UseMI, false);
17921797
}
17931798
}
1799+
1800+
for (MachineInstr *MI : ConstantFoldCandidates) {
1801+
if (tryConstantFoldOp(MI)) {
1802+
LLVM_DEBUG(dbgs() << "Constant folded " << *MI);
1803+
Changed = true;
1804+
}
1805+
}
17941806
return true;
17951807
}
17961808

llvm/test/CodeGen/AMDGPU/bit-op-reduce-width-known-bits.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,8 @@ define i64 @v_xor_i64_known_i32_from_range_use_out_of_block(i64 %x) {
105105
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
106106
; CHECK-NEXT: ; %bb.1: ; %inc
107107
; CHECK-NEXT: v_not_b32_e32 v2, v4
108-
; CHECK-NEXT: v_not_b32_e32 v3, 0
109108
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
110-
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
109+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v1, vcc
111110
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
112111
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
113112
; CHECK-NEXT: v_mov_b32_e32 v0, v2
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -run-pass=si-fold-operands -o - %s | FileCheck %s
3+
---
4+
name: snork
5+
body: |
6+
bb.0:
7+
; CHECK-LABEL: name: snork
8+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
9+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
10+
; CHECK-NEXT: SI_RETURN
11+
%0:sreg_32 = S_MOV_B32 0
12+
%1:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3
13+
%2:sreg_32 = S_OR_B32 %1.sub0, %1.sub3, implicit-def dead $scc
14+
SI_RETURN
15+
...

llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ body: |
4343
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
4444
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
4545
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[DEF]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
46-
; GCN-NEXT: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 0, [[DEF1]], implicit $exec
47-
; GCN-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[DEF2]], [[REG_SEQUENCE]].sub0, implicit $exec
46+
; GCN-NEXT: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[DEF2]], [[REG_SEQUENCE]].sub0, implicit $exec
4847
%0:vgpr_32 = IMPLICIT_DEF
4948
%1:vgpr_32 = IMPLICIT_DEF
5049
%2:vgpr_32 = IMPLICIT_DEF

llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ body: |
88
; CHECK-LABEL: name: test_tryFoldZeroHighBits_skips_nonreg
99
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
1010
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
11-
; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 65535, 0, implicit $exec
12-
; CHECK-NEXT: S_NOP 0, implicit [[V_AND_B32_e64_]]
11+
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
12+
; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
1313
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
1414
%1:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1
1515
%2:vgpr_32 = V_AND_B32_e64 65535, %1.sub0, implicit $exec

llvm/test/CodeGen/AMDGPU/sdiv64.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,12 +404,11 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
404404
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
405405
; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, -1, v0
406406
; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, -1, v1, vcc
407-
; GCN-IR-NEXT: v_not_b32_e32 v5, v10
407+
; GCN-IR-NEXT: v_not_b32_e32 v4, v10
408408
; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[6:7], v8
409-
; GCN-IR-NEXT: v_not_b32_e32 v4, 0
410-
; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, v5, v11
409+
; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, v4, v11
411410
; GCN-IR-NEXT: v_mov_b32_e32 v10, 0
412-
; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc
411+
; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], -1, 0, vcc
413412
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
414413
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
415414
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0

llvm/test/CodeGen/AMDGPU/srem64.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -380,12 +380,11 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
380380
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
381381
; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, -1, v2
382382
; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, -1, v3, vcc
383-
; GCN-IR-NEXT: v_not_b32_e32 v7, v12
383+
; GCN-IR-NEXT: v_not_b32_e32 v6, v12
384384
; GCN-IR-NEXT: v_lshr_b64 v[10:11], v[0:1], v8
385-
; GCN-IR-NEXT: v_not_b32_e32 v6, 0
386-
; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v7, v13
385+
; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v6, v13
387386
; GCN-IR-NEXT: v_mov_b32_e32 v12, 0
388-
; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc
387+
; GCN-IR-NEXT: v_addc_u32_e64 v9, s[4:5], -1, 0, vcc
389388
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
390389
; GCN-IR-NEXT: v_mov_b32_e32 v13, 0
391390
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0

llvm/test/CodeGen/AMDGPU/udiv64.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,9 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
348348
; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v10
349349
; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v3, vcc
350350
; GCN-IR-NEXT: v_not_b32_e32 v0, v14
351-
; GCN-IR-NEXT: v_not_b32_e32 v1, 0
352351
; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v15
353352
; GCN-IR-NEXT: v_mov_b32_e32 v10, 0
354-
; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
353+
; GCN-IR-NEXT: v_addc_u32_e64 v1, s[4:5], -1, 0, vcc
355354
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
356355
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
357356
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0

llvm/test/CodeGen/AMDGPU/urem64.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,12 +355,11 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
355355
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
356356
; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v2
357357
; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v3, vcc
358-
; GCN-IR-NEXT: v_not_b32_e32 v7, v12
358+
; GCN-IR-NEXT: v_not_b32_e32 v6, v12
359359
; GCN-IR-NEXT: v_lshr_b64 v[10:11], v[0:1], v8
360-
; GCN-IR-NEXT: v_not_b32_e32 v6, 0
361-
; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v7, v13
360+
; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v6, v13
362361
; GCN-IR-NEXT: v_mov_b32_e32 v12, 0
363-
; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc
362+
; GCN-IR-NEXT: v_addc_u32_e64 v9, s[4:5], -1, 0, vcc
364363
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
365364
; GCN-IR-NEXT: v_mov_b32_e32 v13, 0
366365
; GCN-IR-NEXT: v_mov_b32_e32 v7, 0

0 commit comments

Comments
 (0)