diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 0ed06c37507af..2675e4e9d02e4 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1761,7 +1761,9 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, for (MachineInstr *Copy : CopiesToReplace) Copy->addImplicitDefUseOperands(*MF); - for (FoldCandidate &Fold : FoldList) { + for (auto FoldIt = FoldList.begin(), End = FoldList.end(); FoldIt != End; + ++FoldIt) { + FoldCandidate &Fold = *FoldIt; assert(!Fold.isReg() || Fold.Def.OpToFold); if (Fold.isReg() && Fold.getReg().isVirtual()) { Register Reg = Fold.getReg(); @@ -1785,9 +1787,13 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, if (Fold.isImm() && tryConstantFoldOp(Fold.UseMI)) { LLVM_DEBUG(dbgs() << "Constant folded " << *Fold.UseMI); + // The instruction was folded into a copy, we have to skip any other + // occurence of UseMI in the fold list + End = std::remove_if(FoldIt + 1, End, [&](const FoldCandidate &F) { + return F.UseMI == Fold.UseMI; + }); Changed = true; } - } else if (Fold.Commuted) { // Restoring instruction's original operand order if fold has failed. TII->commuteInstruction(*Fold.UseMI, false); diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-swdev-542372.ll b/llvm/test/CodeGen/AMDGPU/si-fold-operands-swdev-542372.ll new file mode 100644 index 0000000000000..cc88bd2dd3b36 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-swdev-542372.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s + +@lds = internal addrspace(3) global [5 x i32] poison + +define amdgpu_kernel void @kernel() { +; CHECK-LABEL: kernel: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: ds_read_b64 v[2:3], v0 +; CHECK-NEXT: ds_write_b32 v0, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 2, v2 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %land.rhs49 +; CHECK-NEXT: ds_read_b64 v[0:1], v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_3 +; CHECK-NEXT: ; %bb.2: ; %land.rhs57 +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ds_write_b64 v2, v[0:1] +; CHECK-NEXT: .LBB0_3: ; %land.end59 +; CHECK-NEXT: s_endpgm +entry: + %load.lds.0 = load <2 x i32>, ptr addrspace(3) @lds + %vecext.i55 = extractelement <2 x i32> %load.lds.0, i64 0 + %cmp3.i57 = icmp eq i32 %vecext.i55, 2 + store i32 0, ptr addrspace(3) @lds + br i1 %cmp3.i57, label %land.rhs49, label %land.end59 + +land.rhs49: ; preds = %entry + %load.lds.1 = load <2 x i32>, ptr addrspace(3) @lds + %vecext.i67 = extractelement <2 x i32> %load.lds.1, i64 0 + %cmp3.i69 = icmp eq i32 %vecext.i67, 1 + br i1 %cmp3.i69, label %land.rhs57, label %land.end59 + +land.rhs57: ; preds = %land.rhs49 + %rem.i.i.i = srem <2 x i32> %load.lds.0, %load.lds.1 + %ref.tmp.sroa.0.0.vec.extract.i.i = extractelement <2 x i32> %rem.i.i.i, i64 0 + store i32 %ref.tmp.sroa.0.0.vec.extract.i.i, ptr addrspace(3) @lds + store i32 %ref.tmp.sroa.0.0.vec.extract.i.i, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @lds, i32 4) + %load.lds.2 = load <2 x i32>, ptr addrspace(3) @lds + %vecext.i.i.i = extractelement <2 x i32> %load.lds.2, i64 0 + %cmp3.i.i.i = icmp ne i32 %vecext.i.i.i, 0 + %vecext.1.i.i.i = extractelement <2 x i32> %load.lds.2, i64 1 + %cmp3.1.i.i.i = icmp ne i32 %vecext.1.i.i.i, 0 + %.not.i.i = select i1 %cmp3.i.i.i, i1 true, i1 %cmp3.1.i.i.i + br i1 %.not.i.i, label %land.end59, label %if.end.i + +if.end.i: ; preds = %land.rhs57 + %and.i.i.i = and <2 x i32> %load.lds.2, splat (i32 1) + %ref.tmp.sroa.0.0.vec.extract.i20.i = extractelement <2 x i32> %and.i.i.i, i64 0 + br label %land.end59 + +land.end59: ; preds = %if.end.i, %land.rhs57, %land.rhs49, %entry + ret void +}