Skip to content

Commit ec17ae2

Browse files
committed
Move to tryConstantFoldOp
Change-Id: I9b14559b4b5dc9c4bb383ebd517edcdc094a2e6c
1 parent 7434565 commit ec17ae2

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
16841684
}
16851685
}
16861686

1687+
if (Opc == AMDGPU::V_ADD_U32_e64 || Opc == AMDGPU::V_ADD_U32_e32) {
1688+
if (Src1Val == 0) {
1689+
// y = add x, 0 -> y = copy x
1690+
MI->removeOperand(Src1Idx);
1691+
mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
1692+
}
1693+
}
1694+
16871695
return false;
16881696
}
16891697

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s
3+
4+
@global_smem = external addrspace(3) global [0 x i8]
5+
6+
define amdgpu_kernel void @addzero() {
7+
; GCN-LABEL: addzero:
8+
; GCN: ; %bb.0: ; %.lr.ph
9+
; GCN-NEXT: v_mov_b32_e32 v2, 0
10+
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
11+
; GCN-NEXT: v_mov_b32_e32 v3, v2
12+
; GCN-NEXT: ds_write_b64 v0, v[2:3]
13+
; GCN-NEXT: s_endpgm
14+
.lr.ph:
15+
%0 = tail call i32 @llvm.amdgcn.workitem.id.x()
16+
%1 = and i32 %0, 1
17+
%2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1
18+
store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8
19+
ret void
20+
}

0 commit comments

Comments
 (0)