File tree Expand file tree Collapse file tree 2 files changed +28
-0
lines changed
Expand file tree Collapse file tree 2 files changed +28
-0
lines changed Original file line number Diff line number Diff line change @@ -1684,6 +1684,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
16841684 }
16851685 }
16861686
1687+ if (Opc == AMDGPU::V_ADD_U32_e64 || Opc == AMDGPU::V_ADD_U32_e32) {
1688+ if (Src1Val == 0 ) {
1689+ // y = add x, 0 -> y = copy x
1690+ MI->removeOperand (Src1Idx);
1691+ mutateCopyOp (*MI, TII->get (AMDGPU::COPY));
1692+ }
1693+ }
1694+
16871695 return false ;
16881696}
16891697
Original file line number Diff line number Diff line change 1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s
3+
4+ @global_smem = external addrspace (3 ) global [0 x i8 ]
5+
; Kernel under test: stores a zero <4 x bfloat> into @global_smem at a byte
; offset of (workitem.id.x & 1). The expected assembly below feeds the
; v_and_b32 result straight into ds_write_b64 as the address, with no
; v_add_u32 in between.
6+ define amdgpu_kernel void @addzero () {
7+ ; GCN-LABEL: addzero:
8+ ; GCN: ; %bb.0: ; %.lr.ph
9+ ; GCN-NEXT: v_mov_b32_e32 v2, 0
10+ ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
11+ ; GCN-NEXT: v_mov_b32_e32 v3, v2
12+ ; GCN-NEXT: ds_write_b64 v0, v[2:3]
13+ ; GCN-NEXT: s_endpgm
14+ .lr.ph:
15+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x ()
16+ %1 = and i32 %0 , 1
17+ %2 = getelementptr i8 , ptr addrspace (3 ) @global_smem , i32 %1
18+ store <4 x bfloat> zeroinitializer , ptr addrspace (3 ) %2 , align 8
19+ ret void
20+ }
You can’t perform that action at this time.
0 commit comments