Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2517,6 +2517,12 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
return false;

// Do not try to hoist a constant GEP out of the loop via reassociation.
// Constant GEPs can often be folded into addressing modes, and reassociating
// them may inhibit CSE of a common base.
if (GEP->hasAllConstantIndices())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move this check to the front, ahead of the isLoopInvariant and contains checks? Those are a bit more expensive than hasAllConstantIndices.

return false;

// This can only happen if !AllowSpeculation, otherwise this would already be
// handled.
// FIXME: Should we respect AllowSpeculation in these reassociation folds?
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
Original file line number Diff line number Diff line change
Expand Up @@ -400,9 +400,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-NEXT: s_wait_alu 0xf1ff
Expand Down Expand Up @@ -438,9 +438,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
Expand Down Expand Up @@ -531,9 +531,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-NEXT: s_wait_alu 0xf1ff
Expand Down Expand Up @@ -569,9 +569,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6982,7 +6982,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16
; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800
; CHECK-NEXT: s_cbranch_scc1 .LBB6_2
; CHECK-NEXT: .LBB6_3: ; %Flow9
; CHECK-NEXT: .LBB6_3: ; %Flow7
; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6
; CHECK-NEXT: s_cbranch_execz .LBB6_6
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
Expand Down Expand Up @@ -7048,7 +7048,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16
; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7]
; CHECK-NEXT: s_cbranch_scc0 .LBB6_5
; CHECK-NEXT: .LBB6_6: ; %Flow10
; CHECK-NEXT: .LBB6_6: ; %Flow8
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -7689,7 +7689,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3
; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1
; ALIGNED-NEXT: s_cbranch_scc1 .LBB6_2
; ALIGNED-NEXT: .LBB6_3: ; %Flow9
; ALIGNED-NEXT: .LBB6_3: ; %Flow7
; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6
; ALIGNED-NEXT: s_cbranch_execz .LBB6_6
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
Expand Down Expand Up @@ -8316,7 +8316,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3
; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1
; ALIGNED-NEXT: s_cbranch_scc0 .LBB6_5
; ALIGNED-NEXT: .LBB6_6: ; %Flow10
; ALIGNED-NEXT: .LBB6_6: ; %Flow8
; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8
; ALIGNED-NEXT: s_clause 0x7
; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32
Expand Down Expand Up @@ -8369,7 +8369,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032
; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3
; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1
; UNROLL3-NEXT: .LBB6_4: ; %Flow7
; UNROLL3-NEXT: .LBB6_4: ; %Flow5
; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6
; UNROLL3-NEXT: s_cbranch_execz .LBB6_7
; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual
Expand Down Expand Up @@ -8403,7 +8403,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32
; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7]
; UNROLL3-NEXT: s_cbranch_scc0 .LBB6_6
; UNROLL3-NEXT: .LBB6_7: ; %Flow8
; UNROLL3-NEXT: .LBB6_7: ; %Flow6
; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8
; UNROLL3-NEXT: s_setpc_b64 s[30:31]
entry:
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,10 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1]
; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6
; CHECK-NEXT: s_cbranch_execnz .LBB3_3
; CHECK-NEXT: ; %bb.1: ; %Flow34
; CHECK-NEXT: ; %bb.1: ; %Flow36
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
; CHECK-NEXT: s_cbranch_execnz .LBB3_10
; CHECK-NEXT: .LBB3_2: ; %Flow35
; CHECK-NEXT: .LBB3_2: ; %Flow37
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -494,7 +494,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB3_5
; CHECK-NEXT: .LBB3_6: ; %Flow29
; CHECK-NEXT: .LBB3_6: ; %Flow31
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_and_saveexec_b32 s8, s4
; CHECK-NEXT: s_cbranch_execz .LBB3_9
Expand All @@ -520,7 +520,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB3_8
; CHECK-NEXT: .LBB3_9: ; %Flow27
; CHECK-NEXT: .LBB3_9: ; %Flow29
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
Expand Down Expand Up @@ -556,7 +556,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_cbranch_execnz .LBB3_12
; CHECK-NEXT: .LBB3_13: ; %Flow33
; CHECK-NEXT: .LBB3_13: ; %Flow35
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB3_16
Expand Down Expand Up @@ -584,7 +584,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_cbranch_execnz .LBB3_15
; CHECK-NEXT: .LBB3_16: ; %Flow31
; CHECK-NEXT: .LBB3_16: ; %Flow33
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -907,10 +907,10 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1]
; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6
; CHECK-NEXT: s_cbranch_execnz .LBB6_3
; CHECK-NEXT: ; %bb.1: ; %Flow41
; CHECK-NEXT: ; %bb.1: ; %Flow39
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
; CHECK-NEXT: s_cbranch_execnz .LBB6_10
; CHECK-NEXT: .LBB6_2: ; %Flow42
; CHECK-NEXT: .LBB6_2: ; %Flow40
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB6_3: ; %memmove_copy_forward
Expand Down Expand Up @@ -940,7 +940,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB6_5
; CHECK-NEXT: .LBB6_6: ; %Flow36
; CHECK-NEXT: .LBB6_6: ; %Flow34
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_and_saveexec_b32 s8, s4
; CHECK-NEXT: s_cbranch_execz .LBB6_9
Expand All @@ -966,11 +966,11 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB6_8
; CHECK-NEXT: .LBB6_9: ; %Flow34
; CHECK-NEXT: .LBB6_9: ; %Flow32
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
Expand Down Expand Up @@ -1002,15 +1002,15 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_cbranch_execnz .LBB6_12
; CHECK-NEXT: .LBB6_13: ; %Flow40
; CHECK-NEXT: .LBB6_13: ; %Flow38
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB6_16
; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB6_15: ; %memmove_bwd_main_loop
Expand All @@ -1030,7 +1030,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: global_store_dwordx4 v[12:13], v[8:11], off
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_cbranch_execnz .LBB6_15
; CHECK-NEXT: .LBB6_16: ; %Flow38
; CHECK-NEXT: .LBB6_16: ; %Flow36
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -1181,8 +1181,8 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: .LBB8_9: ; %Flow31
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
Expand Down Expand Up @@ -1219,10 +1219,10 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB8_16
; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB8_15: ; %memmove_bwd_main_loop
Expand Down
Loading