diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 82acb780cfb72..81459cf65d6c2 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -82,7 +82,7 @@ static cl::opt SplitEdgeProbabilityThreshold( "If the branch threshold is higher than this threshold, we allow " "speculative execution of up to 1 instruction to avoid branching to " "splitted critical edge"), - cl::init(40), cl::Hidden); + cl::init(35), cl::Hidden); static cl::opt SinkLoadInstsPerBlockThreshold( "machine-sink-load-instrs-threshold", diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index fb6575cc0ee83..fdc087e9c1991 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -632,20 +632,18 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A, ; ; CHECK-GI-LABEL: red_mla_dup_ext_u8_s8_s16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: cbz w2, .LBB5_3 +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: cbz w2, .LBB5_9 ; CHECK-GI-NEXT: // %bb.1: // %for.body.preheader ; CHECK-GI-NEXT: cmp w2, #16 ; CHECK-GI-NEXT: mov w8, w2 -; CHECK-GI-NEXT: b.hs .LBB5_4 +; CHECK-GI-NEXT: b.hs .LBB5_3 ; CHECK-GI-NEXT: // %bb.2: ; CHECK-GI-NEXT: mov w10, #0 // =0x0 ; CHECK-GI-NEXT: mov x9, xzr ; CHECK-GI-NEXT: fmov s0, w10 -; CHECK-GI-NEXT: b .LBB5_8 -; CHECK-GI-NEXT: .LBB5_3: -; CHECK-GI-NEXT: mov w0, wzr -; CHECK-GI-NEXT: ret -; CHECK-GI-NEXT: .LBB5_4: // %vector.ph +; CHECK-GI-NEXT: b .LBB5_7 +; CHECK-GI-NEXT: .LBB5_3: // %vector.ph ; CHECK-GI-NEXT: lsl w9, w1, #8 ; CHECK-GI-NEXT: movi v0.2d, #0000000000000000 ; CHECK-GI-NEXT: movi v1.2d, #0000000000000000 @@ -654,7 +652,7 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A, ; CHECK-GI-NEXT: dup v2.8h, w9 ; CHECK-GI-NEXT: and x9, x8, #0xfffffff0 ; CHECK-GI-NEXT: mov x11, x9 -; CHECK-GI-NEXT: .LBB5_5: // %vector.body +; CHECK-GI-NEXT: .LBB5_4: // %vector.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldp d3, d4, [x10, #-8] ; CHECK-GI-NEXT: subs x11, x11, #16 @@ -663,29 +661,31 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A, ; CHECK-GI-NEXT: ushll v4.8h, v4.8b, #0 ; CHECK-GI-NEXT: mla v0.8h, v2.8h, v3.8h ; CHECK-GI-NEXT: mla v1.8h, v2.8h, v4.8h -; CHECK-GI-NEXT: b.ne .LBB5_5 -; CHECK-GI-NEXT: // %bb.6: // %middle.block +; CHECK-GI-NEXT: b.ne .LBB5_4 +; CHECK-GI-NEXT: // %bb.5: // %middle.block ; CHECK-GI-NEXT: add v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: cmp x9, x8 ; CHECK-GI-NEXT: addv h0, v0.8h -; CHECK-GI-NEXT: b.ne .LBB5_8 -; CHECK-GI-NEXT: // %bb.7: -; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: b.ne .LBB5_7 +; CHECK-GI-NEXT: // %bb.6: +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov w0, w8 ; CHECK-GI-NEXT: ret -; CHECK-GI-NEXT: .LBB5_8: // %for.body.preheader1 +; CHECK-GI-NEXT: .LBB5_7: // %for.body.preheader1 ; CHECK-GI-NEXT: sxtb w10, w1 -; CHECK-GI-NEXT: sub x8, x8, x9 +; CHECK-GI-NEXT: sub x11, x8, x9 ; CHECK-GI-NEXT: add x9, x0, x9 -; CHECK-GI-NEXT: .LBB5_9: // %for.body +; CHECK-GI-NEXT: .LBB5_8: // %for.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NEXT: ldrb w11, [x9], #1 +; CHECK-GI-NEXT: ldrb w8, [x9], #1 ; CHECK-GI-NEXT: fmov w12, s0 -; CHECK-GI-NEXT: subs x8, x8, #1 -; CHECK-GI-NEXT: mul w11, w11, w10 -; CHECK-GI-NEXT: add w0, w11, w12, uxth -; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: b.ne .LBB5_9 -; CHECK-GI-NEXT: // %bb.10: // %for.cond.cleanup +; CHECK-GI-NEXT: subs x11, x11, #1 +; CHECK-GI-NEXT: mul w8, w8, w10 +; CHECK-GI-NEXT: add w8, w8, w12, uxth +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: b.ne .LBB5_8 +; CHECK-GI-NEXT: .LBB5_9: // %for.cond.cleanup +; CHECK-GI-NEXT: mov w0, w8 ; CHECK-GI-NEXT: ret entry: %conv2 = sext i8 %B to i16 diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll index 07ee87e880aff..1ca98f6015c11 100644 --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -412,6 +412,7 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-APPLE-NEXT: .cfi_def_cfa w29, 16 ; CHECK-APPLE-NEXT: .cfi_offset w30, -8 ; CHECK-APPLE-NEXT: .cfi_offset w29, -16 +; CHECK-APPLE-NEXT: movi d0, #0000000000000000 ; CHECK-APPLE-NEXT: cbz w0, LBB3_2 ; CHECK-APPLE-NEXT: ; %bb.1: ; %gen_error ; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10 @@ -420,10 +421,7 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-APPLE-NEXT: fmov s0, #1.00000000 ; CHECK-APPLE-NEXT: mov w8, #1 ; =0x1 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] -; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: ret -; CHECK-APPLE-NEXT: LBB3_2: -; CHECK-APPLE-NEXT: movi d0, #0000000000000000 +; CHECK-APPLE-NEXT: LBB3_2: ; %common.ret ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 0c9ff3eee8231..70caf812ea6c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -200,6 +200,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0 ; CHECK-NEXT: s_mov_b32 s0, 1 +; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: s_ashr_i32 s6, s3, 31 @@ -330,15 +331,12 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0 ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 -; CHECK-NEXT: s_branch .LBB1_3 -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 -; CHECK-NEXT: .LBB1_3: ; %Flow +; CHECK-NEXT: .LBB1_2: ; %Flow ; CHECK-NEXT: s_xor_b32 s0, s0, 1 ; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 -; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_cbranch_scc1 .LBB1_4 +; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4 ; CHECK-NEXT: s_sub_i32 s0, 0, s4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -358,7 +356,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index df645888626c6..2fcbc41895f03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -194,6 +194,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0 ; CHECK-NEXT: s_mov_b32 s7, 1 +; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: s_ashr_i32 s6, s3, 31 @@ -322,15 +323,12 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0 ; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 -; CHECK-NEXT: s_branch .LBB1_3 -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 -; CHECK-NEXT: .LBB1_3: ; %Flow +; CHECK-NEXT: .LBB1_2: ; %Flow ; CHECK-NEXT: s_xor_b32 s0, s7, 1 ; CHECK-NEXT: s_and_b32 s0, s0, 1 ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 -; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_cbranch_scc1 .LBB1_4 +; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4 ; CHECK-NEXT: s_sub_i32 s0, 0, s4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -348,7 +346,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index f5a901b024ef5..c9a5a92188256 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -193,6 +193,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0 ; CHECK-NEXT: s_mov_b32 s6, 1 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 +; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_mov_b32_e32 v0, s3 @@ -318,15 +319,12 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CHECK-NEXT: s_branch .LBB1_3 -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 -; CHECK-NEXT: .LBB1_3: ; %Flow +; CHECK-NEXT: .LBB1_2: ; %Flow ; CHECK-NEXT: s_xor_b32 s1, s6, 1 ; CHECK-NEXT: s_and_b32 s1, s1, 1 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 -; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_cbranch_scc1 .LBB1_4 +; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 @@ -345,7 +343,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 2be4b52198b45..06e51387c8f21 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -190,6 +190,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0 ; CHECK-NEXT: s_mov_b32 s6, 1 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 +; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_mov_b32_e32 v0, s3 @@ -314,15 +315,12 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; CHECK-NEXT: s_branch .LBB1_3 -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 -; CHECK-NEXT: .LBB1_3: ; %Flow +; CHECK-NEXT: .LBB1_2: ; %Flow ; CHECK-NEXT: s_xor_b32 s1, s6, 1 ; CHECK-NEXT: s_and_b32 s1, s1, 1 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 -; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_cbranch_scc1 .LBB1_4 +; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 @@ -339,7 +337,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/artificial-terminators.mir b/llvm/test/CodeGen/AMDGPU/artificial-terminators.mir index 1a76cae68f164..9e84d979e8547 100644 --- a/llvm/test/CodeGen/AMDGPU/artificial-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/artificial-terminators.mir @@ -34,18 +34,14 @@ body: | ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.2(0x50000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[V_ADD_U32_e64_3]], [[S_MOV_B32_1]], implicit $exec ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_LT_I32_e64_]], implicit-def $scc - ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_XOR_B32_]] - ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_LT_I32_e64_]] - ; CHECK-NEXT: S_BRANCH %bb.4 + ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_XOR_B32_]] + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -64,7 +60,7 @@ body: | ; CHECK-NEXT: S_BRANCH %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY3]], %bb.5, [[S_OR_B32_]], %bb.2, [[S_OR_B32_]], %bb.3 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY3]], %bb.1, [[S_OR_B32_]], %bb.2, [[S_OR_B32_]], %bb.3 ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll index f9ffa5ae57f3e..dfbb5f6a64042 100644 --- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll +++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll @@ -9,44 +9,34 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx8 s[36:43], s[8:9], 0x0 +; CHECK-NEXT: s_load_dwordx8 s[20:27], s[8:9], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_mov_b32 s12, 0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_cmp_lg_u32 s40, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_8 -; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i -; CHECK-NEXT: s_cmp_eq_u32 s42, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_4 -; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i -; CHECK-NEXT: s_cmp_lg_u32 s43, 0 -; CHECK-NEXT: s_mov_b32 s17, 0 -; CHECK-NEXT: s_cselect_b32 s12, -1, 0 -; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12 -; CHECK-NEXT: s_cbranch_vccz .LBB0_5 -; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: s_mov_b32 s36, 0 -; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 -; CHECK-NEXT: s_cbranch_vccz .LBB0_6 -; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: s_mov_b32 s14, s12 -; CHECK-NEXT: s_mov_b32 s15, s12 -; CHECK-NEXT: s_mov_b32 s13, s12 -; CHECK-NEXT: s_mov_b64 s[38:39], s[14:15] -; CHECK-NEXT: s_mov_b64 s[36:37], s[12:13] +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmp_lg_u32 s24, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: s_mov_b64 s[38:39], s[22:23] +; CHECK-NEXT: s_mov_b64 s[36:37], s[20:21] ; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i -; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s41, 0 -; CHECK-NEXT: s_mov_b32 s36, 1.0 -; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000 +; CHECK-NEXT: .LBB0_2: ; %if.end13.i.i ; CHECK-NEXT: s_mov_b32 s37, s36 ; CHECK-NEXT: s_mov_b32 s38, s36 +; CHECK-NEXT: s_cmp_eq_u32 s26, 0 ; CHECK-NEXT: s_mov_b32 s39, s36 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_6 +; CHECK-NEXT: ; %bb.3: ; %if.else251.i.i +; CHECK-NEXT: s_cmp_lg_u32 s27, 0 +; CHECK-NEXT: s_mov_b32 s17, 0 +; CHECK-NEXT: s_cselect_b32 s12, -1, 0 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12 +; CHECK-NEXT: s_cbranch_vccz .LBB0_8 +; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_mov_b32 s36, 0 ; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 -; CHECK-NEXT: s_cbranch_vccnz .LBB0_7 -; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i +; CHECK-NEXT: s_cbranch_vccnz .LBB0_6 +; CHECK-NEXT: .LBB0_5: ; %if.end273.i.i ; CHECK-NEXT: s_add_u32 s12, s8, 40 ; CHECK-NEXT: s_addc_u32 s13, s9, 0 ; CHECK-NEXT: s_getpc_b64 s[18:19] @@ -72,13 +62,13 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_mov_b32 s37, s36 ; CHECK-NEXT: s_mov_b32 s38, s36 ; CHECK-NEXT: s_mov_b32 s39, s36 -; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i +; CHECK-NEXT: .LBB0_6: ; %if.end294.i.i ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit +; CHECK-NEXT: .LBB0_7: ; %kernel_direct_lighting.exit ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20 ; CHECK-NEXT: v_mov_b32_e32 v0, s36 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 @@ -88,6 +78,16 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] ; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_8: ; %if.then263.i.i +; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s25, 0 +; CHECK-NEXT: s_mov_b32 s36, 1.0 +; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000 +; CHECK-NEXT: s_mov_b32 s37, s36 +; CHECK-NEXT: s_mov_b32 s38, s36 +; CHECK-NEXT: s_mov_b32 s39, s36 +; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 +; CHECK-NEXT: s_cbranch_vccz .LBB0_5 +; CHECK-NEXT: s_branch .LBB0_6 entry: %cmp5.i.i = icmp eq i32 %cmp5.i.i.arg, 0 br i1 %cmp5.i.i, label %if.end13.i.i, label %kernel_direct_lighting.exit diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll index d61c4b46596c0..ce0b79b0b358c 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -848,12 +848,13 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 ; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17 ; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], -1 ; GFX9-SDAG-NEXT: s_mov_b32 s33, 0 -; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x1000 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_cmp_lg_u32 s4, 0 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 -; GFX9-SDAG-NEXT: s_cbranch_scc0 .LBB7_6 +; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x1000 +; GFX9-SDAG-NEXT: s_cbranch_scc0 .LBB7_4 ; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.1 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 @@ -873,8 +874,11 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: s_cbranch_execnz .LBB7_5 -; GFX9-SDAG-NEXT: .LBB7_4: ; %bb.0 +; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX9-SDAG-NEXT: .LBB7_4: ; %Flow +; GFX9-SDAG-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GFX9-SDAG-NEXT: s_cbranch_vccnz .LBB7_6 +; GFX9-SDAG-NEXT: ; %bb.5: ; %bb.0 ; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 2 ; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0xfff ; GFX9-SDAG-NEXT: s_add_i32 s5, s5, 15 @@ -886,10 +890,8 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX9-SDAG-NEXT: s_add_i32 s32, s4, s5 ; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: .LBB7_5: ; %bb.2 +; GFX9-SDAG-NEXT: .LBB7_6: ; %bb.2 ; GFX9-SDAG-NEXT: s_endpgm -; GFX9-SDAG-NEXT: .LBB7_6: -; GFX9-SDAG-NEXT: s_branch .LBB7_4 ; ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_control_flow: ; GFX9-GISEL: ; %bb.0: ; %entry @@ -945,12 +947,13 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_control_flow: ; GFX11-SDAG: ; %bb.0: ; %entry ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 ; GFX11-SDAG-NEXT: s_mov_b32 s33, 0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, 64 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-SDAG-NEXT: s_cmp_lg_u32 s0, 0 ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_cbranch_scc0 .LBB7_6 +; GFX11-SDAG-NEXT: s_cbranch_scc0 .LBB7_4 ; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.1 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo @@ -967,14 +970,17 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 ; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB7_2 ; GFX11-SDAG-NEXT: ; %bb.3: -; GFX11-SDAG-NEXT: s_mov_b32 s2, s32 +; GFX11-SDAG-NEXT: s_mov_b32 s3, s32 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s2 -; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s2 dlc +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s3 +; GFX11-SDAG-NEXT: s_mov_b32 s2, 0 +; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s3 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 -; GFX11-SDAG-NEXT: s_cbranch_execnz .LBB7_5 -; GFX11-SDAG-NEXT: .LBB7_4: ; %bb.0 +; GFX11-SDAG-NEXT: .LBB7_4: ; %Flow +; GFX11-SDAG-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s2 +; GFX11-SDAG-NEXT: s_cbranch_vccnz .LBB7_6 +; GFX11-SDAG-NEXT: ; %bb.5: ; %bb.0 ; GFX11-SDAG-NEXT: s_lshl_b32 s0, s1, 2 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2 ; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 @@ -985,10 +991,8 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 -; GFX11-SDAG-NEXT: .LBB7_5: ; %bb.2 +; GFX11-SDAG-NEXT: .LBB7_6: ; %bb.2 ; GFX11-SDAG-NEXT: s_endpgm -; GFX11-SDAG-NEXT: .LBB7_6: -; GFX11-SDAG-NEXT: s_branch .LBB7_4 ; ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_control_flow: ; GFX11-GISEL: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 56a3ce7281030..d8b1b40eb03ae 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -7231,7 +7231,8 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; GENERIC-NEXT: s_load_dword s0, s[4:5], 0x9 ; GENERIC-NEXT: s_waitcnt lgkmcnt(0) ; GENERIC-NEXT: s_cmp_lg_u32 s0, 0 -; GENERIC-NEXT: s_cbranch_scc0 .LBB19_4 +; GENERIC-NEXT: s_mov_b64 s[0:1], -1 +; GENERIC-NEXT: s_cbranch_scc0 .LBB19_2 ; GENERIC-NEXT: ; %bb.1: ; %bb4 ; GENERIC-NEXT: s_mov_b32 s3, 0xf000 ; GENERIC-NEXT: s_mov_b32 s2, -1 @@ -7240,9 +7241,11 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; GENERIC-NEXT: ;;#ASMSTART ; GENERIC-NEXT: ; reg use v[0:3] ; GENERIC-NEXT: ;;#ASMEND -; GENERIC-NEXT: s_mov_b64 vcc, exec -; GENERIC-NEXT: s_cbranch_execnz .LBB19_3 -; GENERIC-NEXT: .LBB19_2: ; %bb1 +; GENERIC-NEXT: s_mov_b64 s[0:1], 0 +; GENERIC-NEXT: .LBB19_2: ; %Flow +; GENERIC-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; GENERIC-NEXT: s_cbranch_vccnz .LBB19_4 +; GENERIC-NEXT: ; %bb.3: ; %bb1 ; GENERIC-NEXT: s_mov_b32 s3, 0xf000 ; GENERIC-NEXT: s_mov_b32 s2, -1 ; GENERIC-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc @@ -7250,15 +7253,12 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; GENERIC-NEXT: ;;#ASMSTART ; GENERIC-NEXT: ; reg use v[0:3] ; GENERIC-NEXT: ;;#ASMEND -; GENERIC-NEXT: .LBB19_3: ; %bb7 +; GENERIC-NEXT: .LBB19_4: ; %bb7 ; GENERIC-NEXT: s_mov_b32 s3, 0xf000 ; GENERIC-NEXT: s_mov_b32 s2, -1 ; GENERIC-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GENERIC-NEXT: s_waitcnt vmcnt(0) ; GENERIC-NEXT: s_endpgm -; GENERIC-NEXT: .LBB19_4: -; GENERIC-NEXT: s_mov_b64 vcc, 0 -; GENERIC-NEXT: s_branch .LBB19_2 ; ; NOOPT-LABEL: extract_adjacent_blocks: ; NOOPT: ; %bb.0: ; %bb @@ -7367,7 +7367,8 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; SI-MOVREL-NEXT: s_load_dword s0, s[4:5], 0x9 ; SI-MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; SI-MOVREL-NEXT: s_cmp_lg_u32 s0, 0 -; SI-MOVREL-NEXT: s_cbranch_scc0 .LBB19_4 +; SI-MOVREL-NEXT: s_mov_b64 s[0:1], -1 +; SI-MOVREL-NEXT: s_cbranch_scc0 .LBB19_2 ; SI-MOVREL-NEXT: ; %bb.1: ; %bb4 ; SI-MOVREL-NEXT: s_mov_b32 s3, 0xf000 ; SI-MOVREL-NEXT: s_mov_b32 s2, -1 @@ -7376,8 +7377,11 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; SI-MOVREL-NEXT: ;;#ASMSTART ; SI-MOVREL-NEXT: ; reg use v[0:3] ; SI-MOVREL-NEXT: ;;#ASMEND -; SI-MOVREL-NEXT: s_cbranch_execnz .LBB19_3 -; SI-MOVREL-NEXT: .LBB19_2: ; %bb1 +; SI-MOVREL-NEXT: s_mov_b64 s[0:1], 0 +; SI-MOVREL-NEXT: .LBB19_2: ; %Flow +; SI-MOVREL-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; SI-MOVREL-NEXT: s_cbranch_vccnz .LBB19_4 +; SI-MOVREL-NEXT: ; %bb.3: ; %bb1 ; SI-MOVREL-NEXT: s_mov_b32 s3, 0xf000 ; SI-MOVREL-NEXT: s_mov_b32 s2, -1 ; SI-MOVREL-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc @@ -7385,66 +7389,68 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; SI-MOVREL-NEXT: ;;#ASMSTART ; SI-MOVREL-NEXT: ; reg use v[0:3] ; SI-MOVREL-NEXT: ;;#ASMEND -; SI-MOVREL-NEXT: .LBB19_3: ; %bb7 +; SI-MOVREL-NEXT: .LBB19_4: ; %bb7 ; SI-MOVREL-NEXT: s_mov_b32 s3, 0xf000 ; SI-MOVREL-NEXT: s_mov_b32 s2, -1 ; SI-MOVREL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-MOVREL-NEXT: s_waitcnt vmcnt(0) ; SI-MOVREL-NEXT: s_endpgm -; SI-MOVREL-NEXT: .LBB19_4: -; SI-MOVREL-NEXT: s_branch .LBB19_2 ; ; VI-LABEL: extract_adjacent_blocks: ; VI: ; %bb.0: ; %bb ; VI-NEXT: s_load_dword s0, s[4:5], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u32 s0, 0 -; VI-NEXT: s_cbranch_scc0 .LBB19_4 +; VI-NEXT: s_mov_b64 s[0:1], -1 +; VI-NEXT: s_cbranch_scc0 .LBB19_2 ; VI-NEXT: ; %bb.1: ; %bb4 ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: ;;#ASMSTART ; VI-NEXT: ; reg use v[0:3] ; VI-NEXT: ;;#ASMEND -; VI-NEXT: s_cbranch_execnz .LBB19_3 -; VI-NEXT: .LBB19_2: ; %bb1 +; VI-NEXT: s_mov_b64 s[0:1], 0 +; VI-NEXT: .LBB19_2: ; %Flow +; VI-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; VI-NEXT: s_cbranch_vccnz .LBB19_4 +; VI-NEXT: ; %bb.3: ; %bb1 ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: ;;#ASMSTART ; VI-NEXT: ; reg use v[0:3] ; VI-NEXT: ;;#ASMEND -; VI-NEXT: .LBB19_3: ; %bb7 +; VI-NEXT: .LBB19_4: ; %bb7 ; VI-NEXT: flat_store_dword v[0:1], v0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm -; VI-NEXT: .LBB19_4: -; VI-NEXT: s_branch .LBB19_2 ; ; GFX9-IDXMODE-LABEL: extract_adjacent_blocks: ; GFX9-IDXMODE: ; %bb.0: ; %bb ; GFX9-IDXMODE-NEXT: s_load_dword s0, s[4:5], 0x24 ; GFX9-IDXMODE-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-IDXMODE-NEXT: s_cmp_lg_u32 s0, 0 -; GFX9-IDXMODE-NEXT: s_cbranch_scc0 .LBB19_4 +; GFX9-IDXMODE-NEXT: s_mov_b64 s[0:1], -1 +; GFX9-IDXMODE-NEXT: s_cbranch_scc0 .LBB19_2 ; GFX9-IDXMODE-NEXT: ; %bb.1: ; %bb4 ; GFX9-IDXMODE-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc ; GFX9-IDXMODE-NEXT: s_waitcnt vmcnt(0) ; GFX9-IDXMODE-NEXT: ;;#ASMSTART ; GFX9-IDXMODE-NEXT: ; reg use v[0:3] ; GFX9-IDXMODE-NEXT: ;;#ASMEND -; GFX9-IDXMODE-NEXT: s_cbranch_execnz .LBB19_3 -; GFX9-IDXMODE-NEXT: .LBB19_2: ; %bb1 +; GFX9-IDXMODE-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-IDXMODE-NEXT: .LBB19_2: ; %Flow +; GFX9-IDXMODE-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; GFX9-IDXMODE-NEXT: s_cbranch_vccnz .LBB19_4 +; GFX9-IDXMODE-NEXT: ; %bb.3: ; %bb1 ; GFX9-IDXMODE-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc ; GFX9-IDXMODE-NEXT: s_waitcnt vmcnt(0) ; GFX9-IDXMODE-NEXT: ;;#ASMSTART ; GFX9-IDXMODE-NEXT: ; reg use v[0:3] ; GFX9-IDXMODE-NEXT: ;;#ASMEND -; GFX9-IDXMODE-NEXT: .LBB19_3: ; %bb7 +; GFX9-IDXMODE-NEXT: .LBB19_4: ; %bb7 ; GFX9-IDXMODE-NEXT: global_store_dword v[0:1], v0, off ; GFX9-IDXMODE-NEXT: s_waitcnt vmcnt(0) ; GFX9-IDXMODE-NEXT: s_endpgm -; GFX9-IDXMODE-NEXT: .LBB19_4: -; GFX9-IDXMODE-NEXT: s_branch .LBB19_2 bb: %tmp = icmp eq i32 %arg, 0 br i1 %tmp, label %bb1, label %bb4 @@ -7473,7 +7479,8 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; GENERIC-NEXT: s_load_dword s0, s[4:5], 0x9 ; GENERIC-NEXT: s_waitcnt lgkmcnt(0) ; GENERIC-NEXT: s_cmp_lg_u32 s0, 0 -; GENERIC-NEXT: s_cbranch_scc0 .LBB20_4 +; GENERIC-NEXT: s_mov_b64 s[0:1], -1 +; GENERIC-NEXT: s_cbranch_scc0 .LBB20_2 ; GENERIC-NEXT: ; %bb.1: ; %bb4 ; GENERIC-NEXT: s_mov_b32 s3, 0xf000 ; GENERIC-NEXT: s_mov_b32 s2, -1 @@ -7482,9 +7489,11 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; GENERIC-NEXT: ;;#ASMSTART ; GENERIC-NEXT: ; reg use v[0:3] ; GENERIC-NEXT: ;;#ASMEND -; GENERIC-NEXT: s_mov_b64 vcc, exec -; GENERIC-NEXT: s_cbranch_execnz .LBB20_3 -; GENERIC-NEXT: .LBB20_2: ; %bb1 +; GENERIC-NEXT: s_mov_b64 s[0:1], 0 +; GENERIC-NEXT: .LBB20_2: ; %Flow +; GENERIC-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; GENERIC-NEXT: s_cbranch_vccnz .LBB20_4 +; GENERIC-NEXT: ; %bb.3: ; %bb1 ; GENERIC-NEXT: s_mov_b32 s3, 0xf000 ; GENERIC-NEXT: s_mov_b32 s2, -1 ; GENERIC-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc @@ -7492,15 +7501,12 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; GENERIC-NEXT: ;;#ASMSTART ; GENERIC-NEXT: ; reg use v[0:3] ; GENERIC-NEXT: ;;#ASMEND -; GENERIC-NEXT: .LBB20_3: ; %bb7 +; GENERIC-NEXT: .LBB20_4: ; %bb7 ; GENERIC-NEXT: s_mov_b32 s3, 0xf000 ; GENERIC-NEXT: s_mov_b32 s2, -1 ; GENERIC-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GENERIC-NEXT: s_waitcnt vmcnt(0) ; GENERIC-NEXT: s_endpgm -; GENERIC-NEXT: .LBB20_4: -; GENERIC-NEXT: s_mov_b64 vcc, 0 -; GENERIC-NEXT: s_branch .LBB20_2 ; ; NOOPT-LABEL: insert_adjacent_blocks: ; NOOPT: ; %bb.0: ; %bb @@ -7615,7 +7621,8 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; SI-MOVREL-NEXT: s_load_dword s0, s[4:5], 0x9 ; SI-MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; SI-MOVREL-NEXT: s_cmp_lg_u32 s0, 0 -; SI-MOVREL-NEXT: s_cbranch_scc0 .LBB20_4 +; SI-MOVREL-NEXT: s_mov_b64 s[0:1], -1 +; SI-MOVREL-NEXT: s_cbranch_scc0 .LBB20_2 ; SI-MOVREL-NEXT: ; %bb.1: ; %bb4 ; SI-MOVREL-NEXT: s_mov_b32 s3, 0xf000 ; SI-MOVREL-NEXT: s_mov_b32 s2, -1 @@ -7624,8 +7631,11 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; SI-MOVREL-NEXT: ;;#ASMSTART ; SI-MOVREL-NEXT: ; reg use v[0:3] ; SI-MOVREL-NEXT: ;;#ASMEND -; SI-MOVREL-NEXT: s_cbranch_execnz .LBB20_3 -; SI-MOVREL-NEXT: .LBB20_2: ; %bb1 +; SI-MOVREL-NEXT: s_mov_b64 s[0:1], 0 +; SI-MOVREL-NEXT: .LBB20_2: ; %Flow +; SI-MOVREL-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; SI-MOVREL-NEXT: s_cbranch_vccnz .LBB20_4 +; SI-MOVREL-NEXT: ; %bb.3: ; %bb1 ; SI-MOVREL-NEXT: s_mov_b32 s3, 0xf000 ; SI-MOVREL-NEXT: s_mov_b32 s2, -1 ; SI-MOVREL-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc @@ -7633,66 +7643,68 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; SI-MOVREL-NEXT: ;;#ASMSTART ; SI-MOVREL-NEXT: ; reg use v[0:3] ; SI-MOVREL-NEXT: ;;#ASMEND -; SI-MOVREL-NEXT: .LBB20_3: ; %bb7 +; SI-MOVREL-NEXT: .LBB20_4: ; %bb7 ; SI-MOVREL-NEXT: s_mov_b32 s3, 0xf000 ; SI-MOVREL-NEXT: s_mov_b32 s2, -1 ; SI-MOVREL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-MOVREL-NEXT: s_waitcnt vmcnt(0) ; SI-MOVREL-NEXT: s_endpgm -; SI-MOVREL-NEXT: .LBB20_4: -; SI-MOVREL-NEXT: s_branch .LBB20_2 ; ; VI-LABEL: insert_adjacent_blocks: ; VI: ; %bb.0: ; %bb ; VI-NEXT: s_load_dword s0, s[4:5], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u32 s0, 0 -; VI-NEXT: s_cbranch_scc0 .LBB20_4 +; VI-NEXT: s_mov_b64 s[0:1], -1 +; VI-NEXT: s_cbranch_scc0 .LBB20_2 ; VI-NEXT: ; %bb.1: ; %bb4 ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: ;;#ASMSTART ; VI-NEXT: ; reg use v[0:3] ; VI-NEXT: ;;#ASMEND -; VI-NEXT: s_cbranch_execnz .LBB20_3 -; VI-NEXT: .LBB20_2: ; %bb1 +; VI-NEXT: s_mov_b64 s[0:1], 0 +; VI-NEXT: .LBB20_2: ; %Flow +; VI-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; VI-NEXT: s_cbranch_vccnz .LBB20_4 +; VI-NEXT: ; %bb.3: ; %bb1 ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: ;;#ASMSTART ; VI-NEXT: ; reg use v[0:3] ; VI-NEXT: ;;#ASMEND -; VI-NEXT: .LBB20_3: ; %bb7 +; VI-NEXT: .LBB20_4: ; %bb7 ; VI-NEXT: flat_store_dwordx4 v[0:1], v[0:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm -; VI-NEXT: .LBB20_4: -; VI-NEXT: s_branch .LBB20_2 ; ; GFX9-IDXMODE-LABEL: insert_adjacent_blocks: ; GFX9-IDXMODE: ; %bb.0: ; %bb ; GFX9-IDXMODE-NEXT: s_load_dword s0, s[4:5], 0x24 ; GFX9-IDXMODE-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-IDXMODE-NEXT: s_cmp_lg_u32 s0, 0 -; GFX9-IDXMODE-NEXT: s_cbranch_scc0 .LBB20_4 +; GFX9-IDXMODE-NEXT: s_mov_b64 s[0:1], -1 +; GFX9-IDXMODE-NEXT: s_cbranch_scc0 .LBB20_2 ; GFX9-IDXMODE-NEXT: ; %bb.1: ; %bb4 ; GFX9-IDXMODE-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc ; GFX9-IDXMODE-NEXT: s_waitcnt vmcnt(0) ; GFX9-IDXMODE-NEXT: ;;#ASMSTART ; GFX9-IDXMODE-NEXT: ; reg use v[0:3] ; GFX9-IDXMODE-NEXT: ;;#ASMEND -; GFX9-IDXMODE-NEXT: s_cbranch_execnz .LBB20_3 -; GFX9-IDXMODE-NEXT: .LBB20_2: ; %bb1 +; GFX9-IDXMODE-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-IDXMODE-NEXT: .LBB20_2: ; %Flow +; GFX9-IDXMODE-NEXT: s_andn2_b64 vcc, exec, s[0:1] +; GFX9-IDXMODE-NEXT: s_cbranch_vccnz .LBB20_4 +; GFX9-IDXMODE-NEXT: ; %bb.3: ; %bb1 ; GFX9-IDXMODE-NEXT: global_load_dwordx4 v[0:3], v[0:1], off glc ; GFX9-IDXMODE-NEXT: s_waitcnt vmcnt(0) ; GFX9-IDXMODE-NEXT: ;;#ASMSTART ; GFX9-IDXMODE-NEXT: ; reg use v[0:3] ; GFX9-IDXMODE-NEXT: ;;#ASMEND -; GFX9-IDXMODE-NEXT: .LBB20_3: ; %bb7 +; GFX9-IDXMODE-NEXT: .LBB20_4: ; %bb7 ; GFX9-IDXMODE-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-IDXMODE-NEXT: s_waitcnt vmcnt(0) ; GFX9-IDXMODE-NEXT: s_endpgm -; GFX9-IDXMODE-NEXT: .LBB20_4: -; GFX9-IDXMODE-NEXT: s_branch .LBB20_2 bb: %tmp = icmp eq i32 %arg, 0 br i1 %tmp, label %bb1, label %bb4 diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index a9b8663a48dea..fc8da3e50189c 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -16,7 +16,16 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; GFX942-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0 ; GFX942-NEXT: s_branch .LBB0_2 -; GFX942-NEXT: .LBB0_1: ; %bb2 +; GFX942-NEXT: .LBB0_1: ; %Flow +; GFX942-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; GFX942-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GFX942-NEXT: s_cbranch_vccz .LBB0_4 +; GFX942-NEXT: .LBB0_2: ; %bb +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX942-NEXT: s_mov_b64 s[4:5], -1 +; GFX942-NEXT: s_cbranch_vccnz .LBB0_1 +; GFX942-NEXT: ; %bb.3: ; %bb2 ; GFX942-NEXT: ; in Loop: Header=BB0_2 Depth=1 ; GFX942-NEXT: s_or_b32 s4, s3, 1 ; GFX942-NEXT: s_ashr_i32 s5, s3, 31 @@ -30,15 +39,9 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX942-NEXT: v_accvgpr_write_b32 a2, v2 ; GFX942-NEXT: v_accvgpr_write_b32 a3, v3 ; GFX942-NEXT: s_and_b32 s3, s5, s4 -; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 ; GFX942-NEXT: v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[4:5], a[0:3] -; GFX942-NEXT: s_cbranch_execz .LBB0_4 -; GFX942-NEXT: .LBB0_2: ; %bb -; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX942-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX942-NEXT: s_cbranch_vccz .LBB0_1 -; GFX942-NEXT: ; %bb.3: -; GFX942-NEXT: ; implicit-def: $sgpr3 +; GFX942-NEXT: s_branch .LBB0_1 ; GFX942-NEXT: .LBB0_4: ; %common.ret ; GFX942-NEXT: s_endpgm ; @@ -55,12 +58,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0 ; GFX908-NEXT: s_branch .LBB0_2 -; GFX908-NEXT: .LBB0_1: ; %bb2 +; GFX908-NEXT: .LBB0_1: ; %Flow +; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; GFX908-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GFX908-NEXT: s_cbranch_vccz .LBB0_4 +; GFX908-NEXT: .LBB0_2: ; %bb +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], -1 +; GFX908-NEXT: s_cbranch_vccnz .LBB0_1 +; GFX908-NEXT: ; %bb.3: ; %bb2 ; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1 ; GFX908-NEXT: s_or_b32 s4, s3, 1 ; GFX908-NEXT: s_ashr_i32 s5, s3, 31 ; GFX908-NEXT: s_mov_b32 s3, s2 -; GFX908-NEXT: s_nop 3 ; GFX908-NEXT: v_accvgpr_read_b32 v0, a0 ; GFX908-NEXT: v_mov_b32_e32 v5, s3 ; GFX908-NEXT: v_mov_b32_e32 v4, s2 @@ -72,13 +83,8 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: v_accvgpr_write_b32 a3, v3 ; GFX908-NEXT: s_and_b32 s3, s5, s4 ; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[0:3], v[4:5], v[4:5], a[0:3] -; GFX908-NEXT: s_cbranch_execz .LBB0_4 -; GFX908-NEXT: .LBB0_2: ; %bb -; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX908-NEXT: s_cbranch_vccz .LBB0_1 -; GFX908-NEXT: ; %bb.3: -; GFX908-NEXT: ; implicit-def: $sgpr3 +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: s_branch .LBB0_1 ; GFX908-NEXT: .LBB0_4: ; %common.ret ; GFX908-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll index 3e45a2d0df43d..d1a7c25f275bf 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -116,18 +116,19 @@ define amdgpu_kernel void @negated_cond_dominated_blocks(ptr addrspace(1) %arg1) ; GCN-NEXT: s_cbranch_scc1 .LBB1_6 ; GCN-NEXT: .LBB1_2: ; %bb4 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_mov_b64 s[8:9], -1 +; GCN-NEXT: ; implicit-def: $sgpr2 ; GCN-NEXT: s_mov_b64 vcc, s[0:1] ; GCN-NEXT: s_cbranch_vccz .LBB1_4 ; GCN-NEXT: ; %bb.3: ; %bb6 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_add_i32 s2, s3, 1 -; GCN-NEXT: s_mov_b64 vcc, exec -; GCN-NEXT: s_cbranch_execnz .LBB1_1 -; GCN-NEXT: s_branch .LBB1_5 -; GCN-NEXT: .LBB1_4: ; in Loop: Header=BB1_2 Depth=1 -; GCN-NEXT: ; implicit-def: $sgpr2 -; GCN-NEXT: s_mov_b64 vcc, 0 -; GCN-NEXT: .LBB1_5: ; %bb5 +; GCN-NEXT: s_mov_b64 s[8:9], 0 +; GCN-NEXT: .LBB1_4: ; %Flow +; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 +; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9] +; GCN-NEXT: s_cbranch_vccnz .LBB1_1 +; GCN-NEXT: ; %bb.5: ; %bb5 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_lshl_b32 s2, s3, 5 ; GCN-NEXT: s_or_b32 s2, s2, 1 diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index 0501602bbd8f4..1c7b0ddbd64bf 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1700,6 +1700,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; SI-LABEL: complex_loop: ; SI: ; %bb.0: ; %.entry ; SI-NEXT: s_cmp_lt_i32 s0, 1 +; SI-NEXT: v_mov_b32_e32 v2, -1 ; SI-NEXT: s_cbranch_scc1 .LBB15_7 ; SI-NEXT: ; %bb.1: ; %.lr.ph ; SI-NEXT: s_mov_b64 s[2:3], exec @@ -1731,10 +1732,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; SI-NEXT: s_branch .LBB15_2 ; SI-NEXT: .LBB15_6: ; %Flow ; SI-NEXT: s_or_b64 exec, exec, s[0:1] -; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm -; SI-NEXT: s_endpgm -; SI-NEXT: .LBB15_7: -; SI-NEXT: v_mov_b32_e32 v2, -1 +; SI-NEXT: .LBB15_7: ; %._crit_edge ; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm ; SI-NEXT: s_endpgm ; SI-NEXT: .LBB15_8: @@ -1744,6 +1742,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; ; GFX10-WAVE64-LABEL: complex_loop: ; GFX10-WAVE64: ; %bb.0: ; %.entry +; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1 ; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1 ; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph @@ -1776,10 +1775,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; GFX10-WAVE64-NEXT: s_branch .LBB15_2 ; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm -; GFX10-WAVE64-NEXT: s_endpgm -; GFX10-WAVE64-NEXT: .LBB15_7: -; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1 +; GFX10-WAVE64-NEXT: .LBB15_7: ; %._crit_edge ; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm ; GFX10-WAVE64-NEXT: s_endpgm ; GFX10-WAVE64-NEXT: .LBB15_8: @@ -1789,6 +1785,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; ; GFX10-WAVE32-LABEL: complex_loop: ; GFX10-WAVE32: ; %bb.0: ; %.entry +; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1 ; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1 ; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph @@ -1821,10 +1818,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; GFX10-WAVE32-NEXT: s_branch .LBB15_2 ; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm -; GFX10-WAVE32-NEXT: s_endpgm -; GFX10-WAVE32-NEXT: .LBB15_7: -; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1 +; GFX10-WAVE32-NEXT: .LBB15_7: ; %._crit_edge ; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm ; GFX10-WAVE32-NEXT: s_endpgm ; GFX10-WAVE32-NEXT: .LBB15_8: @@ -1834,6 +1828,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; ; GFX11-LABEL: complex_loop: ; GFX11: ; %bb.0: ; %.entry +; GFX11-NEXT: v_mov_b32_e32 v2, -1 ; GFX11-NEXT: s_cmp_lt_i32 s0, 1 ; GFX11-NEXT: s_cbranch_scc1 .LBB15_7 ; GFX11-NEXT: ; %bb.1: ; %.lr.ph @@ -1867,10 +1862,7 @@ define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { ; GFX11-NEXT: s_branch .LBB15_2 ; GFX11-NEXT: .LBB15_6: ; %Flow ; GFX11-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done -; GFX11-NEXT: s_endpgm -; GFX11-NEXT: .LBB15_7: -; GFX11-NEXT: v_mov_b32_e32 v2, -1 +; GFX11-NEXT: .LBB15_7: ; %._crit_edge ; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done ; GFX11-NEXT: s_endpgm ; GFX11-NEXT: .LBB15_8: diff --git a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll index fb9139c0d1285..dc4d232e43b40 100644 --- a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll +++ b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll @@ -214,20 +214,20 @@ define i32 @f0(i1 %c0, i32 %v, ptr %p) { ; V7A-NEXT: tst r0, #1 ; V7A-NEXT: movt r3, #257 ; V7A-NEXT: and r1, r1, r3 -; V7A-NEXT: beq .LBB1_3 +; V7A-NEXT: beq .LBB1_2 ; V7A-NEXT: @ %bb.1: @ %A +; V7A-NEXT: mov r0, #0 ; V7A-NEXT: cmp r1, #0 -; V7A-NEXT: moveq r0, #0 -; V7A-NEXT: bxeq lr -; V7A-NEXT: .LBB1_2: @ %D -; V7A-NEXT: mov r0, #1 +; V7A-NEXT: movne r0, #1 ; V7A-NEXT: bx lr -; V7A-NEXT: .LBB1_3: @ %B +; V7A-NEXT: .LBB1_2: @ %B ; V7A-NEXT: mov r0, #1 ; V7A-NEXT: cmp r1, #0 ; V7A-NEXT: str r0, [r2] ; V7A-NEXT: mov r0, #0 -; V7A-NEXT: moveq r0, #1 +; V7A-NEXT: bxne lr +; V7A-NEXT: .LBB1_3: @ %D +; V7A-NEXT: mov r0, #1 ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f0: @@ -328,20 +328,20 @@ define i32 @f1(i1 %c0, i32 %v, ptr %p) { ; V7A-LABEL: f1: ; V7A: @ %bb.0: @ %E ; V7A-NEXT: tst r0, #1 -; V7A-NEXT: beq .LBB2_3 +; V7A-NEXT: beq .LBB2_2 ; V7A-NEXT: @ %bb.1: @ %A +; V7A-NEXT: mov r0, #0 ; V7A-NEXT: tst r1, #100663296 -; V7A-NEXT: moveq r0, #0 -; V7A-NEXT: bxeq lr -; V7A-NEXT: .LBB2_2: @ %D -; V7A-NEXT: mov r0, #1 +; V7A-NEXT: movne r0, #1 ; V7A-NEXT: bx lr -; V7A-NEXT: .LBB2_3: @ %B +; V7A-NEXT: .LBB2_2: @ %B ; V7A-NEXT: mov r0, #1 ; V7A-NEXT: tst r1, #100663296 ; V7A-NEXT: str r0, [r2] ; V7A-NEXT: mov r0, #0 -; V7A-NEXT: moveq r0, #1 +; V7A-NEXT: bxne lr +; V7A-NEXT: .LBB2_3: @ %D +; V7A-NEXT: mov r0, #1 ; V7A-NEXT: bx lr ; ; V7A-T-LABEL: f1: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/sdiv-freebsd.ll b/llvm/test/CodeGen/Mips/llvm-ir/sdiv-freebsd.ll index 03ada2803cf74..b6653b904e9b5 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/sdiv-freebsd.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/sdiv-freebsd.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --mtriple=mips-unknown-freebsd -mcpu=mips2 -filetype=asm < %s -mcpu=mips2 | FileCheck %s -check-prefixes=MIPS2 ; ; Created from the following test case (PR121463) with @@ -21,7 +22,7 @@ define dso_local void @l2arc_write_interval() local_unnamed_addr #0 { ; MIPS2-NEXT: lui $1, %hi(l2arc_write_interval_wrote) ; MIPS2-NEXT: lw $1, %lo(l2arc_write_interval_wrote)($1) ; MIPS2-NEXT: beqz $1, $BB0_2 -; MIPS2-NEXT: nop +; MIPS2-NEXT: addiu $2, $zero, 0 ; MIPS2-NEXT: # %bb.1: # %if.then ; MIPS2-NEXT: lui $1, %hi(l2arc_feed_secs) ; MIPS2-NEXT: lw $1, %lo(l2arc_feed_secs)($1) @@ -30,8 +31,10 @@ define dso_local void @l2arc_write_interval() local_unnamed_addr #0 { ; MIPS2-NEXT: div $zero, $2, $1 ; MIPS2-NEXT: teq $1, $zero, 7 ; MIPS2-NEXT: mflo $2 -; MIPS2-NEXT: j $BB0_3 -; MIPS2-NEXT: nop +; MIPS2-NEXT: $BB0_2: # %if.end +; MIPS2-NEXT: lui $1, %hi(l2arc_write_interval_next) +; MIPS2-NEXT: jr $ra +; MIPS2-NEXT: sw $2, %lo(l2arc_write_interval_next)($1) entry: %0 = load i32, ptr @l2arc_write_interval_wrote, align 4 %tobool.not = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll index 35ddcfd9ba6d6..4110bb57ff97e 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll @@ -39,6 +39,9 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r6, 0 ; CHECK-NEXT: cmpwi cr1, r6, 0 +; CHECK-NEXT: mr r8, r3 +; CHECK-NEXT: li r9, 0 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq ; CHECK-NEXT: cmpwi cr1, r7, 0 ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 @@ -46,31 +49,30 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 ; CHECK-NEXT: # %bb.2: # %for.body.preheader -; CHECK-NEXT: slwi r8, r4, 1 +; CHECK-NEXT: slwi r3, r4, 1 ; CHECK-NEXT: li r10, 0 ; CHECK-NEXT: li r11, 0 ; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill -; CHECK-NEXT: add r8, r4, r8 +; CHECK-NEXT: add r3, r4, r3 ; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill -; CHECK-NEXT: add r9, r5, r8 +; CHECK-NEXT: add r3, r5, r3 ; CHECK-NEXT: add r5, r5, r4 -; CHECK-NEXT: add r8, r3, r5 -; CHECK-NEXT: add r9, r3, r9 +; CHECK-NEXT: add r5, r8, r5 +; CHECK-NEXT: add r8, r8, r3 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: .align 4 ; CHECK-NEXT: L..BB0_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz r12, 0(r8) -; CHECK-NEXT: lwzx r0, r8, r4 -; CHECK-NEXT: lwz r31, 0(r9) -; CHECK-NEXT: lwzx r30, r9, r4 +; CHECK-NEXT: lwz r12, 0(r5) +; CHECK-NEXT: lwzx r0, r5, r4 +; CHECK-NEXT: lwz r31, 0(r8) +; CHECK-NEXT: lwzx r30, r8, r4 +; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: addi r8, r8, 1 -; CHECK-NEXT: addi r9, r9, 1 ; CHECK-NEXT: mullw r12, r0, r12 ; CHECK-NEXT: mullw r12, r12, r31 ; CHECK-NEXT: mullw r12, r12, r30 -; CHECK-NEXT: addc r5, r5, r12 +; CHECK-NEXT: addc r9, r9, r12 ; CHECK-NEXT: addze r3, r3 ; CHECK-NEXT: addic r11, r11, 1 ; CHECK-NEXT: addze r10, r10 @@ -86,11 +88,8 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload ; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload -; CHECK-NEXT: mr r4, r5 -; CHECK-NEXT: blr -; CHECK-NEXT: L..BB0_6: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: L..BB0_6: # %for.cond.cleanup +; CHECK-NEXT: mr r4, r9 ; CHECK-NEXT: blr entry: %add = add nsw i32 %base1, %offset diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index b71a360d1be12..069dac288ee05 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -35,35 +35,34 @@ define i64 @two_chain_same_offset_succ(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: two_chain_same_offset_succ: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB0_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: sldi r3, r4, 1 +; CHECK-NEXT: add r8, r5, r4 ; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: add r8, r4, r7 -; CHECK-NEXT: add r7, r5, r4 -; CHECK-NEXT: add r5, r5, r8 -; CHECK-NEXT: add r7, r3, r7 -; CHECK-NEXT: add r5, r3, r5 +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: add r8, r7, r8 +; CHECK-NEXT: add r3, r5, r3 +; CHECK-NEXT: add r5, r7, r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ld r6, 0(r7) -; CHECK-NEXT: ldx r8, r7, r4 +; CHECK-NEXT: ld r6, 0(r8) +; CHECK-NEXT: ldx r7, r8, r4 ; CHECK-NEXT: ld r9, 0(r5) ; CHECK-NEXT: ldx r10, r5, r4 -; CHECK-NEXT: addi r7, r7, 1 +; CHECK-NEXT: addi r8, r8, 1 ; CHECK-NEXT: addi r5, r5, 1 -; CHECK-NEXT: mulld r6, r8, r6 +; CHECK-NEXT: mulld r6, r7, r6 ; CHECK-NEXT: mulld r6, r6, r9 ; CHECK-NEXT: maddld r3, r6, r10, r3 ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul2 = mul nsw i64 %offset, 3 @@ -136,26 +135,27 @@ for.body: ; preds = %entry, %for.body define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: not_perfect_chain_all_same_offset_fail: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB1_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: sldi r7, r4, 1 -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: add r8, r4, r7 -; CHECK-NEXT: sldi r9, r4, 2 +; CHECK-NEXT: sldi r8, r4, 1 +; CHECK-NEXT: add r5, r7, r5 +; CHECK-NEXT: add r9, r4, r8 +; CHECK-NEXT: sldi r10, r4, 2 ; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: add r10, r4, r9 +; CHECK-NEXT: add r11, r4, r10 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ldx r6, r5, r4 -; CHECK-NEXT: ldx r11, r5, r7 -; CHECK-NEXT: ldx r12, r5, r8 -; CHECK-NEXT: ldx r0, r5, r9 -; CHECK-NEXT: mulld r6, r11, r6 -; CHECK-NEXT: ldx r30, r5, r10 +; CHECK-NEXT: ldx r7, r5, r8 +; CHECK-NEXT: ldx r12, r5, r9 +; CHECK-NEXT: ldx r0, r5, r10 +; CHECK-NEXT: mulld r6, r7, r6 +; CHECK-NEXT: ldx r30, r5, r11 ; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: mulld r6, r6, r12 ; CHECK-NEXT: mulld r6, r6, r0 @@ -164,9 +164,6 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul2 = mul nsw i64 %offset, 3 @@ -235,29 +232,27 @@ for.body: ; preds = %entry, %for.body define i64 @no_enough_elements_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: no_enough_elements_fail: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB2_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: sldi r8, r4, 1 ; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: add r4, r4, r7 +; CHECK-NEXT: add r5, r7, r5 +; CHECK-NEXT: add r4, r4, r8 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB2_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ld r6, 0(r5) -; CHECK-NEXT: ldx r8, r5, r7 +; CHECK-NEXT: ldx r7, r5, r8 ; CHECK-NEXT: ldx r9, r5, r4 ; CHECK-NEXT: addi r5, r5, 1 -; CHECK-NEXT: mulld r6, r8, r6 +; CHECK-NEXT: mulld r6, r7, r6 ; CHECK-NEXT: maddld r3, r6, r9, r3 ; CHECK-NEXT: bdnz .LBB2_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul1 = mul nsw i64 %offset, 3 @@ -320,33 +315,32 @@ for.body: ; preds = %entry, %for.body define i64 @no_reuseable_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: no_reuseable_offset_fail: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB3_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r9, r4, 3 +; CHECK-NEXT: sldi r3, r4, 3 ; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: add r5, r3, r5 +; CHECK-NEXT: add r5, r7, r5 +; CHECK-NEXT: sldi r8, r4, 1 +; CHECK-NEXT: sldi r9, r4, 2 +; CHECK-NEXT: sub r4, r3, r4 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: sldi r7, r4, 1 -; CHECK-NEXT: sldi r8, r4, 2 -; CHECK-NEXT: sub r4, r9, r4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB3_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ld r6, 0(r5) -; CHECK-NEXT: ldx r9, r5, r7 -; CHECK-NEXT: ldx r10, r5, r8 +; CHECK-NEXT: ldx r7, r5, r8 +; CHECK-NEXT: ldx r10, r5, r9 ; CHECK-NEXT: ldx r11, r5, r4 ; CHECK-NEXT: addi r5, r5, 1 -; CHECK-NEXT: mulld r6, r9, r6 +; CHECK-NEXT: mulld r6, r7, r6 ; CHECK-NEXT: mulld r6, r6, r10 ; CHECK-NEXT: maddld r3, r6, r11, r3 ; CHECK-NEXT: bdnz .LBB3_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul1 = shl nsw i64 %offset, 2 @@ -423,31 +417,33 @@ for.body: ; preds = %entry, %for.body define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: not_same_offset_fail: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB4_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: add r5, r7, r5 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: mulli r11, r4, 10 -; CHECK-NEXT: sldi r8, r4, 2 -; CHECK-NEXT: add r8, r4, r8 -; CHECK-NEXT: sldi r9, r4, 3 -; CHECK-NEXT: sub r10, r9, r4 -; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: mulli r12, r4, 10 +; CHECK-NEXT: sldi r3, r4, 2 +; CHECK-NEXT: add r9, r4, r3 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: sldi r10, r4, 3 +; CHECK-NEXT: sub r11, r10, r4 +; CHECK-NEXT: sldi r8, r4, 1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ldx r6, r5, r4 -; CHECK-NEXT: ldx r12, r5, r7 -; CHECK-NEXT: ldx r0, r5, r8 -; CHECK-NEXT: ldx r30, r5, r10 -; CHECK-NEXT: mulld r6, r12, r6 -; CHECK-NEXT: ldx r29, r5, r9 -; CHECK-NEXT: ldx r28, r5, r11 +; CHECK-NEXT: ldx r7, r5, r8 +; CHECK-NEXT: ldx r0, r5, r9 +; CHECK-NEXT: ldx r30, r5, r11 +; CHECK-NEXT: mulld r6, r7, r6 +; CHECK-NEXT: ldx r29, r5, r10 +; CHECK-NEXT: ldx r28, r5, r12 ; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: mulld r6, r6, r0 ; CHECK-NEXT: mulld r6, r6, r30 @@ -459,9 +455,6 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul2 = mul nsw i64 %offset, 5 @@ -541,35 +534,34 @@ for.body: ; preds = %entry, %for.body define i64 @two_chain_different_offsets_succ(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: two_chain_different_offsets_succ: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB5_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r8, r4, 2 -; CHECK-NEXT: add r7, r5, r4 +; CHECK-NEXT: sldi r3, r4, 2 +; CHECK-NEXT: add r8, r5, r4 ; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: add r5, r5, r8 -; CHECK-NEXT: add r7, r3, r7 +; CHECK-NEXT: add r3, r5, r3 +; CHECK-NEXT: add r8, r7, r8 ; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: add r5, r3, r5 +; CHECK-NEXT: add r5, r7, r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB5_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ld r6, 0(r7) -; CHECK-NEXT: ldx r8, r7, r4 +; CHECK-NEXT: ld r6, 0(r8) +; CHECK-NEXT: ldx r7, r8, r4 ; CHECK-NEXT: ld r9, 0(r5) ; CHECK-NEXT: ldx r10, r5, r4 -; CHECK-NEXT: addi r7, r7, 1 +; CHECK-NEXT: addi r8, r8, 1 ; CHECK-NEXT: addi r5, r5, 1 -; CHECK-NEXT: mulld r6, r8, r6 +; CHECK-NEXT: mulld r6, r7, r6 ; CHECK-NEXT: mulld r6, r6, r9 ; CHECK-NEXT: maddld r3, r6, r10, r3 ; CHECK-NEXT: bdnz .LBB5_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = mul nsw i64 %offset, 3 %mul2 = shl nsw i64 %offset, 2 @@ -639,15 +631,17 @@ for.body: ; preds = %entry, %for.body define i64 @two_chain_two_bases_succ(ptr %p, i64 %offset, i64 %base1, i64 %base2, i64 %n) { ; CHECK-LABEL: two_chain_two_bases_succ: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r8, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpdi r7, 0 -; CHECK-NEXT: ble cr0, .LBB6_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: add r5, r5, r4 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: add r6, r6, r4 ; CHECK-NEXT: mtctr r7 ; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: add r6, r3, r6 +; CHECK-NEXT: add r5, r8, r3 +; CHECK-NEXT: add r6, r8, r6 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_2: # %for.body @@ -664,9 +658,6 @@ define i64 @two_chain_two_bases_succ(ptr %p, i64 %offset, i64 %base1, i64 %base2 ; CHECK-NEXT: bdnz .LBB6_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB6_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = mul nsw i64 %offset, 3 %cmp44 = icmp sgt i64 %n, 0 diff --git a/llvm/test/CodeGen/PowerPC/ifcvt_cr_field.ll b/llvm/test/CodeGen/PowerPC/ifcvt_cr_field.ll index f5d1064047549..a219aea4fff7b 100644 --- a/llvm/test/CodeGen/PowerPC/ifcvt_cr_field.ll +++ b/llvm/test/CodeGen/PowerPC/ifcvt_cr_field.ll @@ -8,41 +8,35 @@ define dso_local signext i32 @test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) loc ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vcmpgtsw. 2, 2, 3 -; CHECK-NEXT: bge 6, .LBB0_2 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: bgelr 6 ; CHECK-NEXT: # %bb.1: # %land.rhs ; CHECK-NEXT: vcmpgtsw. 2, 4, 3 ; CHECK-NEXT: mfocrf 3, 2 ; CHECK-NEXT: rlwinm 3, 3, 25, 31, 31 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr ; ; CHECK-AIX-64-LABEL: test: ; CHECK-AIX-64: # %bb.0: # %entry ; CHECK-AIX-64-NEXT: vcmpgtsw. 2, 2, 3 -; CHECK-AIX-64-NEXT: bge 6, L..BB0_2 +; CHECK-AIX-64-NEXT: li 3, 0 +; CHECK-AIX-64-NEXT: bgelr 6 ; CHECK-AIX-64-NEXT: # %bb.1: # %land.rhs ; CHECK-AIX-64-NEXT: vcmpgtsw. 2, 4, 3 ; CHECK-AIX-64-NEXT: mfocrf 3, 2 ; CHECK-AIX-64-NEXT: rlwinm 3, 3, 25, 31, 31 ; CHECK-AIX-64-NEXT: blr -; CHECK-AIX-64-NEXT: L..BB0_2: -; CHECK-AIX-64-NEXT: li 3, 0 -; CHECK-AIX-64-NEXT: blr ; ; CHECK-AIX-32-LABEL: test: ; CHECK-AIX-32: # %bb.0: # %entry ; CHECK-AIX-32-NEXT: vcmpgtsw. 2, 2, 3 -; CHECK-AIX-32-NEXT: bge 6, L..BB0_2 +; CHECK-AIX-32-NEXT: li 3, 0 +; CHECK-AIX-32-NEXT: bgelr 6 ; CHECK-AIX-32-NEXT: # %bb.1: # %land.rhs ; CHECK-AIX-32-NEXT: vcmpgtsw. 2, 4, 3 ; CHECK-AIX-32-NEXT: mfocrf 3, 2 ; CHECK-AIX-32-NEXT: rlwinm 3, 3, 25, 31, 31 ; CHECK-AIX-32-NEXT: blr -; CHECK-AIX-32-NEXT: L..BB0_2: -; CHECK-AIX-32-NEXT: li 3, 0 -; CHECK-AIX-32-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b) %tobool.not = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll b/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll index 01bc1b4bc5bdb..814cadfa5d76c 100644 --- a/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll +++ b/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll @@ -89,6 +89,7 @@ define dso_local signext i32 @spillCRUNSET(ptr readonly %p1, i32 signext %p2, i3 ; CHECK-NOT: rlwinm [[REG2]], [[REG2]] ; CHECK: stw [[REG1]] ; CHECK: .LBB1_1: + entry: %and = and i32 %p3, 128 %tobool = icmp eq i32 %and, 0 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll index cc38e250f183f..8595115f0c63e 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -21,32 +21,29 @@ define i64 @test_no_prep(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_no_prep: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB0_4 +; CHECK-NEXT: beq cr0, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: mtctr r4 -; CHECK-NEXT: addi r5, r3, 4004 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: addi r3, r3, 4004 ; CHECK-NEXT: li r6, -3 ; CHECK-NEXT: li r7, -2 ; CHECK-NEXT: li r8, -1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: ldx r9, r5, r6 -; CHECK-NEXT: ldx r10, r5, r7 -; CHECK-NEXT: ldx r11, r5, r8 -; CHECK-NEXT: ld r12, 0(r5) -; CHECK-NEXT: addi r5, r5, 1 +; CHECK-NEXT: ldx r9, r3, r6 +; CHECK-NEXT: ldx r10, r3, r7 +; CHECK-NEXT: ldx r11, r3, r8 +; CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mulld r9, r10, r9 ; CHECK-NEXT: mulld r9, r9, r11 -; CHECK-NEXT: maddld r3, r9, r12, r3 +; CHECK-NEXT: maddld r5, r9, r12, r5 ; CHECK-NEXT: bdnz .LBB0_2 -; CHECK-NEXT: # %bb.3: # %bb25 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB0_3: # %bb25 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 @@ -99,31 +96,28 @@ bb25: ; preds = %bb3, %bb define i64 @test_ds_prep(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_prep: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB1_4 +; CHECK-NEXT: beq cr0, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: addi r7, r3, 4002 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r5, -1 +; CHECK-NEXT: li r3, -1 ; CHECK-NEXT: li r6, 1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: ldx r8, r7, r5 +; CHECK-NEXT: ldx r8, r7, r3 ; CHECK-NEXT: ld r9, 0(r7) ; CHECK-NEXT: ldx r10, r7, r6 ; CHECK-NEXT: ld r11, 4(r7) ; CHECK-NEXT: addi r7, r7, 1 ; CHECK-NEXT: mulld r8, r9, r8 ; CHECK-NEXT: mulld r8, r8, r10 -; CHECK-NEXT: maddld r3, r8, r11, r3 +; CHECK-NEXT: maddld r5, r8, r11, r5 ; CHECK-NEXT: bdnz .LBB1_2 -; CHECK-NEXT: # %bb.3: # %bb25 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB1_3: # %bb25 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 @@ -186,14 +180,14 @@ bb25: ; preds = %bb3, %bb define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_max_number_reminder: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB2_4 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r10, r3, 4002 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r5, -1 +; CHECK-NEXT: li r3, -1 ; CHECK-NEXT: li r6, 1 ; CHECK-NEXT: li r7, 3 ; CHECK-NEXT: li r8, 5 @@ -206,7 +200,7 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB2_2: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: ldx r11, r10, r5 +; CHECK-NEXT: ldx r11, r10, r3 ; CHECK-NEXT: ld r12, 0(r10) ; CHECK-NEXT: ldx r0, r10, r6 ; CHECK-NEXT: ldx r30, r10, r7 @@ -223,19 +217,17 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: mulld r11, r11, r28 ; CHECK-NEXT: mulld r11, r11, r27 ; CHECK-NEXT: mulld r11, r11, r26 -; CHECK-NEXT: maddld r3, r11, r25, r3 +; CHECK-NEXT: maddld r5, r11, r25, r5 ; CHECK-NEXT: bdnz .LBB2_2 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r3, r3, r4 ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB2_4: # %bb45 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 @@ -303,30 +295,27 @@ bb45: ; preds = %bb3, %bb define dso_local i64 @test_update_ds_prep_interact(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_update_ds_prep_interact: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB3_4 +; CHECK-NEXT: beq cr0, .LBB3_3 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: mtctr r4 -; CHECK-NEXT: addi r5, r3, 3998 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: addi r3, r3, 3998 ; CHECK-NEXT: li r6, -1 ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_2: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: ldu r8, 4(r5) -; CHECK-NEXT: ldx r9, r5, r6 -; CHECK-NEXT: ldx r10, r5, r7 -; CHECK-NEXT: ld r11, 4(r5) +; CHECK-NEXT: ldu r8, 4(r3) +; CHECK-NEXT: ldx r9, r3, r6 +; CHECK-NEXT: ldx r10, r3, r7 +; CHECK-NEXT: ld r11, 4(r3) ; CHECK-NEXT: mulld r8, r8, r9 ; CHECK-NEXT: mulld r8, r8, r10 -; CHECK-NEXT: maddld r3, r8, r11, r3 +; CHECK-NEXT: maddld r5, r8, r11, r5 ; CHECK-NEXT: bdnz .LBB3_2 -; CHECK-NEXT: # %bb.3: # %bb26 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB3_3: # %bb26 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 @@ -380,31 +369,28 @@ bb26: ; preds = %bb3, %bb define i64 @test_update_ds_prep_nointeract(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_update_ds_prep_nointeract: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB4_4 +; CHECK-NEXT: beq cr0, .LBB4_3 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: mtctr r4 -; CHECK-NEXT: addi r5, r3, 4000 -; CHECK-NEXT: addi r6, r3, 4003 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: addi r6, r3, 4000 +; CHECK-NEXT: addi r3, r3, 4003 ; CHECK-NEXT: li r7, -1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_2: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: lbzu r8, 1(r5) -; CHECK-NEXT: ldx r9, r6, r7 -; CHECK-NEXT: ld r10, 0(r6) -; CHECK-NEXT: ld r11, 4(r6) -; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: lbzu r8, 1(r6) +; CHECK-NEXT: ldx r9, r3, r7 +; CHECK-NEXT: ld r10, 0(r3) +; CHECK-NEXT: ld r11, 4(r3) +; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mulld r8, r9, r8 ; CHECK-NEXT: mulld r8, r8, r10 -; CHECK-NEXT: maddld r3, r8, r11, r3 +; CHECK-NEXT: maddld r5, r8, r11, r5 ; CHECK-NEXT: bdnz .LBB4_2 -; CHECK-NEXT: # %bb.3: # %bb25 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB4_3: # %bb25 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 @@ -462,23 +448,23 @@ bb25: ; preds = %bb3, %bb define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %arg2) { ; CHECK-LABEL: test_ds_multiple_chains: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: cmplwi r5, 0 ; CHECK-NEXT: beq cr0, .LBB5_4 ; CHECK-NEXT: # %bb.1: # %bb4.preheader ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: addi r6, r3, 4001 +; CHECK-NEXT: addi r3, r3, 4001 ; CHECK-NEXT: addi r4, r4, 4001 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r7, 9 ; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB5_2: # %bb4 ; CHECK-NEXT: # -; CHECK-NEXT: ld r8, 0(r6) -; CHECK-NEXT: ldx r9, r6, r7 -; CHECK-NEXT: ld r10, 4(r6) -; CHECK-NEXT: ld r11, 8(r6) -; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: ld r8, 0(r3) +; CHECK-NEXT: ldx r9, r3, r7 +; CHECK-NEXT: ld r10, 4(r3) +; CHECK-NEXT: ld r11, 8(r3) +; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mulld r8, r9, r8 ; CHECK-NEXT: ld r12, 0(r4) ; CHECK-NEXT: ldx r0, r4, r7 @@ -490,14 +476,12 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext % ; CHECK-NEXT: mulld r8, r8, r12 ; CHECK-NEXT: mulld r8, r8, r0 ; CHECK-NEXT: mulld r8, r8, r30 -; CHECK-NEXT: maddld r3, r8, r9, r3 +; CHECK-NEXT: maddld r6, r8, r9, r6 ; CHECK-NEXT: bdnz .LBB5_2 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r3, r3, r5 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: addi r3, r5, 0 +; CHECK-NEXT: .LBB5_4: # %bb43 +; CHECK-NEXT: add r3, r6, r5 ; CHECK-NEXT: blr bb: %i = sext i32 %arg2 to i64 @@ -579,16 +563,17 @@ bb43: ; preds = %bb4, %bb define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_cross_basic_blocks: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: mr r5, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB6_9 +; CHECK-NEXT: beqlr cr0 ; CHECK-NEXT: # %bb.1: # %bb3 -; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: ld r5, .LC0@toc@l(r5) +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: addi r6, r3, 4009 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: addi r5, r5, 4009 ; CHECK-NEXT: li r7, -7 ; CHECK-NEXT: li r8, -6 ; CHECK-NEXT: li r9, 1 @@ -596,23 +581,24 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: li r11, 1 ; CHECK-NEXT: li r12, 1 ; CHECK-NEXT: li r0, 1 -; CHECK-NEXT: ld r5, 0(r5) +; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 1 -; CHECK-NEXT: addi r5, r5, -1 +; CHECK-NEXT: addi r6, r3, -1 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: b .LBB6_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_2: # %bb18 ; CHECK-NEXT: # -; CHECK-NEXT: addi r30, r6, -9 +; CHECK-NEXT: addi r30, r5, -9 ; CHECK-NEXT: ld r30, 0(r30) ; CHECK-NEXT: add r0, r30, r0 -; CHECK-NEXT: ld r30, -8(r6) +; CHECK-NEXT: ld r30, -8(r5) ; CHECK-NEXT: add r12, r30, r12 ; CHECK-NEXT: .LBB6_3: # %bb49 ; CHECK-NEXT: # ; CHECK-NEXT: mulld r30, r12, r0 -; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: mulld r30, r30, r11 ; CHECK-NEXT: mulld r30, r30, r10 ; CHECK-NEXT: mulld r30, r30, r9 @@ -620,7 +606,7 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: bdz .LBB6_8 ; CHECK-NEXT: .LBB6_4: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: lbzu r30, 1(r5) +; CHECK-NEXT: lbzu r30, 1(r6) ; CHECK-NEXT: mulli r29, r30, 171 ; CHECK-NEXT: rlwinm r28, r29, 24, 8, 30 ; CHECK-NEXT: srwi r29, r29, 9 @@ -635,17 +621,17 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: bne cr0, .LBB6_7 ; CHECK-NEXT: # %bb.6: # %bb31 ; CHECK-NEXT: # -; CHECK-NEXT: ldx r30, r6, r7 +; CHECK-NEXT: ldx r30, r5, r7 ; CHECK-NEXT: add r11, r30, r11 -; CHECK-NEXT: ld r30, -4(r6) +; CHECK-NEXT: ld r30, -4(r5) ; CHECK-NEXT: add r10, r30, r10 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_7: # %bb40 ; CHECK-NEXT: # -; CHECK-NEXT: ldx r30, r6, r8 +; CHECK-NEXT: ldx r30, r5, r8 ; CHECK-NEXT: add r9, r30, r9 -; CHECK-NEXT: ld r30, 0(r6) +; CHECK-NEXT: ld r30, 0(r5) ; CHECK-NEXT: add r4, r30, r4 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .LBB6_8: @@ -653,9 +639,6 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB6_9: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 @@ -756,12 +739,12 @@ bb64: ; preds = %bb49, %bb define float @test_ds_float(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_float: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: cmpwi r4, 0 -; CHECK-NEXT: ble cr0, .LBB7_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %bb2 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: addi r3, r3, 4002 -; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 ; CHECK-NEXT: .p2align 4 @@ -779,9 +762,6 @@ define float @test_ds_float(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: bdnz .LBB7_2 ; CHECK-NEXT: # %bb.3: # %bb26 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB7_4: -; CHECK-NEXT: xxlxor f1, f1, f1 -; CHECK-NEXT: blr bb: %i = icmp sgt i32 %arg1, 0 br i1 %i, label %bb2, label %bb26 @@ -836,12 +816,12 @@ bb26: ; preds = %bb4, %bb define float @test_ds_combine_float_int(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_combine_float_int: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: cmpwi r4, 0 -; CHECK-NEXT: ble cr0, .LBB8_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %bb2 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: addi r3, r3, 4002 -; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 ; CHECK-NEXT: .p2align 4 @@ -860,9 +840,6 @@ define float @test_ds_combine_float_int(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: bdnz .LBB8_2 ; CHECK-NEXT: # %bb.3: # %bb27 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB8_4: -; CHECK-NEXT: xxlxor f1, f1, f1 -; CHECK-NEXT: blr bb: %i = icmp sgt i32 %arg1, 0 br i1 %i, label %bb2, label %bb27 @@ -917,30 +894,27 @@ bb27: ; preds = %bb4, %bb define i64 @test_ds_lwa_prep(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_lwa_prep: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: cmpwi r4, 0 -; CHECK-NEXT: ble cr0, .LBB9_4 +; CHECK-NEXT: ble cr0, .LBB9_3 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: mtctr r4 -; CHECK-NEXT: addi r5, r3, 2 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: addi r3, r3, 2 ; CHECK-NEXT: li r6, -1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB9_2: # %bb3 ; CHECK-NEXT: # -; CHECK-NEXT: lwax r7, r5, r6 -; CHECK-NEXT: lwa r8, 0(r5) -; CHECK-NEXT: lwa r9, 4(r5) -; CHECK-NEXT: lwa r10, 8(r5) -; CHECK-NEXT: addi r5, r5, 1 +; CHECK-NEXT: lwax r7, r3, r6 +; CHECK-NEXT: lwa r8, 0(r3) +; CHECK-NEXT: lwa r9, 4(r3) +; CHECK-NEXT: lwa r10, 8(r3) +; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mulld r7, r8, r7 ; CHECK-NEXT: mulld r7, r7, r9 -; CHECK-NEXT: maddld r3, r7, r10, r3 +; CHECK-NEXT: maddld r5, r7, r10, r5 ; CHECK-NEXT: bdnz .LBB9_2 -; CHECK-NEXT: # %bb.3: # %bb29 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB9_4: -; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: .LBB9_3: # %bb29 +; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll index 7eef9a4644db1..6ed4e62c3de36 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll @@ -17,11 +17,13 @@ define i64 @foo(ptr %p, i32 signext %n, i32 signext %count) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r6, r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpwi r4, 0 -; CHECK-NEXT: ble cr0, .LBB0_4 +; CHECK-NEXT: blelr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: addi r6, r3, 5 ; CHECK-NEXT: addi r3, r4, -1 +; CHECK-NEXT: addi r6, r6, 5 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mtctr r3 @@ -37,9 +39,6 @@ define i64 @foo(ptr %p, i32 signext %n, i32 signext %count) { ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %cmp16 = icmp sgt i32 %n, 0 br i1 %cmp16, label %for.body.preheader, label %for.cond.cleanup @@ -82,27 +81,25 @@ for.body: ; preds = %for.body.preheader, define zeroext i8 @foo1(ptr %p, i32 signext %n, i32 signext %count) { ; CHECK-LABEL: foo1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: cmpwi r4, 0 -; CHECK-NEXT: ble cr0, .LBB1_4 +; CHECK-NEXT: ble cr0, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: addi r4, r4, -1 ; CHECK-NEXT: sub r3, r3, r5 -; CHECK-NEXT: addi r6, r3, 1000 -; CHECK-NEXT: addi r3, r4, -1 -; CHECK-NEXT: clrldi r3, r3, 32 -; CHECK-NEXT: addi r3, r3, 1 -; CHECK-NEXT: mtctr r3 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r3, r3, 1000 +; CHECK-NEXT: addi r4, r4, 1 +; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lbzux r4, r6, r5 -; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: lbzux r4, r3, r5 +; CHECK-NEXT: add r6, r4, r6 ; CHECK-NEXT: bdnz .LBB1_2 -; CHECK-NEXT: # %bb.3: # %for.cond.cleanup -; CHECK-NEXT: clrldi r3, r3, 56 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .LBB1_3: # %for.cond.cleanup +; CHECK-NEXT: clrldi r3, r6, 56 ; CHECK-NEXT: blr entry: %cmp10 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll index 89e5147aecc5f..79b2cb045914f 100644 --- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -252,17 +252,14 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun ; CHECK-LABEL: testNestedPHI: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: # implicit-def: $acc0 ; CHECK-NEXT: beq cr0, .LBB3_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: xvf32gernp acc0, v2, v2 -; CHECK-NEXT: cmpwi r4, 1 -; CHECK-NEXT: bge cr0, .LBB3_3 -; CHECK-NEXT: b .LBB3_5 -; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: # implicit-def: $acc0 +; CHECK-NEXT: .LBB3_2: # %if.end ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB3_5 -; CHECK-NEXT: .LBB3_3: # %for.body.preheader +; CHECK-NEXT: # %bb.3: # %for.body.preheader ; CHECK-NEXT: addi r3, r4, -1 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: addi r3, r3, 1 @@ -284,17 +281,14 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun ; CHECK-BE-LABEL: testNestedPHI: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: cmplwi r3, 0 +; CHECK-BE-NEXT: # implicit-def: $acc0 ; CHECK-BE-NEXT: beq cr0, .LBB3_2 ; CHECK-BE-NEXT: # %bb.1: # %if.then ; CHECK-BE-NEXT: xvf32gernp acc0, v2, v2 -; CHECK-BE-NEXT: cmpwi r4, 1 -; CHECK-BE-NEXT: bge cr0, .LBB3_3 -; CHECK-BE-NEXT: b .LBB3_5 -; CHECK-BE-NEXT: .LBB3_2: -; CHECK-BE-NEXT: # implicit-def: $acc0 +; CHECK-BE-NEXT: .LBB3_2: # %if.end ; CHECK-BE-NEXT: cmpwi r4, 1 ; CHECK-BE-NEXT: blt cr0, .LBB3_5 -; CHECK-BE-NEXT: .LBB3_3: # %for.body.preheader +; CHECK-BE-NEXT: # %bb.3: # %for.body.preheader ; CHECK-BE-NEXT: addi r3, r4, -1 ; CHECK-BE-NEXT: clrldi r3, r3, 32 ; CHECK-BE-NEXT: addi r3, r3, 1 diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll index c36b3bfd50228..ced8d85a0e4dd 100644 --- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll @@ -31,28 +31,23 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 { ; CHECK-NEXT: cmpdi cr1, r4, 0 ; CHECK-NEXT: cmpdi cr5, r5, 0 ; CHECK-NEXT: cmpldi cr6, r3, 0 -; CHECK-NEXT: beq cr6, .LBB0_3 +; CHECK-NEXT: # implicit-def: $r3 +; CHECK-NEXT: beq cr6, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb10 ; CHECK-NEXT: lwz r3, 0(r3) +; CHECK-NEXT: .LBB0_2: # %bb12 +; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: bc 12, 4*cr1+eq, .LBB0_4 -; CHECK-NEXT: .LBB0_2: # %bb14 +; CHECK-NEXT: # %bb.3: # %bb14 ; CHECK-NEXT: lwz r5, 0(r3) -; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: # implicit-def: $r3 -; CHECK-NEXT: bc 4, 4*cr1+eq, .LBB0_2 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: .LBB0_5: # %bb16 +; CHECK-NEXT: .LBB0_4: # %bb16 ; CHECK-NEXT: crnot 4*cr1+lt, eq ; CHECK-NEXT: crnot 4*cr5+un, 4*cr5+eq -; CHECK-NEXT: bc 12, 4*cr5+eq, .LBB0_7 -; CHECK-NEXT: # %bb.6: # %bb18 -; CHECK-NEXT: lwz r4, 0(r3) -; CHECK-NEXT: b .LBB0_8 -; CHECK-NEXT: .LBB0_7: ; CHECK-NEXT: # implicit-def: $r4 -; CHECK-NEXT: .LBB0_8: # %bb20 +; CHECK-NEXT: bc 12, 4*cr5+eq, .LBB0_6 +; CHECK-NEXT: # %bb.5: # %bb18 +; CHECK-NEXT: lwz r4, 0(r3) +; CHECK-NEXT: .LBB0_6: # %bb20 ; CHECK-NEXT: mfcr r12 ; CHECK-NEXT: cmpwi cr3, r4, -1 ; CHECK-NEXT: cmpwi cr2, r3, -1 @@ -62,38 +57,38 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 { ; CHECK-NEXT: crand 4*cr5+lt, 4*cr3+gt, 4*cr5+un ; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt ; CHECK-NEXT: # implicit-def: $x3 -; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10 -; CHECK-NEXT: # %bb.9: # %bb34 +; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_8 +; CHECK-NEXT: # %bb.7: # %bb34 ; CHECK-NEXT: ld r3, 0(r3) -; CHECK-NEXT: .LBB0_10: # %bb36 +; CHECK-NEXT: .LBB0_8: # %bb36 ; CHECK-NEXT: cmpwi cr2, r5, 0 ; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_12 -; CHECK-NEXT: # %bb.11: # %bb38 +; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10 +; CHECK-NEXT: # %bb.9: # %bb38 ; CHECK-NEXT: ld r4, 0(r3) -; CHECK-NEXT: .LBB0_12: # %bb40 +; CHECK-NEXT: .LBB0_10: # %bb40 ; CHECK-NEXT: crand 4*cr6+gt, 4*cr7+lt, 4*cr1+lt ; CHECK-NEXT: crand 4*cr6+lt, 4*cr6+lt, 4*cr5+un ; CHECK-NEXT: crnot 4*cr6+un, 4*cr1+eq ; CHECK-NEXT: # implicit-def: $x6 -; CHECK-NEXT: bc 4, 4*cr6+lt, .LBB0_14 -; CHECK-NEXT: # %bb.13: # %bb48 +; CHECK-NEXT: bc 4, 4*cr6+lt, .LBB0_12 +; CHECK-NEXT: # %bb.11: # %bb48 ; CHECK-NEXT: ld r6, 0(r3) -; CHECK-NEXT: .LBB0_14: # %bb50 +; CHECK-NEXT: .LBB0_12: # %bb50 ; CHECK-NEXT: cmpwi cr3, r5, -1 ; CHECK-NEXT: crand 4*cr7+lt, 4*cr2+lt, 4*cr6+un ; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_16 -; CHECK-NEXT: # %bb.15: # %bb52 +; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_14 +; CHECK-NEXT: # %bb.13: # %bb52 ; CHECK-NEXT: lwz r5, 0(r3) -; CHECK-NEXT: .LBB0_16: # %bb54 +; CHECK-NEXT: .LBB0_14: # %bb54 ; CHECK-NEXT: mfocrf r7, 128 ; CHECK-NEXT: stw r7, -4(r1) ; CHECK-NEXT: # implicit-def: $r7 -; CHECK-NEXT: bc 4, 4*cr7+lt, .LBB0_18 -; CHECK-NEXT: # %bb.17: # %bb56 +; CHECK-NEXT: bc 4, 4*cr7+lt, .LBB0_16 +; CHECK-NEXT: # %bb.15: # %bb56 ; CHECK-NEXT: lwz r7, 0(r3) -; CHECK-NEXT: .LBB0_18: # %bb58 +; CHECK-NEXT: .LBB0_16: # %bb58 ; CHECK-NEXT: lwz r6, 92(r6) ; CHECK-NEXT: cmpwi cr4, r7, 1 ; CHECK-NEXT: crand 4*cr7+un, 4*cr3+gt, 4*cr6+un @@ -106,10 +101,10 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 { ; CHECK-NEXT: crand 4*cr6+gt, 4*cr3+lt, 4*cr6+gt ; CHECK-NEXT: cmpwi r6, 1 ; CHECK-NEXT: crand 4*cr6+lt, lt, 4*cr6+lt -; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_20 -; CHECK-NEXT: # %bb.19: # %bb68 +; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_18 +; CHECK-NEXT: # %bb.17: # %bb68 ; CHECK-NEXT: ld r5, 0(r3) -; CHECK-NEXT: .LBB0_20: # %bb70 +; CHECK-NEXT: .LBB0_18: # %bb70 ; CHECK-NEXT: ld r6, 0(r3) ; CHECK-NEXT: lwz r9, -4(r1) ; CHECK-NEXT: crandc 4*cr5+gt, 4*cr5+gt, 4*cr7+eq diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll index 8df47808169be..fa97eddb09cc9 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll @@ -1920,6 +1920,7 @@ entry: define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P10-LABEL: shrinkwrap: ; BE-P10: # %bb.0: # %entry +; BE-P10-NEXT: li r4, 0 ; BE-P10-NEXT: cmpldi r3, 0 ; BE-P10-NEXT: beq cr0, L..BB2_2 ; BE-P10-NEXT: # %bb.1: # %if.end @@ -1937,19 +1938,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P10-NEXT: nop ; BE-P10-NEXT: lwz r4, 16(r31) ; BE-P10-NEXT: ld r31, 136(r1) # 8-byte Folded Reload -; BE-P10-NEXT: add r3, r4, r3 +; BE-P10-NEXT: add r4, r4, r3 ; BE-P10-NEXT: addi r1, r1, 144 ; BE-P10-NEXT: ld r0, 16(r1) -; BE-P10-NEXT: clrldi r3, r3, 32 ; BE-P10-NEXT: hashchk r0, -16(r1) ; BE-P10-NEXT: mtlr r0 -; BE-P10-NEXT: blr -; BE-P10-NEXT: L..BB2_2: -; BE-P10-NEXT: li r3, 0 +; BE-P10-NEXT: L..BB2_2: # %return +; BE-P10-NEXT: clrldi r3, r4, 32 ; BE-P10-NEXT: blr ; ; BE-P9-LABEL: shrinkwrap: ; BE-P9: # %bb.0: # %entry +; BE-P9-NEXT: li r4, 0 ; BE-P9-NEXT: cmpldi r3, 0 ; BE-P9-NEXT: beq cr0, L..BB2_2 ; BE-P9-NEXT: # %bb.1: # %if.end @@ -1967,20 +1967,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P9-NEXT: nop ; BE-P9-NEXT: lwz r4, 16(r31) ; BE-P9-NEXT: ld r31, 136(r1) # 8-byte Folded Reload -; BE-P9-NEXT: add r3, r4, r3 +; BE-P9-NEXT: add r4, r4, r3 ; BE-P9-NEXT: addi r1, r1, 144 ; BE-P9-NEXT: ld r0, 16(r1) -; BE-P9-NEXT: clrldi r3, r3, 32 -; BE-P9-NEXT: mtlr r0 ; BE-P9-NEXT: hashchk r0, -16(r1) -; BE-P9-NEXT: blr -; BE-P9-NEXT: L..BB2_2: -; BE-P9-NEXT: li r3, 0 +; BE-P9-NEXT: mtlr r0 +; BE-P9-NEXT: L..BB2_2: # %return +; BE-P9-NEXT: clrldi r3, r4, 32 ; BE-P9-NEXT: blr ; ; BE-P8-LABEL: shrinkwrap: ; BE-P8: # %bb.0: # %entry ; BE-P8-NEXT: cmpldi r3, 0 +; BE-P8-NEXT: li r4, 0 ; BE-P8-NEXT: beq cr0, L..BB2_2 ; BE-P8-NEXT: # %bb.1: # %if.end ; BE-P8-NEXT: mflr r0 @@ -1997,19 +1996,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P8-NEXT: nop ; BE-P8-NEXT: lwz r4, 16(r31) ; BE-P8-NEXT: ld r31, 136(r1) # 8-byte Folded Reload -; BE-P8-NEXT: add r3, r4, r3 +; BE-P8-NEXT: add r4, r4, r3 ; BE-P8-NEXT: addi r1, r1, 144 ; BE-P8-NEXT: ld r0, 16(r1) -; BE-P8-NEXT: clrldi r3, r3, 32 ; BE-P8-NEXT: hashchk r0, -16(r1) ; BE-P8-NEXT: mtlr r0 -; BE-P8-NEXT: blr -; BE-P8-NEXT: L..BB2_2: -; BE-P8-NEXT: li r3, 0 +; BE-P8-NEXT: L..BB2_2: # %return +; BE-P8-NEXT: clrldi r3, r4, 32 ; BE-P8-NEXT: blr ; ; BE-32BIT-P10-LABEL: shrinkwrap: ; BE-32BIT-P10: # %bb.0: # %entry +; BE-32BIT-P10-NEXT: li r4, 0 ; BE-32BIT-P10-NEXT: cmplwi r3, 0 ; BE-32BIT-P10-NEXT: beq cr0, L..BB2_2 ; BE-32BIT-P10-NEXT: # %bb.1: # %if.end @@ -2027,18 +2025,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P10-NEXT: nop ; BE-32BIT-P10-NEXT: lwz r4, 16(r31) ; BE-32BIT-P10-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload -; BE-32BIT-P10-NEXT: add r3, r4, r3 +; BE-32BIT-P10-NEXT: add r4, r4, r3 ; BE-32BIT-P10-NEXT: addi r1, r1, 80 ; BE-32BIT-P10-NEXT: lwz r0, 8(r1) ; BE-32BIT-P10-NEXT: hashchk r0, -16(r1) ; BE-32BIT-P10-NEXT: mtlr r0 -; BE-32BIT-P10-NEXT: blr -; BE-32BIT-P10-NEXT: L..BB2_2: -; BE-32BIT-P10-NEXT: li r3, 0 +; BE-32BIT-P10-NEXT: L..BB2_2: # %return +; BE-32BIT-P10-NEXT: mr r3, r4 ; BE-32BIT-P10-NEXT: blr ; ; BE-32BIT-P9-LABEL: shrinkwrap: ; BE-32BIT-P9: # %bb.0: # %entry +; BE-32BIT-P9-NEXT: li r4, 0 ; BE-32BIT-P9-NEXT: cmplwi r3, 0 ; BE-32BIT-P9-NEXT: beq cr0, L..BB2_2 ; BE-32BIT-P9-NEXT: # %bb.1: # %if.end @@ -2056,19 +2054,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P9-NEXT: nop ; BE-32BIT-P9-NEXT: lwz r4, 16(r31) ; BE-32BIT-P9-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload -; BE-32BIT-P9-NEXT: add r3, r4, r3 +; BE-32BIT-P9-NEXT: add r4, r4, r3 ; BE-32BIT-P9-NEXT: addi r1, r1, 80 ; BE-32BIT-P9-NEXT: lwz r0, 8(r1) -; BE-32BIT-P9-NEXT: mtlr r0 ; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) -; BE-32BIT-P9-NEXT: blr -; BE-32BIT-P9-NEXT: L..BB2_2: -; BE-32BIT-P9-NEXT: li r3, 0 +; BE-32BIT-P9-NEXT: mtlr r0 +; BE-32BIT-P9-NEXT: L..BB2_2: # %return +; BE-32BIT-P9-NEXT: mr r3, r4 ; BE-32BIT-P9-NEXT: blr ; ; BE-32BIT-P8-LABEL: shrinkwrap: ; BE-32BIT-P8: # %bb.0: # %entry ; BE-32BIT-P8-NEXT: cmplwi r3, 0 +; BE-32BIT-P8-NEXT: li r4, 0 ; BE-32BIT-P8-NEXT: beq cr0, L..BB2_2 ; BE-32BIT-P8-NEXT: # %bb.1: # %if.end ; BE-32BIT-P8-NEXT: mflr r0 @@ -2085,18 +2083,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P8-NEXT: nop ; BE-32BIT-P8-NEXT: lwz r4, 16(r31) ; BE-32BIT-P8-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload -; BE-32BIT-P8-NEXT: add r3, r4, r3 +; BE-32BIT-P8-NEXT: add r4, r4, r3 ; BE-32BIT-P8-NEXT: addi r1, r1, 80 ; BE-32BIT-P8-NEXT: lwz r0, 8(r1) -; BE-32BIT-P8-NEXT: mtlr r0 ; BE-32BIT-P8-NEXT: hashchk r0, -16(r1) -; BE-32BIT-P8-NEXT: blr -; BE-32BIT-P8-NEXT: L..BB2_2: -; BE-32BIT-P8-NEXT: li r3, 0 +; BE-32BIT-P8-NEXT: mtlr r0 +; BE-32BIT-P8-NEXT: L..BB2_2: # %return +; BE-32BIT-P8-NEXT: mr r3, r4 ; BE-32BIT-P8-NEXT: blr ; ; BE-P10-PRIV-LABEL: shrinkwrap: ; BE-P10-PRIV: # %bb.0: # %entry +; BE-P10-PRIV-NEXT: li r4, 0 ; BE-P10-PRIV-NEXT: cmpldi r3, 0 ; BE-P10-PRIV-NEXT: beq cr0, L..BB2_2 ; BE-P10-PRIV-NEXT: # %bb.1: # %if.end @@ -2114,19 +2112,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P10-PRIV-NEXT: nop ; BE-P10-PRIV-NEXT: lwz r4, 16(r31) ; BE-P10-PRIV-NEXT: ld r31, 136(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: add r3, r4, r3 +; BE-P10-PRIV-NEXT: add r4, r4, r3 ; BE-P10-PRIV-NEXT: addi r1, r1, 144 ; BE-P10-PRIV-NEXT: ld r0, 16(r1) -; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 ; BE-P10-PRIV-NEXT: hashchkp r0, -16(r1) ; BE-P10-PRIV-NEXT: mtlr r0 -; BE-P10-PRIV-NEXT: blr -; BE-P10-PRIV-NEXT: L..BB2_2: -; BE-P10-PRIV-NEXT: li r3, 0 +; BE-P10-PRIV-NEXT: L..BB2_2: # %return +; BE-P10-PRIV-NEXT: clrldi r3, r4, 32 ; BE-P10-PRIV-NEXT: blr ; ; BE-P9-PRIV-LABEL: shrinkwrap: ; BE-P9-PRIV: # %bb.0: # %entry +; BE-P9-PRIV-NEXT: li r4, 0 ; BE-P9-PRIV-NEXT: cmpldi r3, 0 ; BE-P9-PRIV-NEXT: beq cr0, L..BB2_2 ; BE-P9-PRIV-NEXT: # %bb.1: # %if.end @@ -2144,20 +2141,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P9-PRIV-NEXT: nop ; BE-P9-PRIV-NEXT: lwz r4, 16(r31) ; BE-P9-PRIV-NEXT: ld r31, 136(r1) # 8-byte Folded Reload -; BE-P9-PRIV-NEXT: add r3, r4, r3 +; BE-P9-PRIV-NEXT: add r4, r4, r3 ; BE-P9-PRIV-NEXT: addi r1, r1, 144 ; BE-P9-PRIV-NEXT: ld r0, 16(r1) -; BE-P9-PRIV-NEXT: clrldi r3, r3, 32 -; BE-P9-PRIV-NEXT: mtlr r0 ; BE-P9-PRIV-NEXT: hashchkp r0, -16(r1) -; BE-P9-PRIV-NEXT: blr -; BE-P9-PRIV-NEXT: L..BB2_2: -; BE-P9-PRIV-NEXT: li r3, 0 +; BE-P9-PRIV-NEXT: mtlr r0 +; BE-P9-PRIV-NEXT: L..BB2_2: # %return +; BE-P9-PRIV-NEXT: clrldi r3, r4, 32 ; BE-P9-PRIV-NEXT: blr ; ; BE-P8-PRIV-LABEL: shrinkwrap: ; BE-P8-PRIV: # %bb.0: # %entry ; BE-P8-PRIV-NEXT: cmpldi r3, 0 +; BE-P8-PRIV-NEXT: li r4, 0 ; BE-P8-PRIV-NEXT: beq cr0, L..BB2_2 ; BE-P8-PRIV-NEXT: # %bb.1: # %if.end ; BE-P8-PRIV-NEXT: mflr r0 @@ -2174,19 +2170,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P8-PRIV-NEXT: nop ; BE-P8-PRIV-NEXT: lwz r4, 16(r31) ; BE-P8-PRIV-NEXT: ld r31, 136(r1) # 8-byte Folded Reload -; BE-P8-PRIV-NEXT: add r3, r4, r3 +; BE-P8-PRIV-NEXT: add r4, r4, r3 ; BE-P8-PRIV-NEXT: addi r1, r1, 144 ; BE-P8-PRIV-NEXT: ld r0, 16(r1) -; BE-P8-PRIV-NEXT: clrldi r3, r3, 32 ; BE-P8-PRIV-NEXT: hashchkp r0, -16(r1) ; BE-P8-PRIV-NEXT: mtlr r0 -; BE-P8-PRIV-NEXT: blr -; BE-P8-PRIV-NEXT: L..BB2_2: -; BE-P8-PRIV-NEXT: li r3, 0 +; BE-P8-PRIV-NEXT: L..BB2_2: # %return +; BE-P8-PRIV-NEXT: clrldi r3, r4, 32 ; BE-P8-PRIV-NEXT: blr ; ; BE-32BIT-P10-PRIV-LABEL: shrinkwrap: ; BE-32BIT-P10-PRIV: # %bb.0: # %entry +; BE-32BIT-P10-PRIV-NEXT: li r4, 0 ; BE-32BIT-P10-PRIV-NEXT: cmplwi r3, 0 ; BE-32BIT-P10-PRIV-NEXT: beq cr0, L..BB2_2 ; BE-32BIT-P10-PRIV-NEXT: # %bb.1: # %if.end @@ -2204,18 +2199,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P10-PRIV-NEXT: nop ; BE-32BIT-P10-PRIV-NEXT: lwz r4, 16(r31) ; BE-32BIT-P10-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload -; BE-32BIT-P10-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P10-PRIV-NEXT: add r4, r4, r3 ; BE-32BIT-P10-PRIV-NEXT: addi r1, r1, 80 ; BE-32BIT-P10-PRIV-NEXT: lwz r0, 8(r1) ; BE-32BIT-P10-PRIV-NEXT: hashchkp r0, -16(r1) ; BE-32BIT-P10-PRIV-NEXT: mtlr r0 -; BE-32BIT-P10-PRIV-NEXT: blr -; BE-32BIT-P10-PRIV-NEXT: L..BB2_2: -; BE-32BIT-P10-PRIV-NEXT: li r3, 0 +; BE-32BIT-P10-PRIV-NEXT: L..BB2_2: # %return +; BE-32BIT-P10-PRIV-NEXT: mr r3, r4 ; BE-32BIT-P10-PRIV-NEXT: blr ; ; BE-32BIT-P9-PRIV-LABEL: shrinkwrap: ; BE-32BIT-P9-PRIV: # %bb.0: # %entry +; BE-32BIT-P9-PRIV-NEXT: li r4, 0 ; BE-32BIT-P9-PRIV-NEXT: cmplwi r3, 0 ; BE-32BIT-P9-PRIV-NEXT: beq cr0, L..BB2_2 ; BE-32BIT-P9-PRIV-NEXT: # %bb.1: # %if.end @@ -2233,19 +2228,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P9-PRIV-NEXT: nop ; BE-32BIT-P9-PRIV-NEXT: lwz r4, 16(r31) ; BE-32BIT-P9-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload -; BE-32BIT-P9-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P9-PRIV-NEXT: add r4, r4, r3 ; BE-32BIT-P9-PRIV-NEXT: addi r1, r1, 80 ; BE-32BIT-P9-PRIV-NEXT: lwz r0, 8(r1) -; BE-32BIT-P9-PRIV-NEXT: mtlr r0 ; BE-32BIT-P9-PRIV-NEXT: hashchkp r0, -16(r1) -; BE-32BIT-P9-PRIV-NEXT: blr -; BE-32BIT-P9-PRIV-NEXT: L..BB2_2: -; BE-32BIT-P9-PRIV-NEXT: li r3, 0 +; BE-32BIT-P9-PRIV-NEXT: mtlr r0 +; BE-32BIT-P9-PRIV-NEXT: L..BB2_2: # %return +; BE-32BIT-P9-PRIV-NEXT: mr r3, r4 ; BE-32BIT-P9-PRIV-NEXT: blr ; ; BE-32BIT-P8-PRIV-LABEL: shrinkwrap: ; BE-32BIT-P8-PRIV: # %bb.0: # %entry ; BE-32BIT-P8-PRIV-NEXT: cmplwi r3, 0 +; BE-32BIT-P8-PRIV-NEXT: li r4, 0 ; BE-32BIT-P8-PRIV-NEXT: beq cr0, L..BB2_2 ; BE-32BIT-P8-PRIV-NEXT: # %bb.1: # %if.end ; BE-32BIT-P8-PRIV-NEXT: mflr r0 @@ -2262,14 +2257,13 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P8-PRIV-NEXT: nop ; BE-32BIT-P8-PRIV-NEXT: lwz r4, 16(r31) ; BE-32BIT-P8-PRIV-NEXT: lwz r31, 76(r1) # 4-byte Folded Reload -; BE-32BIT-P8-PRIV-NEXT: add r3, r4, r3 +; BE-32BIT-P8-PRIV-NEXT: add r4, r4, r3 ; BE-32BIT-P8-PRIV-NEXT: addi r1, r1, 80 ; BE-32BIT-P8-PRIV-NEXT: lwz r0, 8(r1) -; BE-32BIT-P8-PRIV-NEXT: mtlr r0 ; BE-32BIT-P8-PRIV-NEXT: hashchkp r0, -16(r1) -; BE-32BIT-P8-PRIV-NEXT: blr -; BE-32BIT-P8-PRIV-NEXT: L..BB2_2: -; BE-32BIT-P8-PRIV-NEXT: li r3, 0 +; BE-32BIT-P8-PRIV-NEXT: mtlr r0 +; BE-32BIT-P8-PRIV-NEXT: L..BB2_2: # %return +; BE-32BIT-P8-PRIV-NEXT: mr r3, r4 ; BE-32BIT-P8-PRIV-NEXT: blr entry: %local = alloca i32, align 4 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll index 829bf0f0d052e..81b24718cf1eb 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -2856,6 +2856,7 @@ entry: define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P10-LABEL: shrinkwrap: ; LE-P10: # %bb.0: # %entry +; LE-P10-NEXT: li r4, 0 ; LE-P10-NEXT: cmpldi r3, 0 ; LE-P10-NEXT: beq cr0, .LBB2_2 ; LE-P10-NEXT: # %bb.1: # %if.end @@ -2871,20 +2872,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P10-NEXT: mr r3, r4 ; LE-P10-NEXT: bl callee2@notoc ; LE-P10-NEXT: lwz r4, 16(r30) -; LE-P10-NEXT: add r3, r4, r3 +; LE-P10-NEXT: add r4, r4, r3 ; LE-P10-NEXT: addi r1, r1, 64 ; LE-P10-NEXT: ld r0, 16(r1) -; LE-P10-NEXT: clrldi r3, r3, 32 ; LE-P10-NEXT: hashchk r0, -24(r1) ; LE-P10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; LE-P10-NEXT: mtlr r0 -; LE-P10-NEXT: blr -; LE-P10-NEXT: .LBB2_2: -; LE-P10-NEXT: li r3, 0 +; LE-P10-NEXT: .LBB2_2: # %return +; LE-P10-NEXT: clrldi r3, r4, 32 ; LE-P10-NEXT: blr ; ; LE-P9-LABEL: shrinkwrap: ; LE-P9: # %bb.0: # %entry +; LE-P9-NEXT: li r4, 0 ; LE-P9-NEXT: cmpldi r3, 0 ; LE-P9-NEXT: beq cr0, .LBB2_2 ; LE-P9-NEXT: # %bb.1: # %if.end @@ -2901,21 +2901,20 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P9-NEXT: bl callee2 ; LE-P9-NEXT: nop ; LE-P9-NEXT: lwz r4, 16(r30) -; LE-P9-NEXT: add r3, r4, r3 +; LE-P9-NEXT: add r4, r4, r3 ; LE-P9-NEXT: addi r1, r1, 64 ; LE-P9-NEXT: ld r0, 16(r1) -; LE-P9-NEXT: clrldi r3, r3, 32 -; LE-P9-NEXT: mtlr r0 ; LE-P9-NEXT: hashchk r0, -24(r1) ; LE-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; LE-P9-NEXT: blr -; LE-P9-NEXT: .LBB2_2: -; LE-P9-NEXT: li r3, 0 +; LE-P9-NEXT: mtlr r0 +; LE-P9-NEXT: .LBB2_2: # %return +; LE-P9-NEXT: clrldi r3, r4, 32 ; LE-P9-NEXT: blr ; ; LE-P8-LABEL: shrinkwrap: ; LE-P8: # %bb.0: # %entry ; LE-P8-NEXT: cmpldi r3, 0 +; LE-P8-NEXT: li r4, 0 ; LE-P8-NEXT: beq cr0, .LBB2_2 ; LE-P8-NEXT: # %bb.1: # %if.end ; LE-P8-NEXT: mflr r0 @@ -2931,16 +2930,14 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P8-NEXT: bl callee2 ; LE-P8-NEXT: nop ; LE-P8-NEXT: lwz r4, 16(r30) -; LE-P8-NEXT: add r3, r4, r3 +; LE-P8-NEXT: add r4, r4, r3 ; LE-P8-NEXT: addi r1, r1, 64 ; LE-P8-NEXT: ld r0, 16(r1) -; LE-P8-NEXT: clrldi r3, r3, 32 ; LE-P8-NEXT: hashchk r0, -24(r1) -; LE-P8-NEXT: mtlr r0 ; LE-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; LE-P8-NEXT: blr -; LE-P8-NEXT: .LBB2_2: -; LE-P8-NEXT: li r3, 0 +; LE-P8-NEXT: mtlr r0 +; LE-P8-NEXT: .LBB2_2: # %return +; LE-P8-NEXT: clrldi r3, r4, 32 ; LE-P8-NEXT: blr ; ; LE-P10-O0-LABEL: shrinkwrap: @@ -3041,6 +3038,7 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; ; BE-P10-LABEL: shrinkwrap: ; BE-P10: # %bb.0: # %entry +; BE-P10-NEXT: li r4, 0 ; BE-P10-NEXT: cmpldi r3, 0 ; BE-P10-NEXT: beq cr0, .LBB2_2 ; BE-P10-NEXT: # %bb.1: # %if.end @@ -3058,19 +3056,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P10-NEXT: nop ; BE-P10-NEXT: lwz r4, 16(r30) ; BE-P10-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; BE-P10-NEXT: add r3, r4, r3 +; BE-P10-NEXT: add r4, r4, r3 ; BE-P10-NEXT: addi r1, r1, 144 ; BE-P10-NEXT: ld r0, 16(r1) -; BE-P10-NEXT: clrldi r3, r3, 32 ; BE-P10-NEXT: hashchk r0, -24(r1) ; BE-P10-NEXT: mtlr r0 -; BE-P10-NEXT: blr -; BE-P10-NEXT: .LBB2_2: -; BE-P10-NEXT: li r3, 0 +; BE-P10-NEXT: .LBB2_2: # %return +; BE-P10-NEXT: clrldi r3, r4, 32 ; BE-P10-NEXT: blr ; ; BE-P9-LABEL: shrinkwrap: ; BE-P9: # %bb.0: # %entry +; BE-P9-NEXT: li r4, 0 ; BE-P9-NEXT: cmpldi r3, 0 ; BE-P9-NEXT: beq cr0, .LBB2_2 ; BE-P9-NEXT: # %bb.1: # %if.end @@ -3088,20 +3085,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P9-NEXT: nop ; BE-P9-NEXT: lwz r4, 16(r30) ; BE-P9-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; BE-P9-NEXT: add r3, r4, r3 +; BE-P9-NEXT: add r4, r4, r3 ; BE-P9-NEXT: addi r1, r1, 144 ; BE-P9-NEXT: ld r0, 16(r1) -; BE-P9-NEXT: clrldi r3, r3, 32 -; BE-P9-NEXT: mtlr r0 ; BE-P9-NEXT: hashchk r0, -24(r1) -; BE-P9-NEXT: blr -; BE-P9-NEXT: .LBB2_2: -; BE-P9-NEXT: li r3, 0 +; BE-P9-NEXT: mtlr r0 +; BE-P9-NEXT: .LBB2_2: # %return +; BE-P9-NEXT: clrldi r3, r4, 32 ; BE-P9-NEXT: blr ; ; BE-P8-LABEL: shrinkwrap: ; BE-P8: # %bb.0: # %entry ; BE-P8-NEXT: cmpldi r3, 0 +; BE-P8-NEXT: li r4, 0 ; BE-P8-NEXT: beq cr0, .LBB2_2 ; BE-P8-NEXT: # %bb.1: # %if.end ; BE-P8-NEXT: mflr r0 @@ -3118,15 +3114,13 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P8-NEXT: nop ; BE-P8-NEXT: lwz r4, 16(r30) ; BE-P8-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; BE-P8-NEXT: add r3, r4, r3 +; BE-P8-NEXT: add r4, r4, r3 ; BE-P8-NEXT: addi r1, r1, 144 ; BE-P8-NEXT: ld r0, 16(r1) -; BE-P8-NEXT: clrldi r3, r3, 32 ; BE-P8-NEXT: hashchk r0, -24(r1) ; BE-P8-NEXT: mtlr r0 -; BE-P8-NEXT: blr -; BE-P8-NEXT: .LBB2_2: -; BE-P8-NEXT: li r3, 0 +; BE-P8-NEXT: .LBB2_2: # %return +; BE-P8-NEXT: clrldi r3, r4, 32 ; BE-P8-NEXT: blr ; ; BE-32BIT-P10-LABEL: shrinkwrap: @@ -3135,6 +3129,7 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P10-NEXT: stw r0, 4(r1) ; BE-32BIT-P10-NEXT: hashst r0, -16(r1) ; BE-32BIT-P10-NEXT: stwu r1, -32(r1) +; BE-32BIT-P10-NEXT: li r4, 0 ; BE-32BIT-P10-NEXT: cmplwi r3, 0 ; BE-32BIT-P10-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; BE-32BIT-P10-NEXT: beq cr0, .LBB2_2 @@ -3146,13 +3141,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P10-NEXT: mr r3, r4 ; BE-32BIT-P10-NEXT: bl callee2 ; BE-32BIT-P10-NEXT: lwz r4, 16(r30) -; BE-32BIT-P10-NEXT: add r3, r4, r3 -; BE-32BIT-P10-NEXT: b .LBB2_3 -; BE-32BIT-P10-NEXT: .LBB2_2: -; BE-32BIT-P10-NEXT: li r3, 0 -; BE-32BIT-P10-NEXT: .LBB2_3: # %return +; BE-32BIT-P10-NEXT: add r4, r4, r3 +; BE-32BIT-P10-NEXT: .LBB2_2: # %return ; BE-32BIT-P10-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload ; BE-32BIT-P10-NEXT: lwz r0, 36(r1) +; BE-32BIT-P10-NEXT: mr r3, r4 ; BE-32BIT-P10-NEXT: addi r1, r1, 32 ; BE-32BIT-P10-NEXT: hashchk r0, -16(r1) ; BE-32BIT-P10-NEXT: mtlr r0 @@ -3164,6 +3157,7 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P9-NEXT: stw r0, 4(r1) ; BE-32BIT-P9-NEXT: hashst r0, -16(r1) ; BE-32BIT-P9-NEXT: stwu r1, -32(r1) +; BE-32BIT-P9-NEXT: li r4, 0 ; BE-32BIT-P9-NEXT: cmplwi r3, 0 ; BE-32BIT-P9-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; BE-32BIT-P9-NEXT: beq cr0, .LBB2_2 @@ -3175,13 +3169,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P9-NEXT: mr r3, r4 ; BE-32BIT-P9-NEXT: bl callee2 ; BE-32BIT-P9-NEXT: lwz r4, 16(r30) -; BE-32BIT-P9-NEXT: add r3, r4, r3 -; BE-32BIT-P9-NEXT: b .LBB2_3 -; BE-32BIT-P9-NEXT: .LBB2_2: -; BE-32BIT-P9-NEXT: li r3, 0 -; BE-32BIT-P9-NEXT: .LBB2_3: # %return +; BE-32BIT-P9-NEXT: add r4, r4, r3 +; BE-32BIT-P9-NEXT: .LBB2_2: # %return ; BE-32BIT-P9-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload ; BE-32BIT-P9-NEXT: lwz r0, 36(r1) +; BE-32BIT-P9-NEXT: mr r3, r4 ; BE-32BIT-P9-NEXT: addi r1, r1, 32 ; BE-32BIT-P9-NEXT: mtlr r0 ; BE-32BIT-P9-NEXT: hashchk r0, -16(r1) @@ -3194,6 +3186,7 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P8-NEXT: hashst r0, -16(r1) ; BE-32BIT-P8-NEXT: stwu r1, -32(r1) ; BE-32BIT-P8-NEXT: cmplwi r3, 0 +; BE-32BIT-P8-NEXT: li r4, 0 ; BE-32BIT-P8-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; BE-32BIT-P8-NEXT: beq cr0, .LBB2_2 ; BE-32BIT-P8-NEXT: # %bb.1: # %if.end @@ -3204,13 +3197,11 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-32BIT-P8-NEXT: mr r3, r4 ; BE-32BIT-P8-NEXT: bl callee2 ; BE-32BIT-P8-NEXT: lwz r4, 16(r30) -; BE-32BIT-P8-NEXT: add r3, r4, r3 -; BE-32BIT-P8-NEXT: b .LBB2_3 -; BE-32BIT-P8-NEXT: .LBB2_2: -; BE-32BIT-P8-NEXT: li r3, 0 -; BE-32BIT-P8-NEXT: .LBB2_3: # %return +; BE-32BIT-P8-NEXT: add r4, r4, r3 +; BE-32BIT-P8-NEXT: .LBB2_2: # %return ; BE-32BIT-P8-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload ; BE-32BIT-P8-NEXT: lwz r0, 36(r1) +; BE-32BIT-P8-NEXT: mr r3, r4 ; BE-32BIT-P8-NEXT: addi r1, r1, 32 ; BE-32BIT-P8-NEXT: mtlr r0 ; BE-32BIT-P8-NEXT: hashchk r0, -16(r1) @@ -3218,6 +3209,7 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; ; LE-P10-PRIV-LABEL: shrinkwrap: ; LE-P10-PRIV: # %bb.0: # %entry +; LE-P10-PRIV-NEXT: li r4, 0 ; LE-P10-PRIV-NEXT: cmpldi r3, 0 ; LE-P10-PRIV-NEXT: beq cr0, .LBB2_2 ; LE-P10-PRIV-NEXT: # %bb.1: # %if.end @@ -3233,20 +3225,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P10-PRIV-NEXT: mr r3, r4 ; LE-P10-PRIV-NEXT: bl callee2@notoc ; LE-P10-PRIV-NEXT: lwz r4, 16(r30) -; LE-P10-PRIV-NEXT: add r3, r4, r3 +; LE-P10-PRIV-NEXT: add r4, r4, r3 ; LE-P10-PRIV-NEXT: addi r1, r1, 64 ; LE-P10-PRIV-NEXT: ld r0, 16(r1) -; LE-P10-PRIV-NEXT: clrldi r3, r3, 32 ; LE-P10-PRIV-NEXT: hashchkp r0, -24(r1) ; LE-P10-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; LE-P10-PRIV-NEXT: mtlr r0 -; LE-P10-PRIV-NEXT: blr -; LE-P10-PRIV-NEXT: .LBB2_2: -; LE-P10-PRIV-NEXT: li r3, 0 +; LE-P10-PRIV-NEXT: .LBB2_2: # %return +; LE-P10-PRIV-NEXT: clrldi r3, r4, 32 ; LE-P10-PRIV-NEXT: blr ; ; LE-P9-PRIV-LABEL: shrinkwrap: ; LE-P9-PRIV: # %bb.0: # %entry +; LE-P9-PRIV-NEXT: li r4, 0 ; LE-P9-PRIV-NEXT: cmpldi r3, 0 ; LE-P9-PRIV-NEXT: beq cr0, .LBB2_2 ; LE-P9-PRIV-NEXT: # %bb.1: # %if.end @@ -3263,21 +3254,20 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P9-PRIV-NEXT: bl callee2 ; LE-P9-PRIV-NEXT: nop ; LE-P9-PRIV-NEXT: lwz r4, 16(r30) -; LE-P9-PRIV-NEXT: add r3, r4, r3 +; LE-P9-PRIV-NEXT: add r4, r4, r3 ; LE-P9-PRIV-NEXT: addi r1, r1, 64 ; LE-P9-PRIV-NEXT: ld r0, 16(r1) -; LE-P9-PRIV-NEXT: clrldi r3, r3, 32 -; LE-P9-PRIV-NEXT: mtlr r0 ; LE-P9-PRIV-NEXT: hashchkp r0, -24(r1) ; LE-P9-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; LE-P9-PRIV-NEXT: blr -; LE-P9-PRIV-NEXT: .LBB2_2: -; LE-P9-PRIV-NEXT: li r3, 0 +; LE-P9-PRIV-NEXT: mtlr r0 +; LE-P9-PRIV-NEXT: .LBB2_2: # %return +; LE-P9-PRIV-NEXT: clrldi r3, r4, 32 ; LE-P9-PRIV-NEXT: blr ; ; LE-P8-PRIV-LABEL: shrinkwrap: ; LE-P8-PRIV: # %bb.0: # %entry ; LE-P8-PRIV-NEXT: cmpldi r3, 0 +; LE-P8-PRIV-NEXT: li r4, 0 ; LE-P8-PRIV-NEXT: beq cr0, .LBB2_2 ; LE-P8-PRIV-NEXT: # %bb.1: # %if.end ; LE-P8-PRIV-NEXT: mflr r0 @@ -3293,20 +3283,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; LE-P8-PRIV-NEXT: bl callee2 ; LE-P8-PRIV-NEXT: nop ; LE-P8-PRIV-NEXT: lwz r4, 16(r30) -; LE-P8-PRIV-NEXT: add r3, r4, r3 +; LE-P8-PRIV-NEXT: add r4, r4, r3 ; LE-P8-PRIV-NEXT: addi r1, r1, 64 ; LE-P8-PRIV-NEXT: ld r0, 16(r1) -; LE-P8-PRIV-NEXT: clrldi r3, r3, 32 ; LE-P8-PRIV-NEXT: hashchkp r0, -24(r1) -; LE-P8-PRIV-NEXT: mtlr r0 ; LE-P8-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; LE-P8-PRIV-NEXT: blr -; LE-P8-PRIV-NEXT: .LBB2_2: -; LE-P8-PRIV-NEXT: li r3, 0 +; LE-P8-PRIV-NEXT: mtlr r0 +; LE-P8-PRIV-NEXT: .LBB2_2: # %return +; LE-P8-PRIV-NEXT: clrldi r3, r4, 32 ; LE-P8-PRIV-NEXT: blr ; ; BE-P10-PRIV-LABEL: shrinkwrap: ; BE-P10-PRIV: # %bb.0: # %entry +; BE-P10-PRIV-NEXT: li r4, 0 ; BE-P10-PRIV-NEXT: cmpldi r3, 0 ; BE-P10-PRIV-NEXT: beq cr0, .LBB2_2 ; BE-P10-PRIV-NEXT: # %bb.1: # %if.end @@ -3324,19 +3313,18 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P10-PRIV-NEXT: nop ; BE-P10-PRIV-NEXT: lwz r4, 16(r30) ; BE-P10-PRIV-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; BE-P10-PRIV-NEXT: add r3, r4, r3 +; BE-P10-PRIV-NEXT: add r4, r4, r3 ; BE-P10-PRIV-NEXT: addi r1, r1, 144 ; BE-P10-PRIV-NEXT: ld r0, 16(r1) -; BE-P10-PRIV-NEXT: clrldi r3, r3, 32 ; BE-P10-PRIV-NEXT: hashchkp r0, -24(r1) ; BE-P10-PRIV-NEXT: mtlr r0 -; BE-P10-PRIV-NEXT: blr -; BE-P10-PRIV-NEXT: .LBB2_2: -; BE-P10-PRIV-NEXT: li r3, 0 +; BE-P10-PRIV-NEXT: .LBB2_2: # %return +; BE-P10-PRIV-NEXT: clrldi r3, r4, 32 ; BE-P10-PRIV-NEXT: blr ; ; BE-P9-PRIV-LABEL: shrinkwrap: ; BE-P9-PRIV: # %bb.0: # %entry +; BE-P9-PRIV-NEXT: li r4, 0 ; BE-P9-PRIV-NEXT: cmpldi r3, 0 ; BE-P9-PRIV-NEXT: beq cr0, .LBB2_2 ; BE-P9-PRIV-NEXT: # %bb.1: # %if.end @@ -3354,20 +3342,19 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P9-PRIV-NEXT: nop ; BE-P9-PRIV-NEXT: lwz r4, 16(r30) ; BE-P9-PRIV-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; BE-P9-PRIV-NEXT: add r3, r4, r3 +; BE-P9-PRIV-NEXT: add r4, r4, r3 ; BE-P9-PRIV-NEXT: addi r1, r1, 144 ; BE-P9-PRIV-NEXT: ld r0, 16(r1) -; BE-P9-PRIV-NEXT: clrldi r3, r3, 32 -; BE-P9-PRIV-NEXT: mtlr r0 ; BE-P9-PRIV-NEXT: hashchkp r0, -24(r1) -; BE-P9-PRIV-NEXT: blr -; BE-P9-PRIV-NEXT: .LBB2_2: -; BE-P9-PRIV-NEXT: li r3, 0 +; BE-P9-PRIV-NEXT: mtlr r0 +; BE-P9-PRIV-NEXT: .LBB2_2: # %return +; BE-P9-PRIV-NEXT: clrldi r3, r4, 32 ; BE-P9-PRIV-NEXT: blr ; ; BE-P8-PRIV-LABEL: shrinkwrap: ; BE-P8-PRIV: # %bb.0: # %entry ; BE-P8-PRIV-NEXT: cmpldi r3, 0 +; BE-P8-PRIV-NEXT: li r4, 0 ; BE-P8-PRIV-NEXT: beq cr0, .LBB2_2 ; BE-P8-PRIV-NEXT: # %bb.1: # %if.end ; BE-P8-PRIV-NEXT: mflr r0 @@ -3384,15 +3371,13 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 { ; BE-P8-PRIV-NEXT: nop ; BE-P8-PRIV-NEXT: lwz r4, 16(r30) ; BE-P8-PRIV-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; BE-P8-PRIV-NEXT: add r3, r4, r3 +; BE-P8-PRIV-NEXT: add r4, r4, r3 ; BE-P8-PRIV-NEXT: addi r1, r1, 144 ; BE-P8-PRIV-NEXT: ld r0, 16(r1) -; BE-P8-PRIV-NEXT: clrldi r3, r3, 32 ; BE-P8-PRIV-NEXT: hashchkp r0, -24(r1) ; BE-P8-PRIV-NEXT: mtlr r0 -; BE-P8-PRIV-NEXT: blr -; BE-P8-PRIV-NEXT: .LBB2_2: -; BE-P8-PRIV-NEXT: li r3, 0 +; BE-P8-PRIV-NEXT: .LBB2_2: # %return +; BE-P8-PRIV-NEXT: clrldi r3, r4, 32 ; BE-P8-PRIV-NEXT: blr entry: %local = alloca i32, align 4 diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll index 12d0b056ca886..6f7f2601f9405 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll @@ -6,6 +6,7 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-LABEL: shrinkwrapme: ; POWERPC64: # %bb.0: # %entry +; POWERPC64-NEXT: li 5, 0 ; POWERPC64-NEXT: cmpwi 4, 0 ; POWERPC64-NEXT: ble 0, .LBB0_4 ; POWERPC64-NEXT: # %bb.1: # %for.body.preheader @@ -31,12 +32,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill ; POWERPC64-NEXT: mtctr 4 -; POWERPC64-NEXT: li 4, 0 ; POWERPC64-NEXT: .p2align 4 ; POWERPC64-NEXT: .LBB0_2: # %for.body ; POWERPC64-NEXT: # ; POWERPC64-NEXT: #APP -; POWERPC64-NEXT: add 4, 3, 4 +; POWERPC64-NEXT: add 5, 3, 5 ; POWERPC64-NEXT: #NO_APP ; POWERPC64-NEXT: bdnz .LBB0_2 ; POWERPC64-NEXT: # %bb.3: @@ -44,7 +44,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; POWERPC64-NEXT: extsw 3, 4 ; POWERPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload @@ -59,14 +58,13 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-NEXT: ld 16, -128(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload -; POWERPC64-NEXT: blr -; POWERPC64-NEXT: .LBB0_4: -; POWERPC64-NEXT: li 4, 0 -; POWERPC64-NEXT: extsw 3, 4 +; POWERPC64-NEXT: .LBB0_4: # %for.cond.cleanup +; POWERPC64-NEXT: extsw 3, 5 ; POWERPC64-NEXT: blr ; ; POWERPC32-AIX-LABEL: shrinkwrapme: ; POWERPC32-AIX: # %bb.0: # %entry +; POWERPC32-AIX-NEXT: li 5, 0 ; POWERPC32-AIX-NEXT: cmpwi 4, 0 ; POWERPC32-AIX-NEXT: ble 0, L..BB0_4 ; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader @@ -89,12 +87,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: mtctr 4 -; POWERPC32-AIX-NEXT: li 4, 0 ; POWERPC32-AIX-NEXT: .align 4 ; POWERPC32-AIX-NEXT: L..BB0_2: # %for.body ; POWERPC32-AIX-NEXT: # ; POWERPC32-AIX-NEXT: #APP -; POWERPC32-AIX-NEXT: add 4, 3, 4 +; POWERPC32-AIX-NEXT: add 5, 3, 5 ; POWERPC32-AIX-NEXT: #NO_APP ; POWERPC32-AIX-NEXT: bdnz L..BB0_2 ; POWERPC32-AIX-NEXT: # %bb.3: @@ -102,7 +99,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 28, -16(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: mr 3, 4 ; POWERPC32-AIX-NEXT: lwz 27, -20(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 26, -24(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 25, -28(1) # 4-byte Folded Reload @@ -117,13 +113,13 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC32-AIX-NEXT: lwz 16, -64(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload -; POWERPC32-AIX-NEXT: blr -; POWERPC32-AIX-NEXT: L..BB0_4: -; POWERPC32-AIX-NEXT: li 3, 0 +; POWERPC32-AIX-NEXT: L..BB0_4: # %for.cond.cleanup +; POWERPC32-AIX-NEXT: mr 3, 5 ; POWERPC32-AIX-NEXT: blr ; ; POWERPC64-AIX-LABEL: shrinkwrapme: ; POWERPC64-AIX: # %bb.0: # %entry +; POWERPC64-AIX-NEXT: li 5, 0 ; POWERPC64-AIX-NEXT: cmpwi 4, 1 ; POWERPC64-AIX-NEXT: blt 0, L..BB0_4 ; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader @@ -149,12 +145,11 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: mtctr 4 -; POWERPC64-AIX-NEXT: li 4, 0 ; POWERPC64-AIX-NEXT: .align 4 ; POWERPC64-AIX-NEXT: L..BB0_2: # %for.body ; POWERPC64-AIX-NEXT: # ; POWERPC64-AIX-NEXT: #APP -; POWERPC64-AIX-NEXT: add 4, 3, 4 +; POWERPC64-AIX-NEXT: add 5, 3, 5 ; POWERPC64-AIX-NEXT: #NO_APP ; POWERPC64-AIX-NEXT: bdnz L..BB0_2 ; POWERPC64-AIX-NEXT: # %bb.3: @@ -162,7 +157,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: extsw 3, 4 ; POWERPC64-AIX-NEXT: ld 27, -40(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 26, -48(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 25, -56(1) # 8-byte Folded Reload @@ -177,10 +171,8 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { ; POWERPC64-AIX-NEXT: ld 16, -128(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload -; POWERPC64-AIX-NEXT: blr -; POWERPC64-AIX-NEXT: L..BB0_4: -; POWERPC64-AIX-NEXT: li 4, 0 -; POWERPC64-AIX-NEXT: extsw 3, 4 +; POWERPC64-AIX-NEXT: L..BB0_4: # %for.cond.cleanup +; POWERPC64-AIX-NEXT: extsw 3, 5 ; POWERPC64-AIX-NEXT: blr entry: %cmp5 = icmp sgt i32 %lim, 0 diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index b9df47d6d6452..7b8cacfe567d9 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1750,6 +1750,7 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: mflr 0 ; CHECK-NEXT: stwu 1, -32(1) ; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: stw 0, 36(1) ; CHECK-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 30, 24(1) # 4-byte Folded Spill @@ -1768,10 +1769,7 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: cmplwi 30, 0 ; CHECK-NEXT: addi 29, 29, 1 ; CHECK-NEXT: bc 12, 1, .LBB56_2 -; CHECK-NEXT: b .LBB56_4 -; CHECK-NEXT: .LBB56_3: -; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: .LBB56_4: # %for.cond.cleanup +; CHECK-NEXT: .LBB56_3: # %for.cond.cleanup ; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: lwz 30, 24(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 20(1) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/zext-and-cmp.ll b/llvm/test/CodeGen/PowerPC/zext-and-cmp.ll index d8926e4952ffc..e90c0ff5ac494 100644 --- a/llvm/test/CodeGen/PowerPC/zext-and-cmp.ll +++ b/llvm/test/CodeGen/PowerPC/zext-and-cmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s ; Test that we recognize that an 'and' instruction that feeds a comparison @@ -8,6 +9,21 @@ ; Function Attrs: norecurse nounwind define signext i32 @cmplwi(ptr nocapture readonly %p, ptr nocapture readonly %q, i32 signext %j, i32 signext %r10) { +; CHECK-LABEL: cmplwi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz 4, 0(4) +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: slw 4, 4, 5 +; CHECK-NEXT: and 4, 4, 6 +; CHECK-NEXT: and. 3, 4, 3 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: beqlr 0 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: addis 3, 2, .LC0@toc@ha +; CHECK-NEXT: ld 3, .LC0@toc@l(3) +; CHECK-NEXT: stw 5, 0(3) +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr entry: %0 = load i32, ptr %q, align 4 %shl = shl i32 %0, %j @@ -25,9 +41,3 @@ cleanup: %retval.0 = phi i32 [ 0, %if.then ], [ 1, %entry ] ret i32 %retval.0 } - -; CHECK-LABEL: cmplwi: -; CHECK: lwz [[T1:[0-9]+]], 0(3) -; CHECK: and. {{[0-9]+}}, {{[0-9]+}}, [[T1]] -; CHECK-NOT: cmplwi -; CHECK-NEXT: beq 0, diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index b8dc7804c4908..5df217d9bf308 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -1030,6 +1030,7 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: blez a1, .LBB20_3 ; RV64I-NEXT: # %bb.1: # %for.body.lr.ph ; RV64I-NEXT: mv s0, a2 @@ -1045,10 +1046,7 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-NEXT: addi s1, s1, -1 ; RV64I-NEXT: add s2, a0, s2 ; RV64I-NEXT: bnez s1, .LBB20_2 -; RV64I-NEXT: j .LBB20_4 -; RV64I-NEXT: .LBB20_3: -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: .LBB20_4: # %for.cond.cleanup +; RV64I-NEXT: .LBB20_3: # %for.cond.cleanup ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1066,6 +1064,7 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-MEDIUM-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-MEDIUM-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-MEDIUM-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-MEDIUM-NEXT: li s2, 0 ; RV64I-MEDIUM-NEXT: blez a1, .LBB20_3 ; RV64I-MEDIUM-NEXT: # %bb.1: # %for.body.lr.ph ; RV64I-MEDIUM-NEXT: mv s0, a2 @@ -1081,10 +1080,7 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-MEDIUM-NEXT: addi s1, s1, -1 ; RV64I-MEDIUM-NEXT: add s2, a0, s2 ; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2 -; RV64I-MEDIUM-NEXT: j .LBB20_4 -; RV64I-MEDIUM-NEXT: .LBB20_3: -; RV64I-MEDIUM-NEXT: li s2, 0 -; RV64I-MEDIUM-NEXT: .LBB20_4: # %for.cond.cleanup +; RV64I-MEDIUM-NEXT: .LBB20_3: # %for.cond.cleanup ; RV64I-MEDIUM-NEXT: mv a0, s2 ; RV64I-MEDIUM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-MEDIUM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1103,6 +1099,7 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-LARGE-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-LARGE-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-LARGE-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-LARGE-NEXT: li s2, 0 ; RV64I-LARGE-NEXT: blez a1, .LBB20_3 ; RV64I-LARGE-NEXT: # %bb.1: # %for.body.lr.ph ; RV64I-LARGE-NEXT: mv s0, a2 @@ -1121,10 +1118,7 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-LARGE-NEXT: addi s1, s1, -1 ; RV64I-LARGE-NEXT: add s2, a0, s2 ; RV64I-LARGE-NEXT: bnez s1, .LBB20_2 -; RV64I-LARGE-NEXT: j .LBB20_4 -; RV64I-LARGE-NEXT: .LBB20_3: -; RV64I-LARGE-NEXT: li s2, 0 -; RV64I-LARGE-NEXT: .LBB20_4: # %for.cond.cleanup +; RV64I-LARGE-NEXT: .LBB20_3: # %for.cond.cleanup ; RV64I-LARGE-NEXT: mv a0, s2 ; RV64I-LARGE-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll index 43ed5eefbf4c7..266a41602aa45 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll @@ -62,7 +62,7 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32 ; CHECK-NEXT: vldrht.u16 q0, [r1] ; CHECK-NEXT: vmlsldavat.s16 r4, r7, q1, q0 ; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q1, q0 -; CHECK-NEXT: blo .LBB0_10 +; CHECK-NEXT: blo .LBB0_9 ; CHECK-NEXT: @ %bb.4: @ %do.body.1 ; CHECK-NEXT: subs r2, #8 ; CHECK-NEXT: vctp.16 r2 @@ -71,10 +71,13 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32 ; CHECK-NEXT: vldrht.u16 q1, [r1, #16] ; CHECK-NEXT: vmlsldavat.s16 r4, r7, q0, q1 ; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q0, q1 -; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: b .LBB0_9 ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LBB0_5: @ %if.else -; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov r7, r4 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov r5, r4 ; CHECK-NEXT: cbz r2, .LBB0_9 ; CHECK-NEXT: @ %bb.6: @ %while.body14.preheader ; CHECK-NEXT: lsls r6, r2, #1 @@ -93,13 +96,7 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32 ; CHECK-NEXT: @ %bb.8: @ %if.end.loopexit177 ; CHECK-NEXT: mov r8, r4 ; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: b .LBB0_10 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: .LBB0_9: -; CHECK-NEXT: mov r7, r4 -; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: mov r5, r4 -; CHECK-NEXT: .LBB0_10: @ %if.end +; CHECK-NEXT: .LBB0_9: @ %if.end ; CHECK-NEXT: asrl r4, r7, #6 ; CHECK-NEXT: asrl r8, r5, #6 ; CHECK-NEXT: str r4, [r3] diff --git a/llvm/test/CodeGen/WebAssembly/implicit-def.ll b/llvm/test/CodeGen/WebAssembly/implicit-def.ll index 6c5cbab889ad8..6f8f838458db6 100644 --- a/llvm/test/CodeGen/WebAssembly/implicit-def.ll +++ b/llvm/test/CodeGen/WebAssembly/implicit-def.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -o - %s -asm-verbose=false -wasm-keep-registers -disable-wasm-fallthrough-return-opt -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -6,10 +7,12 @@ target triple = "wasm32-unknown-unknown" ; CONST_XXX instructions to provide an explicit push. ; CHECK-LABEL: implicit_def_i32: -; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}} -; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}} -; CHECK: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} +; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}} +; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}} +; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}} +; CHECK-NOT: i32.const +; CHECK: local.get $push[[R:[0-9]+]]=, 0{{$}} +; CHECK: return $pop[[R]]{{$}} define i32 @implicit_def_i32() { br i1 undef, label %A, label %X @@ -30,8 +33,10 @@ X: ; preds = %0, C } ; CHECK-LABEL: implicit_def_i64: -; CHECK: i64.const $push[[R:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} +; CHECK: i64.const $push{{[0-9]+}}=, 0{{$}} +; CHECK-NOT: i64.const +; CHECK: local.get $push[[R:[0-9]+]]=, 0{{$}} +; CHECK: return $pop[[R]]{{$}} define i64 @implicit_def_i64() { br i1 undef, label %A, label %X @@ -52,8 +57,10 @@ X: ; preds = %0, C } ; CHECK-LABEL: implicit_def_f32: -; CHECK: f32.const $push[[R:[0-9]+]]=, 0x0p0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} +; CHECK: f32.const $push{{[0-9]+}}=, 0x0p0{{$}} +; CHECK-NOT: f32.const +; CHECK: local.get $push[[R:[0-9]+]]=, 0{{$}} +; CHECK: return $pop[[R]]{{$}} define float @implicit_def_f32() { br i1 undef, label %A, label %X @@ -74,8 +81,10 @@ X: ; preds = %0, C } ; CHECK-LABEL: implicit_def_f64: -; CHECK: f64.const $push[[R:[0-9]+]]=, 0x0p0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} +; CHECK: f64.const $push{{[0-9]+}}=, 0x0p0{{$}} +; CHECK-NOT: f64.const +; CHECK: local.get $push[[R:[0-9]+]]=, 0{{$}} +; CHECK: return $pop[[R]]{{$}} define double @implicit_def_f64() { br i1 undef, label %A, label %X @@ -96,8 +105,10 @@ X: ; preds = %0, C } ; CHECK-LABEL: implicit_def_v4i32: -; CHECK: v128.const $push[[R:[0-9]+]]=, 0, 0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} +; CHECK: v128.const $push{{[0-9]+}}=, 0, 0, 0, 0{{$}} +; CHECK-NOT: v128.const +; CHECK: local.get $push[[R:[0-9]+]]=, 0{{$}} +; CHECK: return $pop[[R]]{{$}} define <4 x i32> @implicit_def_v4i32() { br i1 undef, label %A, label %X diff --git a/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll b/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll index bbce246a5d394..0397f4d53c719 100644 --- a/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll @@ -7,15 +7,15 @@ define float @foo(ptr %x, ptr %y, i32 %c) nounwind { ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_1 -; CHECK-NEXT: # %bb.2: # %bb18.preheader +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.1: # %bb18.preheader ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_3: # %bb18 +; CHECK-NEXT: .LBB0_2: # %bb18 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ssl (%edx,%esi,4), %xmm1 @@ -23,11 +23,8 @@ define float @foo(ptr %x, ptr %y, i32 %c) nounwind { ; CHECK-NEXT: addss %xmm1, %xmm0 ; CHECK-NEXT: incl %esi ; CHECK-NEXT: cmpl %eax, %esi -; CHECK-NEXT: jb .LBB0_3 -; CHECK-NEXT: jmp .LBB0_4 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: .LBB0_4: # %bb23 +; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %bb23 ; CHECK-NEXT: movss %xmm0, (%esp) ; CHECK-NEXT: flds (%esp) ; CHECK-NEXT: addl $4, %esp diff --git a/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll index c95fc00b3ee6d..a97ee48f25536 100644 --- a/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll @@ -15,7 +15,7 @@ define void @t(ptr %depth, ptr %bop, i32 %mode) nounwind { ; CHECK-NEXT: je LBB0_3 ; CHECK-NEXT: ## %bb.1: ## %entry ; CHECK-NEXT: cmpl $1, %edx -; CHECK-NEXT: jne LBB0_10 +; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: LBB0_2: ## %bb2898.us ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 @@ -25,16 +25,13 @@ define void @t(ptr %depth, ptr %bop, i32 %mode) nounwind { ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: LBB0_4: ## %bb13088 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl $65535, %ecx ## imm = 0xFFFF ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: ## %bb.6: ## %bb13101 +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.5: ## %bb13101 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: jmp LBB0_7 -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: LBB0_5: ## in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: movl $65535, %ecx ## imm = 0xFFFF -; CHECK-NEXT: LBB0_7: ## %bb13107 +; CHECK-NEXT: LBB0_6: ## %bb13107 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 ; CHECK-NEXT: movl %ecx, %edx ; CHECK-NEXT: shll $16, %edx @@ -44,11 +41,11 @@ define void @t(ptr %depth, ptr %bop, i32 %mode) nounwind { ; CHECK-NEXT: subl %edx, %ecx ; CHECK-NEXT: testw %cx, %cx ; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.8: ## %bb13236 +; CHECK-NEXT: ## %bb.7: ## %bb13236 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_4 -; CHECK-NEXT: ## %bb.9: ## %bb13572 +; CHECK-NEXT: ## %bb.8: ## %bb13572 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 ; CHECK-NEXT: movzwl %cx, %ecx ; CHECK-NEXT: movl %ecx, %edx @@ -58,7 +55,7 @@ define void @t(ptr %depth, ptr %bop, i32 %mode) nounwind { ; CHECK-NEXT: shrl $16, %edx ; CHECK-NEXT: movw %dx, 0 ; CHECK-NEXT: jmp LBB0_4 -; CHECK-NEXT: LBB0_10: ## %return +; CHECK-NEXT: LBB0_9: ## %return ; CHECK-NEXT: retq entry: switch i32 %mode, label %return [ diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll index 3fe5b70edc718..87ae59c86838a 100644 --- a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll +++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll @@ -1103,12 +1103,11 @@ define i64 @atomic_shl1_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: andl $63, %eax ; CHECK-NEXT: lock btrq %rax, (%rdi) -; CHECK-NEXT: jae .LBB40_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB40_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB40_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rax +; CHECK-NEXT: .LBB40_2: # %return ; CHECK-NEXT: retq entry: %shl = shl nuw i64 1, %c @@ -1132,26 +1131,26 @@ define i64 @atomic_shl2_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-LABEL: atomic_shl2_and_64_gpr_brnz: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movl $2, %edx -; CHECK-NEXT: shlq %cl, %rdx -; CHECK-NEXT: movq %rdx, %rsi -; CHECK-NEXT: notq %rsi +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: shlq %cl, %rsi +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: notq %rdx ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB41_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movq %rax, %r8 -; CHECK-NEXT: andq %rsi, %r8 +; CHECK-NEXT: andq %rdx, %r8 ; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) ; CHECK-NEXT: jne .LBB41_1 ; CHECK-NEXT: # %bb.2: # %atomicrmw.end -; CHECK-NEXT: testq %rdx, %rax -; CHECK-NEXT: je .LBB41_3 -; CHECK-NEXT: # %bb.4: # %if.then -; CHECK-NEXT: movq (%rdi,%rcx,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB41_3: -; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: movl $123, %edx +; CHECK-NEXT: testq %rsi, %rax +; CHECK-NEXT: je .LBB41_4 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rdx +; CHECK-NEXT: .LBB41_4: # %return +; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq entry: %shl = shl i64 2, %c @@ -1187,14 +1186,14 @@ define i64 @atomic_shl1_neq_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-NEXT: jne .LBB42_1 ; CHECK-NEXT: # %bb.2: # %atomicrmw.end ; CHECK-NEXT: leal 1(%rcx), %edx -; CHECK-NEXT: movzbl %dl, %edx -; CHECK-NEXT: btq %rdx, %rax -; CHECK-NEXT: jae .LBB42_3 -; CHECK-NEXT: # %bb.4: # %if.then -; CHECK-NEXT: movq (%rdi,%rcx,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB42_3: -; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: movzbl %dl, %esi +; CHECK-NEXT: movl $123, %edx +; CHECK-NEXT: btq %rsi, %rax +; CHECK-NEXT: jae .LBB42_4 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: movq (%rdi,%rcx,8), %rdx +; CHECK-NEXT: .LBB42_4: # %return +; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq entry: %shl = shl nuw i64 1, %c @@ -1221,12 +1220,11 @@ define i64 @atomic_shl1_small_mask_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl $31, %esi ; CHECK-NEXT: lock btrq %rsi, (%rdi) -; CHECK-NEXT: jae .LBB43_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB43_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB43_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rax +; CHECK-NEXT: .LBB43_2: # %return ; CHECK-NEXT: retq entry: %rem = and i64 %c, 31 @@ -1253,12 +1251,11 @@ define i64 @atomic_shl1_mask0_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: andl $63, %eax ; CHECK-NEXT: lock btrq %rax, (%rdi) -; CHECK-NEXT: jae .LBB44_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB44_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB44_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rax +; CHECK-NEXT: .LBB44_2: # %return ; CHECK-NEXT: retq entry: %rem = and i64 %c, 63 @@ -1286,12 +1283,11 @@ define i64 @atomic_shl1_mask1_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: andl $63, %eax ; CHECK-NEXT: lock btrq %rax, (%rdi) -; CHECK-NEXT: jae .LBB45_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB45_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB45_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rax +; CHECK-NEXT: .LBB45_2: # %return ; CHECK-NEXT: retq entry: %shl = shl nuw i64 1, %c @@ -1319,12 +1315,11 @@ define i64 @atomic_shl1_mask01_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: andl $63, %eax ; CHECK-NEXT: lock btrq %rax, (%rdi) -; CHECK-NEXT: jae .LBB46_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB46_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB46_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rax +; CHECK-NEXT: .LBB46_2: # %return ; CHECK-NEXT: retq entry: %rem = and i64 %c, 63 @@ -1348,27 +1343,27 @@ return: ; preds = %entry, %if.then define i64 @atomic_blsi_and_64_gpr_brnz(ptr %v, i64 %c) nounwind { ; CHECK-LABEL: atomic_blsi_and_64_gpr_brnz: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: negq %rcx -; CHECK-NEXT: andq %rsi, %rcx -; CHECK-NEXT: movq %rcx, %rdx -; CHECK-NEXT: notq %rdx +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: andq %rsi, %rdx +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: notq %rcx ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB47_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movq %rax, %r8 -; CHECK-NEXT: andq %rdx, %r8 +; CHECK-NEXT: andq %rcx, %r8 ; CHECK-NEXT: lock cmpxchgq %r8, (%rdi) ; CHECK-NEXT: jne .LBB47_1 ; CHECK-NEXT: # %bb.2: # %atomicrmw.end -; CHECK-NEXT: testq %rcx, %rax -; CHECK-NEXT: je .LBB47_3 -; CHECK-NEXT: # %bb.4: # %if.then -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB47_3: -; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: movl $123, %ecx +; CHECK-NEXT: testq %rdx, %rax +; CHECK-NEXT: je .LBB47_4 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: movq (%rdi,%rsi,8), %rcx +; CHECK-NEXT: .LBB47_4: # %return +; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: retq entry: %sub = sub i64 0, %c @@ -1393,12 +1388,11 @@ define i64 @atomic_shl1_xor_64_const_br(ptr %v) nounwind { ; CHECK-LABEL: atomic_shl1_xor_64_const_br: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lock btcq $4, (%rdi) -; CHECK-NEXT: jae .LBB48_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq 32(%rdi), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB48_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB48_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rax +; CHECK-NEXT: .LBB48_2: # %return ; CHECK-NEXT: retq entry: %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 @@ -1519,12 +1513,11 @@ define i64 @atomic_shl1_xor_64_const_brnz(ptr %v) nounwind { ; CHECK-LABEL: atomic_shl1_xor_64_const_brnz: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lock btcq $4, (%rdi) -; CHECK-NEXT: jae .LBB52_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: movq 32(%rdi), %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB52_1: ; CHECK-NEXT: movl $123, %eax +; CHECK-NEXT: jae .LBB52_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq 32(%rdi), %rax +; CHECK-NEXT: .LBB52_2: # %return ; CHECK-NEXT: retq entry: %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll index b4d40fee01e41..b12fbe4a8a948 100644 --- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll +++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll @@ -334,6 +334,7 @@ define zeroext i8 @atomic_shl1_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind { ; X86-LABEL: atomic_shl1_and_8_gpr_brnz: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl $1, %ebx @@ -349,16 +350,15 @@ define zeroext i8 @atomic_shl1_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind { ; X86-NEXT: lock cmpxchgb %ch, (%edx) ; X86-NEXT: jne .LBB6_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: testl %eax, %ebx -; X86-NEXT: je .LBB6_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: movb $123, %al +; X86-NEXT: testl %esi, %ebx +; X86-NEXT: je .LBB6_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: movzbl (%edx,%eax), %eax -; X86-NEXT: popl %ebx -; X86-NEXT: retl -; X86-NEXT: .LBB6_3: -; X86-NEXT: movb $123, %al +; X86-NEXT: .LBB6_4: # %return +; X86-NEXT: popl %esi ; X86-NEXT: popl %ebx ; X86-NEXT: retl ; @@ -378,15 +378,14 @@ define zeroext i8 @atomic_shl1_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind { ; X64-NEXT: lock cmpxchgb %r8b, (%rdi) ; X64-NEXT: jne .LBB6_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: testl %eax, %edx -; X64-NEXT: je .LBB6_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzbl %al, %esi +; X64-NEXT: movb $123, %al +; X64-NEXT: testl %esi, %edx +; X64-NEXT: je .LBB6_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: movzbl (%rdi,%rax), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB6_3: -; X64-NEXT: movb $123, %al +; X64-NEXT: .LBB6_4: # %return ; X64-NEXT: retq entry: %conv = zext i8 %c to i32 @@ -428,15 +427,13 @@ define zeroext i8 @atomic_shl1_mask0_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounw ; X86-NEXT: jne .LBB7_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end ; X86-NEXT: movzbl %al, %esi -; X86-NEXT: movzbl %cl, %eax -; X86-NEXT: btl %eax, %esi -; X86-NEXT: jae .LBB7_3 -; X86-NEXT: # %bb.4: # %if.then -; X86-NEXT: movzbl (%edx,%eax), %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB7_3: +; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: movb $123, %al +; X86-NEXT: btl %ecx, %esi +; X86-NEXT: jae .LBB7_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movzbl (%edx,%ecx), %eax +; X86-NEXT: .LBB7_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -455,14 +452,13 @@ define zeroext i8 @atomic_shl1_mask0_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounw ; X64-NEXT: jne .LBB7_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end ; X64-NEXT: movzbl %al, %edx -; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: btl %eax, %edx -; X64-NEXT: jae .LBB7_3 -; X64-NEXT: # %bb.4: # %if.then -; X64-NEXT: movzbl (%rdi,%rax), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB7_3: +; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: movb $123, %al +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: jae .LBB7_4 +; X64-NEXT: # %bb.3: # %if.then +; X64-NEXT: movzbl (%rdi,%rcx), %eax +; X64-NEXT: .LBB7_4: # %return ; X64-NEXT: retq entry: %0 = and i8 %c, 7 @@ -509,15 +505,13 @@ define zeroext i8 @atomic_shl1_mask01_and_8_gpr_brnz(ptr %v, i8 zeroext %c) noun ; X86-NEXT: jne .LBB8_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end ; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: movb $123, %al ; X86-NEXT: testl %ecx, %ebx -; X86-NEXT: je .LBB8_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: je .LBB8_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzbl %ah, %eax ; X86-NEXT: movzbl (%edx,%eax), %eax -; X86-NEXT: popl %ebx -; X86-NEXT: retl -; X86-NEXT: .LBB8_3: -; X86-NEXT: movb $123, %al +; X86-NEXT: .LBB8_4: # %return ; X86-NEXT: popl %ebx ; X86-NEXT: retl ; @@ -538,15 +532,14 @@ define zeroext i8 @atomic_shl1_mask01_and_8_gpr_brnz(ptr %v, i8 zeroext %c) noun ; X64-NEXT: lock cmpxchgb %r8b, (%rdi) ; X64-NEXT: jne .LBB8_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: testl %eax, %edx -; X64-NEXT: je .LBB8_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzbl %al, %ecx +; X64-NEXT: movb $123, %al +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: je .LBB8_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: movzbl (%rdi,%rax), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB8_3: -; X64-NEXT: movb $123, %al +; X64-NEXT: .LBB8_4: # %return ; X64-NEXT: retq entry: %0 = and i8 %c, 7 @@ -3033,16 +3026,14 @@ define zeroext i16 @atomic_shl1_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: jne .LBB48_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: testl %eax, %esi -; X86-NEXT: je .LBB48_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %ax, %edi +; X86-NEXT: movw $123, %ax +; X86-NEXT: testl %edi, %esi +; X86-NEXT: je .LBB48_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzwl %cx, %eax ; X86-NEXT: movzwl (%edx,%eax,2), %eax -; X86-NEXT: jmp .LBB48_5 -; X86-NEXT: .LBB48_3: -; X86-NEXT: movw $123, %ax -; X86-NEXT: .LBB48_5: # %return +; X86-NEXT: .LBB48_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -3066,15 +3057,14 @@ define zeroext i16 @atomic_shl1_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: jne .LBB48_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: testl %eax, %edx -; X64-NEXT: je .LBB48_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %ax, %esi +; X64-NEXT: movw $123, %ax +; X64-NEXT: testl %esi, %edx +; X64-NEXT: je .LBB48_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzwl %cx, %eax ; X64-NEXT: movzwl (%rdi,%rax,2), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB48_3: -; X64-NEXT: movw $123, %ax +; X64-NEXT: .LBB48_4: # %return ; X64-NEXT: retq entry: %conv = zext i16 %c to i32 @@ -3119,16 +3109,14 @@ define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_brnz(ptr %v, i16 zeroext % ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: jne .LBB49_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB49_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %ax, %esi +; X86-NEXT: movw $123, %ax +; X86-NEXT: btl %ecx, %esi +; X86-NEXT: jae .LBB49_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzwl %cx, %eax ; X86-NEXT: movzwl (%edx,%eax,2), %eax -; X86-NEXT: jmp .LBB49_5 -; X86-NEXT: .LBB49_3: -; X86-NEXT: movw $123, %ax -; X86-NEXT: .LBB49_5: # %return +; X86-NEXT: .LBB49_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -3150,15 +3138,14 @@ define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_brnz(ptr %v, i16 zeroext % ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: jne .LBB49_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: btl %ecx, %eax -; X64-NEXT: jae .LBB49_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movw $123, %ax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: jae .LBB49_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzwl %cx, %eax ; X64-NEXT: movzwl (%rdi,%rax,2), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB49_3: -; X64-NEXT: movw $123, %ax +; X64-NEXT: .LBB49_4: # %return ; X64-NEXT: retq entry: %0 = and i16 %c, 7 @@ -3203,16 +3190,14 @@ define zeroext i16 @atomic_shl1_mask0_and_16_gpr_brnz(ptr %v, i16 zeroext %c) no ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: jne .LBB50_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB50_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %ax, %esi +; X86-NEXT: movw $123, %ax +; X86-NEXT: btl %ecx, %esi +; X86-NEXT: jae .LBB50_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzwl %cx, %eax ; X86-NEXT: movzwl (%edx,%eax,2), %eax -; X86-NEXT: jmp .LBB50_5 -; X86-NEXT: .LBB50_3: -; X86-NEXT: movw $123, %ax -; X86-NEXT: .LBB50_5: # %return +; X86-NEXT: .LBB50_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -3233,15 +3218,14 @@ define zeroext i16 @atomic_shl1_mask0_and_16_gpr_brnz(ptr %v, i16 zeroext %c) no ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: jne .LBB50_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: btl %ecx, %eax -; X64-NEXT: jae .LBB50_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movw $123, %ax +; X64-NEXT: btl %ecx, %edx +; X64-NEXT: jae .LBB50_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzwl %cx, %eax ; X64-NEXT: movzwl (%rdi,%rax,2), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB50_3: -; X64-NEXT: movw $123, %ax +; X64-NEXT: .LBB50_4: # %return ; X64-NEXT: retq entry: %0 = and i16 %c, 15 @@ -3286,18 +3270,16 @@ define zeroext i16 @atomic_shl1_mask1_and_16_gpr_brnz(ptr %v, i16 zeroext %c) no ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: jne .LBB51_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: andl $15, %esi -; X86-NEXT: btl %esi, %eax -; X86-NEXT: jae .LBB51_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %ax, %esi +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: andl $15, %edi +; X86-NEXT: movw $123, %ax +; X86-NEXT: btl %edi, %esi +; X86-NEXT: jae .LBB51_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzwl %cx, %eax ; X86-NEXT: movzwl (%edx,%eax,2), %eax -; X86-NEXT: jmp .LBB51_5 -; X86-NEXT: .LBB51_3: -; X86-NEXT: movw $123, %ax -; X86-NEXT: .LBB51_5: # %return +; X86-NEXT: .LBB51_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -3318,17 +3300,16 @@ define zeroext i16 @atomic_shl1_mask1_and_16_gpr_brnz(ptr %v, i16 zeroext %c) no ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: jne .LBB51_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: andl $15, %edx -; X64-NEXT: btl %edx, %eax -; X64-NEXT: jae .LBB51_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movl %ecx, %esi +; X64-NEXT: andl $15, %esi +; X64-NEXT: movw $123, %ax +; X64-NEXT: btl %esi, %edx +; X64-NEXT: jae .LBB51_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzwl %cx, %eax ; X64-NEXT: movzwl (%rdi,%rax,2), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB51_3: -; X64-NEXT: movw $123, %ax +; X64-NEXT: .LBB51_4: # %return ; X64-NEXT: retq entry: %conv = zext i16 %c to i32 @@ -3380,16 +3361,14 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_brnz(ptr %v, i16 zeroext %c) n ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: jne .LBB52_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: testl %eax, %esi -; X86-NEXT: je .LBB52_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %ax, %ecx +; X86-NEXT: movw $123, %ax +; X86-NEXT: testl %ecx, %esi +; X86-NEXT: je .LBB52_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzwl %bx, %eax ; X86-NEXT: movzwl (%edx,%eax,2), %eax -; X86-NEXT: jmp .LBB52_5 -; X86-NEXT: .LBB52_3: -; X86-NEXT: movw $123, %ax -; X86-NEXT: .LBB52_5: # %return +; X86-NEXT: .LBB52_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -3414,15 +3393,14 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_brnz(ptr %v, i16 zeroext %c) n ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: jne .LBB52_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: testl %eax, %edx -; X64-NEXT: je .LBB52_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: movw $123, %ax +; X64-NEXT: testl %ecx, %edx +; X64-NEXT: je .LBB52_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzwl %si, %eax ; X64-NEXT: movzwl (%rdi,%rax,2), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB52_3: -; X64-NEXT: movw $123, %ax +; X64-NEXT: .LBB52_4: # %return ; X64-NEXT: retq entry: %0 = and i16 %c, 15 @@ -3471,16 +3449,14 @@ define zeroext i16 @atomic_blsi_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: jne .LBB53_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: testl %eax, %esi -; X86-NEXT: je .LBB53_3 -; X86-NEXT: # %bb.4: # %if.then +; X86-NEXT: movzwl %ax, %edi +; X86-NEXT: movw $123, %ax +; X86-NEXT: testl %edi, %esi +; X86-NEXT: je .LBB53_4 +; X86-NEXT: # %bb.3: # %if.then ; X86-NEXT: movzwl %cx, %eax ; X86-NEXT: movzwl (%edx,%eax,2), %eax -; X86-NEXT: jmp .LBB53_5 -; X86-NEXT: .LBB53_3: -; X86-NEXT: movw $123, %ax -; X86-NEXT: .LBB53_5: # %return +; X86-NEXT: .LBB53_4: # %return ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -3504,15 +3480,14 @@ define zeroext i16 @atomic_blsi_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: jne .LBB53_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: testl %eax, %ecx -; X64-NEXT: je .LBB53_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movw $123, %ax +; X64-NEXT: testl %edx, %ecx +; X64-NEXT: je .LBB53_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movzwl %si, %eax ; X64-NEXT: movzwl (%rdi,%rax,2), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB53_3: -; X64-NEXT: movw $123, %ax +; X64-NEXT: .LBB53_4: # %return ; X64-NEXT: retq entry: %conv = zext i16 %c to i32 @@ -3609,25 +3584,23 @@ entry: define zeroext i16 @atomic_shl1_or_16_const_brnz(ptr %v) nounwind { ; X86-LABEL: atomic_shl1_or_16_const_brnz: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: lock btsw $4, (%eax) -; X86-NEXT: jae .LBB56_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movzwl 8(%eax), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB56_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: lock btsw $4, (%ecx) ; X86-NEXT: movw $123, %ax +; X86-NEXT: jae .LBB56_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movzwl 8(%ecx), %eax +; X86-NEXT: .LBB56_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_or_16_const_brnz: ; X64: # %bb.0: # %entry ; X64-NEXT: lock btsw $4, (%rdi) -; X64-NEXT: jae .LBB56_1 -; X64-NEXT: # %bb.2: # %if.then -; X64-NEXT: movzwl 8(%rdi), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB56_1: ; X64-NEXT: movw $123, %ax +; X64-NEXT: jae .LBB56_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movzwl 8(%rdi), %eax +; X64-NEXT: .LBB56_2: # %return ; X64-NEXT: retq entry: %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 @@ -4620,17 +4593,16 @@ entry: define i32 @atomic_shl1_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_or_32_gpr_br: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB78_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB78_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB78_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB78_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_or_32_gpr_br: @@ -4638,13 +4610,12 @@ define i32 @atomic_shl1_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB78_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB78_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB78_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB78_2: # %return ; X64-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -4667,29 +4638,27 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_small_mask_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_br: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andl $15, %ecx -; X86-NEXT: lock btsl %ecx, (%eax) -; X86-NEXT: jae .LBB79_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%eax,%ecx,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB79_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $15, %edx +; X86-NEXT: lock btsl %edx, (%ecx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB79_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%ecx,%edx,4), %eax +; X86-NEXT: .LBB79_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_br: ; X64: # %bb.0: # %entry ; X64-NEXT: andl $15, %esi ; X64-NEXT: lock btsl %esi, (%rdi) -; X64-NEXT: jae .LBB79_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB79_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB79_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB79_2: # %return ; X64-NEXT: retq entry: %0 = and i32 %c, 15 @@ -4713,17 +4682,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_mask0_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_mask0_or_32_gpr_br: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB80_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB80_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB80_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB80_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_mask0_or_32_gpr_br: @@ -4731,13 +4699,12 @@ define i32 @atomic_shl1_mask0_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB80_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB80_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB80_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB80_2: # %return ; X64-NEXT: retq entry: %rem = and i32 %c, 31 @@ -4762,17 +4729,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_mask1_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_mask1_or_32_gpr_br: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB81_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB81_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB81_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB81_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_mask1_or_32_gpr_br: @@ -4780,13 +4746,12 @@ define i32 @atomic_shl1_mask1_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB81_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB81_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB81_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB81_2: # %return ; X64-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -4811,17 +4776,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_mask01_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_mask01_or_32_gpr_br: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB82_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB82_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB82_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB82_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_mask01_or_32_gpr_br: @@ -4829,13 +4793,12 @@ define i32 @atomic_shl1_mask01_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB82_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB82_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB82_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB82_2: # %return ; X64-NEXT: retq entry: %rem = and i32 %c, 31 @@ -4861,54 +4824,53 @@ define i32 @atomic_blsi_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: negl %esi -; X86-NEXT: andl %ecx, %esi -; X86-NEXT: movl (%edx), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: negl %edi +; X86-NEXT: andl %edx, %edi +; X86-NEXT: movl (%esi), %eax ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB83_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: lock cmpxchgl %ecx, (%esi) ; X86-NEXT: jne .LBB83_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: testl %esi, %eax -; X86-NEXT: je .LBB83_3 -; X86-NEXT: # %bb.4: # %if.then -; X86-NEXT: movl (%edx,%ecx,4), %eax -; X86-NEXT: jmp .LBB83_5 -; X86-NEXT: .LBB83_3: -; X86-NEXT: movl $123, %eax -; X86-NEXT: .LBB83_5: # %return +; X86-NEXT: movl $123, %ecx +; X86-NEXT: testl %edi, %eax +; X86-NEXT: je .LBB83_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%edx,4), %ecx +; X86-NEXT: .LBB83_4: # %return +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-LABEL: atomic_blsi_or_32_gpr_br: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: negl %ecx -; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl %esi, %edx +; X64-NEXT: negl %edx +; X64-NEXT: andl %esi, %edx ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: .p2align 4 ; X64-NEXT: .LBB83_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %edx -; X64-NEXT: orl %ecx, %edx -; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) ; X64-NEXT: jne .LBB83_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: testl %ecx, %eax -; X64-NEXT: je .LBB83_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl $123, %ecx +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB83_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movl %esi, %eax -; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB83_3: -; X64-NEXT: movl $123, %eax +; X64-NEXT: movl (%rdi,%rax,4), %ecx +; X64-NEXT: .LBB83_4: # %return +; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq entry: %sub = sub i32 0, %c @@ -5245,17 +5207,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_or_32_gpr_brnz: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB90_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB90_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB90_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB90_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_or_32_gpr_brnz: @@ -5263,13 +5224,12 @@ define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB90_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB90_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB90_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB90_2: # %return ; X64-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -5292,29 +5252,27 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_small_mask_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andl $15, %ecx -; X86-NEXT: lock btsl %ecx, (%eax) -; X86-NEXT: jae .LBB91_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%eax,%ecx,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB91_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $15, %edx +; X86-NEXT: lock btsl %edx, (%ecx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB91_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%ecx,%edx,4), %eax +; X86-NEXT: .LBB91_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: ; X64: # %bb.0: # %entry ; X64-NEXT: andl $15, %esi ; X64-NEXT: lock btsl %esi, (%rdi) -; X64-NEXT: jae .LBB91_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB91_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB91_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB91_2: # %return ; X64-NEXT: retq entry: %0 = and i32 %c, 15 @@ -5338,17 +5296,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_mask0_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB92_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB92_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB92_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB92_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: @@ -5356,13 +5313,12 @@ define i32 @atomic_shl1_mask0_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB92_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB92_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB92_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB92_2: # %return ; X64-NEXT: retq entry: %rem = and i32 %c, 31 @@ -5387,17 +5343,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_mask1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB93_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB93_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB93_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB93_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: @@ -5405,13 +5360,12 @@ define i32 @atomic_shl1_mask1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB93_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB93_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB93_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB93_2: # %return ; X64-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -5436,17 +5390,16 @@ return: ; preds = %entry, %if.then define i32 @atomic_shl1_mask01_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: andl $31, %edx -; X86-NEXT: lock btsl %edx, (%ecx) -; X86-NEXT: jae .LBB94_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl (%ecx,%eax,4), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB94_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: lock btsl %eax, (%edx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB94_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl (%edx,%ecx,4), %eax +; X86-NEXT: .LBB94_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: @@ -5454,13 +5407,12 @@ define i32 @atomic_shl1_mask01_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andl $31, %eax ; X64-NEXT: lock btsl %eax, (%rdi) -; X64-NEXT: jae .LBB94_1 -; X64-NEXT: # %bb.2: # %if.then +; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB94_2 +; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl %esi, %eax ; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB94_1: -; X64-NEXT: movl $123, %eax +; X64-NEXT: .LBB94_2: # %return ; X64-NEXT: retq entry: %rem = and i32 %c, 31 @@ -5486,54 +5438,53 @@ define i32 @atomic_blsi_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: negl %esi -; X86-NEXT: andl %ecx, %esi -; X86-NEXT: movl (%edx), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: negl %edi +; X86-NEXT: andl %edx, %edi +; X86-NEXT: movl (%esi), %eax ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB95_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: lock cmpxchgl %ecx, (%esi) ; X86-NEXT: jne .LBB95_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: testl %esi, %eax -; X86-NEXT: je .LBB95_3 -; X86-NEXT: # %bb.4: # %if.then -; X86-NEXT: movl (%edx,%ecx,4), %eax -; X86-NEXT: jmp .LBB95_5 -; X86-NEXT: .LBB95_3: -; X86-NEXT: movl $123, %eax -; X86-NEXT: .LBB95_5: # %return +; X86-NEXT: movl $123, %ecx +; X86-NEXT: testl %edi, %eax +; X86-NEXT: je .LBB95_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%edx,4), %ecx +; X86-NEXT: .LBB95_4: # %return +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-LABEL: atomic_blsi_or_32_gpr_brnz: ; X64: # %bb.0: # %entry -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: negl %ecx -; X64-NEXT: andl %esi, %ecx +; X64-NEXT: movl %esi, %edx +; X64-NEXT: negl %edx +; X64-NEXT: andl %esi, %edx ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: .p2align 4 ; X64-NEXT: .LBB95_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %edx -; X64-NEXT: orl %ecx, %edx -; X64-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: orl %edx, %ecx +; X64-NEXT: lock cmpxchgl %ecx, (%rdi) ; X64-NEXT: jne .LBB95_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: testl %ecx, %eax -; X64-NEXT: je .LBB95_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: movl $123, %ecx +; X64-NEXT: testl %edx, %eax +; X64-NEXT: je .LBB95_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movl %esi, %eax -; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB95_3: -; X64-NEXT: movl $123, %eax +; X64-NEXT: movl (%rdi,%rax,4), %ecx +; X64-NEXT: .LBB95_4: # %return +; X64-NEXT: movl %ecx, %eax ; X64-NEXT: retq entry: %sub = sub i32 0, %c @@ -5965,26 +5916,25 @@ define i32 @atomic_shl1_mask0_and_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl $1, %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl (%edx), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB104_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-NEXT: movl %eax, %edi -; X86-NEXT: andl %esi, %edi -; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: andl %edx, %edi +; X86-NEXT: lock cmpxchgl %edi, (%esi) ; X86-NEXT: jne .LBB104_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB104_3 -; X86-NEXT: # %bb.4: # %if.then -; X86-NEXT: movl (%edx,%ecx,4), %eax -; X86-NEXT: jmp .LBB104_5 -; X86-NEXT: .LBB104_3: -; X86-NEXT: movl $123, %eax -; X86-NEXT: .LBB104_5: # %return +; X86-NEXT: jae .LBB104_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB104_4: # %return +; X86-NEXT: movl %edx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -6003,14 +5953,14 @@ define i32 @atomic_shl1_mask0_and_32_gpr_br(ptr %v, i32 %c) nounwind { ; X64-NEXT: lock cmpxchgl %esi, (%rdi) ; X64-NEXT: jne .LBB104_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx ; X64-NEXT: btl %ecx, %eax -; X64-NEXT: jae .LBB104_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: jae .LBB104_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movl %ecx, %eax -; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB104_3: -; X64-NEXT: movl $123, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: .LBB104_4: # %return +; X64-NEXT: movl %edx, %eax ; X64-NEXT: retq entry: %rem = and i32 %c, 31 @@ -6038,26 +5988,25 @@ define i32 @atomic_shl1_mask1_and_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl $1, %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl (%edx), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $1, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl (%esi), %eax ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB105_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-NEXT: movl %eax, %edi -; X86-NEXT: andl %esi, %edi -; X86-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NEXT: andl %edx, %edi +; X86-NEXT: lock cmpxchgl %edi, (%esi) ; X86-NEXT: jne .LBB105_1 ; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl $123, %edx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB105_3 -; X86-NEXT: # %bb.4: # %if.then -; X86-NEXT: movl (%edx,%ecx,4), %eax -; X86-NEXT: jmp .LBB105_5 -; X86-NEXT: .LBB105_3: -; X86-NEXT: movl $123, %eax -; X86-NEXT: .LBB105_5: # %return +; X86-NEXT: jae .LBB105_4 +; X86-NEXT: # %bb.3: # %if.then +; X86-NEXT: movl (%esi,%ecx,4), %edx +; X86-NEXT: .LBB105_4: # %return +; X86-NEXT: movl %edx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -6076,14 +6025,14 @@ define i32 @atomic_shl1_mask1_and_32_gpr_br(ptr %v, i32 %c) nounwind { ; X64-NEXT: lock cmpxchgl %esi, (%rdi) ; X64-NEXT: jne .LBB105_1 ; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: movl $123, %edx ; X64-NEXT: btl %ecx, %eax -; X64-NEXT: jae .LBB105_3 -; X64-NEXT: # %bb.4: # %if.then +; X64-NEXT: jae .LBB105_4 +; X64-NEXT: # %bb.3: # %if.then ; X64-NEXT: movl %ecx, %eax -; X64-NEXT: movl (%rdi,%rax,4), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB105_3: -; X64-NEXT: movl $123, %eax +; X64-NEXT: movl (%rdi,%rax,4), %edx +; X64-NEXT: .LBB105_4: # %return +; X64-NEXT: movl %edx, %eax ; X64-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -6749,25 +6698,23 @@ entry: define i32 @atomic_shl1_and_32_const_br(ptr %v) nounwind { ; X86-LABEL: atomic_shl1_and_32_const_br: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: lock btrl $4, (%eax) -; X86-NEXT: jae .LBB120_1 -; X86-NEXT: # %bb.2: # %if.then -; X86-NEXT: movl 16(%eax), %eax -; X86-NEXT: retl -; X86-NEXT: .LBB120_1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: lock btrl $4, (%ecx) ; X86-NEXT: movl $123, %eax +; X86-NEXT: jae .LBB120_2 +; X86-NEXT: # %bb.1: # %if.then +; X86-NEXT: movl 16(%ecx), %eax +; X86-NEXT: .LBB120_2: # %return ; X86-NEXT: retl ; ; X64-LABEL: atomic_shl1_and_32_const_br: ; X64: # %bb.0: # %entry ; X64-NEXT: lock btrl $4, (%rdi) -; X64-NEXT: jae .LBB120_1 -; X64-NEXT: # %bb.2: # %if.then -; X64-NEXT: movl 16(%rdi), %eax -; X64-NEXT: retq -; X64-NEXT: .LBB120_1: ; X64-NEXT: movl $123, %eax +; X64-NEXT: jae .LBB120_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: movl 16(%rdi), %eax +; X64-NEXT: .LBB120_2: # %return ; X64-NEXT: retq entry: %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 diff --git a/llvm/test/CodeGen/X86/branchfolding-debugloc.ll b/llvm/test/CodeGen/X86/branchfolding-debugloc.ll index cfcc2af16e3fc..66880555aa3f9 100644 --- a/llvm/test/CodeGen/X86/branchfolding-debugloc.ll +++ b/llvm/test/CodeGen/X86/branchfolding-debugloc.ll @@ -1,7 +1,8 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s | FileCheck %s ; ; The test code is generated from the following source code: -; +; ; 1 extern int bar(int x); ; 2 ; 3 int foo(int *begin, int *end) { @@ -16,14 +17,12 @@ ; 12 } ; 13 return ret; ; 14 } -; +; ; CHECK: # %entry ; CHECK-NOT: # %for.body ; CHECK: .loc 1 6 3 ; CHECK-NEXT: je [[BB:.LBB[^ ]+]] -; CHECK: [[BB]]: -; CHECK: xorl %ebp, %ebp -; CHECK-NEXT: .LBB{{.*}} # %for.end +; CHECK: [[BB]]: # %for.end target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll index 5acbccf41c5d3..82023f383c9d0 100644 --- a/llvm/test/CodeGen/X86/break-false-dep.ll +++ b/llvm/test/CodeGen/X86/break-false-dep.ll @@ -132,14 +132,14 @@ declare double @llvm.sqrt.f64(double) define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp { ; SSE-LINUX-LABEL: loopdep1: ; SSE-LINUX: # %bb.0: # %entry -; SSE-LINUX-NEXT: testl %edi, %edi -; SSE-LINUX-NEXT: je .LBB6_1 -; SSE-LINUX-NEXT: # %bb.2: # %for.body.preheader -; SSE-LINUX-NEXT: movl $1, %eax ; SSE-LINUX-NEXT: xorps %xmm0, %xmm0 ; SSE-LINUX-NEXT: xorps %xmm1, %xmm1 +; SSE-LINUX-NEXT: testl %edi, %edi +; SSE-LINUX-NEXT: je .LBB6_3 +; SSE-LINUX-NEXT: # %bb.1: # %for.body.preheader +; SSE-LINUX-NEXT: movl $1, %eax ; SSE-LINUX-NEXT: .p2align 4 -; SSE-LINUX-NEXT: .LBB6_3: # %for.body +; SSE-LINUX-NEXT: .LBB6_2: # %for.body ; SSE-LINUX-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE-LINUX-NEXT: xorps %xmm2, %xmm2 ; SSE-LINUX-NEXT: cvtsi2ss %eax, %xmm2 @@ -149,26 +149,21 @@ define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp { ; SSE-LINUX-NEXT: addss %xmm3, %xmm1 ; SSE-LINUX-NEXT: incl %eax ; SSE-LINUX-NEXT: decl %edi -; SSE-LINUX-NEXT: jne .LBB6_3 -; SSE-LINUX-NEXT: # %bb.4: # %for.end -; SSE-LINUX-NEXT: subss %xmm1, %xmm0 -; SSE-LINUX-NEXT: retq -; SSE-LINUX-NEXT: .LBB6_1: -; SSE-LINUX-NEXT: xorps %xmm0, %xmm0 -; SSE-LINUX-NEXT: xorps %xmm1, %xmm1 +; SSE-LINUX-NEXT: jne .LBB6_2 +; SSE-LINUX-NEXT: .LBB6_3: # %for.end ; SSE-LINUX-NEXT: subss %xmm1, %xmm0 ; SSE-LINUX-NEXT: retq ; ; SSE-WIN-LABEL: loopdep1: ; SSE-WIN: # %bb.0: # %entry -; SSE-WIN-NEXT: testl %ecx, %ecx -; SSE-WIN-NEXT: je .LBB6_1 -; SSE-WIN-NEXT: # %bb.2: # %for.body.preheader -; SSE-WIN-NEXT: movl $1, %eax ; SSE-WIN-NEXT: xorps %xmm0, %xmm0 ; SSE-WIN-NEXT: xorps %xmm1, %xmm1 +; SSE-WIN-NEXT: testl %ecx, %ecx +; SSE-WIN-NEXT: je .LBB6_3 +; SSE-WIN-NEXT: # %bb.1: # %for.body.preheader +; SSE-WIN-NEXT: movl $1, %eax ; SSE-WIN-NEXT: .p2align 4 -; SSE-WIN-NEXT: .LBB6_3: # %for.body +; SSE-WIN-NEXT: .LBB6_2: # %for.body ; SSE-WIN-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE-WIN-NEXT: xorps %xmm2, %xmm2 ; SSE-WIN-NEXT: cvtsi2ss %eax, %xmm2 @@ -178,26 +173,21 @@ define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp { ; SSE-WIN-NEXT: addss %xmm3, %xmm1 ; SSE-WIN-NEXT: incl %eax ; SSE-WIN-NEXT: decl %ecx -; SSE-WIN-NEXT: jne .LBB6_3 -; SSE-WIN-NEXT: # %bb.4: # %for.end -; SSE-WIN-NEXT: subss %xmm1, %xmm0 -; SSE-WIN-NEXT: retq -; SSE-WIN-NEXT: .LBB6_1: -; SSE-WIN-NEXT: xorps %xmm0, %xmm0 -; SSE-WIN-NEXT: xorps %xmm1, %xmm1 +; SSE-WIN-NEXT: jne .LBB6_2 +; SSE-WIN-NEXT: .LBB6_3: # %for.end ; SSE-WIN-NEXT: subss %xmm1, %xmm0 ; SSE-WIN-NEXT: retq ; ; AVX1-LABEL: loopdep1: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: testl %ecx, %ecx -; AVX1-NEXT: je .LBB6_1 -; AVX1-NEXT: # %bb.2: # %for.body.preheader -; AVX1-NEXT: movl $1, %eax ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: testl %ecx, %ecx +; AVX1-NEXT: je .LBB6_3 +; AVX1-NEXT: # %bb.1: # %for.body.preheader +; AVX1-NEXT: movl $1, %eax ; AVX1-NEXT: .p2align 4 -; AVX1-NEXT: .LBB6_3: # %for.body +; AVX1-NEXT: .LBB6_2: # %for.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX1-NEXT: vcvtsi2ss %eax, %xmm4, %xmm2 ; AVX1-NEXT: vcvtsi2ss %ecx, %xmm4, %xmm3 @@ -205,26 +195,21 @@ define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp { ; AVX1-NEXT: vaddss %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: incl %eax ; AVX1-NEXT: decl %ecx -; AVX1-NEXT: jne .LBB6_3 -; AVX1-NEXT: # %bb.4: # %for.end -; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB6_1: -; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: jne .LBB6_2 +; AVX1-NEXT: .LBB6_3: # %for.end ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX512VL-LABEL: loopdep1: ; AVX512VL: # %bb.0: # %entry -; AVX512VL-NEXT: testl %ecx, %ecx -; AVX512VL-NEXT: je .LBB6_1 -; AVX512VL-NEXT: # %bb.2: # %for.body.preheader -; AVX512VL-NEXT: movl $1, %eax ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: testl %ecx, %ecx +; AVX512VL-NEXT: je .LBB6_3 +; AVX512VL-NEXT: # %bb.1: # %for.body.preheader +; AVX512VL-NEXT: movl $1, %eax ; AVX512VL-NEXT: .p2align 4 -; AVX512VL-NEXT: .LBB6_3: # %for.body +; AVX512VL-NEXT: .LBB6_2: # %for.body ; AVX512VL-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX512VL-NEXT: vcvtsi2ss %eax, %xmm3, %xmm2 ; AVX512VL-NEXT: vaddss %xmm2, %xmm0, %xmm0 @@ -232,13 +217,8 @@ define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp { ; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX512VL-NEXT: incl %eax ; AVX512VL-NEXT: decl %ecx -; AVX512VL-NEXT: jne .LBB6_3 -; AVX512VL-NEXT: # %bb.4: # %for.end -; AVX512VL-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; AVX512VL-NEXT: .LBB6_1: -; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: jne .LBB6_2 +; AVX512VL-NEXT: .LBB6_3: # %for.end ; AVX512VL-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/coalescer-commute4.ll b/llvm/test/CodeGen/X86/coalescer-commute4.ll index 0ec99dcba88ae..b7fadaf6cbfc0 100644 --- a/llvm/test/CodeGen/X86/coalescer-commute4.ll +++ b/llvm/test/CodeGen/X86/coalescer-commute4.ll @@ -8,15 +8,15 @@ define float @foo(ptr %x, ptr %y, i32 %c) nounwind { ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_1 -; CHECK-NEXT: ## %bb.2: ## %bb.preheader +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %bb.preheader ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: LBB0_3: ## %bb +; CHECK-NEXT: LBB0_2: ## %bb ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsi2ssl (%edx,%esi,4), %xmm1 @@ -24,11 +24,8 @@ define float @foo(ptr %x, ptr %y, i32 %c) nounwind { ; CHECK-NEXT: addss %xmm1, %xmm0 ; CHECK-NEXT: incl %esi ; CHECK-NEXT: cmpl %eax, %esi -; CHECK-NEXT: jb LBB0_3 -; CHECK-NEXT: jmp LBB0_4 -; CHECK-NEXT: LBB0_1: -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: LBB0_4: ## %bb23 +; CHECK-NEXT: jb LBB0_2 +; CHECK-NEXT: LBB0_3: ## %bb23 ; CHECK-NEXT: movss %xmm0, (%esp) ; CHECK-NEXT: flds (%esp) ; CHECK-NEXT: addl $4, %esp diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll index fecb62fbc5aea..cd26f2fa60be3 100644 --- a/llvm/test/CodeGen/X86/ctlo.ll +++ b/llvm/test/CodeGen/X86/ctlo.ll @@ -15,17 +15,15 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i8 @ctlo_i8(i8 %x) { ; X86-NOCMOV-LABEL: ctlo_i8: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: xorb $-1, %al -; X86-NOCMOV-NEXT: je .LBB0_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: movzbl %al, %eax +; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOCMOV-NEXT: xorb $-1, %cl +; X86-NOCMOV-NEXT: movb $8, %al +; X86-NOCMOV-NEXT: je .LBB0_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: movzbl %cl, %eax ; X86-NOCMOV-NEXT: bsrl %eax, %eax ; X86-NOCMOV-NEXT: xorl $7, %eax -; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB0_1: -; X86-NOCMOV-NEXT: movb $8, %al +; X86-NOCMOV-NEXT: .LBB0_2: # %cond.end ; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; X86-NOCMOV-NEXT: retl ; @@ -118,16 +116,14 @@ define i8 @ctlo_i8_undef(i8 %x) { define i16 @ctlo_i16(i16 %x) { ; X86-NOCMOV-LABEL: ctlo_i16: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: xorw $-1, %ax -; X86-NOCMOV-NEXT: je .LBB2_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrw %ax, %ax -; X86-NOCMOV-NEXT: xorl $15, %eax -; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB2_1: +; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOCMOV-NEXT: xorw $-1, %cx ; X86-NOCMOV-NEXT: movw $16, %ax +; X86-NOCMOV-NEXT: je .LBB2_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrw %cx, %ax +; X86-NOCMOV-NEXT: xorl $15, %eax +; X86-NOCMOV-NEXT: .LBB2_2: # %cond.end ; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NOCMOV-NEXT: retl ; @@ -206,15 +202,14 @@ define i16 @ctlo_i16_undef(i16 %x) { define i32 @ctlo_i32(i32 %x) { ; X86-NOCMOV-LABEL: ctlo_i32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: xorl $-1, %eax -; X86-NOCMOV-NEXT: je .LBB4_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrl %eax, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB4_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOCMOV-NEXT: xorl $-1, %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: je .LBB4_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: xorl $31, %eax +; X86-NOCMOV-NEXT: .LBB4_2: # %cond.end ; X86-NOCMOV-NEXT: retl ; ; X86-CMOV-LABEL: ctlo_i32: diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll index 0eabfeae853f7..a1e9c0c95418f 100644 --- a/llvm/test/CodeGen/X86/ctlz.ll +++ b/llvm/test/CodeGen/X86/ctlz.ll @@ -220,17 +220,15 @@ define i64 @ctlz_i64(i64 %x) { define i8 @ctlz_i8_zero_test(i8 %n) { ; X86-NOCMOV-LABEL: ctlz_i8_zero_test: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testb %al, %al -; X86-NOCMOV-NEXT: je .LBB4_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: movzbl %al, %eax +; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOCMOV-NEXT: movb $8, %al +; X86-NOCMOV-NEXT: testb %cl, %cl +; X86-NOCMOV-NEXT: je .LBB4_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: movzbl %cl, %eax ; X86-NOCMOV-NEXT: bsrl %eax, %eax ; X86-NOCMOV-NEXT: xorl $7, %eax -; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB4_1: -; X86-NOCMOV-NEXT: movb $8, %al +; X86-NOCMOV-NEXT: .LBB4_2: # %cond.end ; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; X86-NOCMOV-NEXT: retl ; @@ -292,16 +290,14 @@ define i8 @ctlz_i8_zero_test(i8 %n) { define i16 @ctlz_i16_zero_test(i16 %n) { ; X86-NOCMOV-LABEL: ctlz_i16_zero_test: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testw %ax, %ax -; X86-NOCMOV-NEXT: je .LBB5_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrw %ax, %ax -; X86-NOCMOV-NEXT: xorl $15, %eax -; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB5_1: +; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movw $16, %ax +; X86-NOCMOV-NEXT: testw %cx, %cx +; X86-NOCMOV-NEXT: je .LBB5_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrw %cx, %ax +; X86-NOCMOV-NEXT: xorl $15, %eax +; X86-NOCMOV-NEXT: .LBB5_2: # %cond.end ; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NOCMOV-NEXT: retl ; @@ -349,15 +345,14 @@ define i16 @ctlz_i16_zero_test(i16 %n) { define i32 @ctlz_i32_zero_test(i32 %n) { ; X86-NOCMOV-LABEL: ctlz_i32_zero_test: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB6_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrl %eax, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB6_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB6_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: xorl $31, %eax +; X86-NOCMOV-NEXT: .LBB6_2: # %cond.end ; X86-NOCMOV-NEXT: retl ; ; X86-CMOV-LABEL: ctlz_i32_zero_test: @@ -587,16 +582,14 @@ define i32 @ctlz_bsr(i32 %n) { define i32 @ctlz_bsr_zero_test(i32 %n) { ; X86-NOCMOV-LABEL: ctlz_bsr_zero_test: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB10_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrl %eax, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB10_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB10_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: xorl $31, %eax +; X86-NOCMOV-NEXT: .LBB10_2: # %cond.end ; X86-NOCMOV-NEXT: xorl $31, %eax ; X86-NOCMOV-NEXT: retl ; @@ -951,18 +944,15 @@ define i8 @ctlz_xor7_i8_true(i8 %x) { define i8 @ctlz_xor7_i8_false(i8 %x) { ; X86-NOCMOV-LABEL: ctlz_xor7_i8_false: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testb %al, %al -; X86-NOCMOV-NEXT: je .LBB16_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: movzbl %al, %eax +; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOCMOV-NEXT: movb $8, %al +; X86-NOCMOV-NEXT: testb %cl, %cl +; X86-NOCMOV-NEXT: je .LBB16_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: movzbl %cl, %eax ; X86-NOCMOV-NEXT: bsrl %eax, %eax ; X86-NOCMOV-NEXT: xorl $7, %eax -; X86-NOCMOV-NEXT: xorb $7, %al -; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB16_1: -; X86-NOCMOV-NEXT: movb $8, %al +; X86-NOCMOV-NEXT: .LBB16_2: # %cond.end ; X86-NOCMOV-NEXT: xorb $7, %al ; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; X86-NOCMOV-NEXT: retl @@ -1066,16 +1056,14 @@ define i16 @ctlz_xor15_i16_true(i16 %x) { define i32 @ctlz_xor31_i32_false(i32 %x) { ; X86-NOCMOV-LABEL: ctlz_xor31_i32_false: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB18_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrl %eax, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB18_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB18_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: xorl $31, %eax +; X86-NOCMOV-NEXT: .LBB18_2: # %cond.end ; X86-NOCMOV-NEXT: xorl $31, %eax ; X86-NOCMOV-NEXT: retl ; @@ -1208,16 +1196,14 @@ define i64 @ctlz_xor63_i64_true(i64 %x) { define i64 @ctlz_i32_sext(i32 %x) { ; X86-NOCMOV-LABEL: ctlz_i32_sext: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB20_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrl %eax, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: jmp .LBB20_3 -; X86-NOCMOV-NEXT: .LBB20_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax -; X86-NOCMOV-NEXT: .LBB20_3: # %cond.end +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB20_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: xorl $31, %eax +; X86-NOCMOV-NEXT: .LBB20_2: # %cond.end ; X86-NOCMOV-NEXT: xorl $31, %eax ; X86-NOCMOV-NEXT: xorl %edx, %edx ; X86-NOCMOV-NEXT: retl @@ -1270,16 +1256,14 @@ define i64 @ctlz_i32_sext(i32 %x) { define i64 @ctlz_i32_zext(i32 %x) { ; X86-NOCMOV-LABEL: ctlz_i32_zext: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB21_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: bsrl %eax, %eax -; X86-NOCMOV-NEXT: xorl $31, %eax -; X86-NOCMOV-NEXT: jmp .LBB21_3 -; X86-NOCMOV-NEXT: .LBB21_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax -; X86-NOCMOV-NEXT: .LBB21_3: # %cond.end +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB21_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: bsrl %ecx, %eax +; X86-NOCMOV-NEXT: xorl $31, %eax +; X86-NOCMOV-NEXT: .LBB21_2: # %cond.end ; X86-NOCMOV-NEXT: xorl $31, %eax ; X86-NOCMOV-NEXT: xorl %edx, %edx ; X86-NOCMOV-NEXT: retl diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll index db949827af007..46f3ecaa155e0 100644 --- a/llvm/test/CodeGen/X86/cttz.ll +++ b/llvm/test/CodeGen/X86/cttz.ll @@ -305,14 +305,13 @@ define i16 @cttz_i16_zero_test(i16 %n) { define i32 @cttz_i32_zero_test(i32 %n) { ; X86-NOCMOV-LABEL: cttz_i32_zero_test: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB6_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: rep bsfl %eax, %eax -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB6_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB6_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: rep bsfl %ecx, %eax +; X86-NOCMOV-NEXT: .LBB6_2: # %cond.end ; X86-NOCMOV-NEXT: retl ; ; X86-CMOV-LABEL: cttz_i32_zero_test: @@ -356,7 +355,6 @@ define i64 @cttz_i64_zero_test(i64 %n) { ; X86-NOCMOV-LABEL: cttz_i64_zero_test: ; X86-NOCMOV: # %bb.0: ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOCMOV-NOT: rep ; X86-NOCMOV-NEXT: bsfl {{[0-9]+}}(%esp), %edx ; X86-NOCMOV-NEXT: movl $32, %eax ; X86-NOCMOV-NEXT: je .LBB7_2 @@ -377,12 +375,10 @@ define i64 @cttz_i64_zero_test(i64 %n) { ; X86-CMOV-LABEL: cttz_i64_zero_test: ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NOT: rep ; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %ecx ; X86-CMOV-NEXT: movl $32, %edx ; X86-CMOV-NEXT: cmovnel %ecx, %edx ; X86-CMOV-NEXT: addl $32, %edx -; X86-CMOV-NOT: rep ; X86-CMOV-NEXT: bsfl %eax, %eax ; X86-CMOV-NEXT: cmovel %edx, %eax ; X86-CMOV-NEXT: xorl %edx, %edx @@ -589,13 +585,11 @@ define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) { define i32 @cttz_i32_osize(i32 %x) optsize { ; X86-LABEL: cttz_i32_osize: ; X86: # %bb.0: -; X86-NOT: rep ; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: cttz_i32_osize: ; X64: # %bb.0: -; X64-NOT: rep ; X64-NEXT: bsfl %edi, %eax ; X64-NEXT: retq ; @@ -625,13 +619,11 @@ define i32 @cttz_i32_osize(i32 %x) optsize { define i32 @cttz_i32_msize(i32 %x) minsize { ; X86-LABEL: cttz_i32_msize: ; X86: # %bb.0: -; X86-NOT: rep ; X86-NEXT: bsfl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: cttz_i32_msize: ; X64: # %bb.0: -; X64-NOT: rep ; X64-NEXT: bsfl %edi, %eax ; X64-NEXT: retq ; @@ -661,15 +653,13 @@ define i32 @cttz_i32_msize(i32 %x) minsize { define i64 @cttz_i32_sext(i32 %x) { ; X86-NOCMOV-LABEL: cttz_i32_sext: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB12_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: rep bsfl %eax, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB12_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB12_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: rep bsfl %ecx, %eax +; X86-NOCMOV-NEXT: .LBB12_2: # %cond.end ; X86-NOCMOV-NEXT: xorl %edx, %edx ; X86-NOCMOV-NEXT: retl ; @@ -716,15 +706,13 @@ define i64 @cttz_i32_sext(i32 %x) { define i64 @cttz_i32_zext(i32 %x) { ; X86-NOCMOV-LABEL: cttz_i32_zext: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: testl %eax, %eax -; X86-NOCMOV-NEXT: je .LBB13_1 -; X86-NOCMOV-NEXT: # %bb.2: # %cond.false -; X86-NOCMOV-NEXT: rep bsfl %eax, %eax -; X86-NOCMOV-NEXT: xorl %edx, %edx -; X86-NOCMOV-NEXT: retl -; X86-NOCMOV-NEXT: .LBB13_1: +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOCMOV-NEXT: movl $32, %eax +; X86-NOCMOV-NEXT: testl %ecx, %ecx +; X86-NOCMOV-NEXT: je .LBB13_2 +; X86-NOCMOV-NEXT: # %bb.1: # %cond.false +; X86-NOCMOV-NEXT: rep bsfl %ecx, %eax +; X86-NOCMOV-NEXT: .LBB13_2: # %cond.end ; X86-NOCMOV-NEXT: xorl %edx, %edx ; X86-NOCMOV-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll index c1beb7c803b2b..b3b111e4a1e49 100644 --- a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll +++ b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll @@ -66,24 +66,22 @@ define void @simple_urem_to_sel_fail_not_in_loop(i32 %N, i32 %rem_amt) nounwind ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB1_1 -; CHECK-NEXT: # %bb.3: # %for.body.preheader -; CHECK-NEXT: movl %edi, %r14d -; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: je .LBB1_3 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB1_4: # %for.body +; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: movl %r14d, %edi ; CHECK-NEXT: callq use.i32@PLT -; CHECK-NEXT: incl %ebp -; CHECK-NEXT: cmpl %ebp, %r14d -; CHECK-NEXT: jne .LBB1_4 -; CHECK-NEXT: jmp .LBB1_2 -; CHECK-NEXT: .LBB1_1: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: .LBB1_2: # %for.cond.cleanup -; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %r14d, %ebp +; CHECK-NEXT: jne .LBB1_2 +; CHECK-NEXT: .LBB1_3: # %for.cond.cleanup +; CHECK-NEXT: movl %r14d, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ebx ; CHECK-NEXT: movl %edx, %edi @@ -848,15 +846,13 @@ define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) n ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB14_1 -; CHECK-NEXT: # %bb.2: # %for.body1 +; CHECK-NEXT: je .LBB14_2 +; CHECK-NEXT: # %bb.1: # %for.body1 ; CHECK-NEXT: movl $1, %r14d -; CHECK-NEXT: jmp .LBB14_3 -; CHECK-NEXT: .LBB14_1: -; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB14_3: # %for.body +; CHECK-NEXT: .LBB14_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl %r14d, %eax ; CHECK-NEXT: xorl %edx, %edx @@ -865,8 +861,8 @@ define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) n ; CHECK-NEXT: callq use.i32@PLT ; CHECK-NEXT: incl %r14d ; CHECK-NEXT: cmpl %r14d, %ebp -; CHECK-NEXT: jne .LBB14_3 -; CHECK-NEXT: # %bb.4: # %for.cond.cleanup +; CHECK-NEXT: jne .LBB14_2 +; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %rbp @@ -974,27 +970,25 @@ define void @simple_urem_fail_bad_loop(i32 %N, i32 %rem_amt) nounwind { ; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: movl %edi, %ebp ; CHECK-NEXT: callq get.i32@PLT -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: # implicit-def: $r14d -; CHECK-NEXT: jne .LBB16_4 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: .LBB16_2: # %for.cond +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne .LBB16_3 +; CHECK-NEXT: .LBB16_1: # %for.cond ; CHECK-NEXT: cmpl %ebp, %r14d -; CHECK-NEXT: jae .LBB16_5 -; CHECK-NEXT: # %bb.3: # %for.body +; CHECK-NEXT: jae .LBB16_4 +; CHECK-NEXT: # %bb.2: # %for.body ; CHECK-NEXT: movl %r14d, %edi ; CHECK-NEXT: xorl $1, %edi ; CHECK-NEXT: callq use.i32@PLT -; CHECK-NEXT: .LBB16_4: # %halfway +; CHECK-NEXT: .LBB16_3: # %halfway ; CHECK-NEXT: movl %r14d, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ebx ; CHECK-NEXT: movl %edx, %edi ; CHECK-NEXT: callq use.i32@PLT ; CHECK-NEXT: incl %r14d -; CHECK-NEXT: jmp .LBB16_2 -; CHECK-NEXT: .LBB16_5: # %for.end +; CHECK-NEXT: jmp .LBB16_1 +; CHECK-NEXT: .LBB16_4: # %for.end ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/lsr-sort.ll b/llvm/test/CodeGen/X86/lsr-sort.ll index 37cb6f996d34f..b1817d048b36d 100644 --- a/llvm/test/CodeGen/X86/lsr-sort.ll +++ b/llvm/test/CodeGen/X86/lsr-sort.ll @@ -8,15 +8,17 @@ define dso_local i32 @foo(i32 %N) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jle .LBB0_2 +; CHECK-NEXT: jle .LBB0_3 +; CHECK-NEXT: # %bb.1: # %bb.preheader +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_1: # %bb +; CHECK-NEXT: .LBB0_2: # %bb ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movw %ax, X(%rip) ; CHECK-NEXT: incl %eax ; CHECK-NEXT: cmpl %eax, %edi -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: .LBB0_2: # %return +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %return ; CHECK-NEXT: retq entry: %0 = icmp sgt i32 %N, 0 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll index 73d459ba77026..d7f00cd78574f 100644 --- a/llvm/test/CodeGen/X86/mmx-arith.ll +++ b/llvm/test/CodeGen/X86/mmx-arith.ll @@ -403,14 +403,16 @@ define <1 x i64> @test3(ptr %a, ptr %b, i32 %count) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: je .LBB3_1 -; X86-NEXT: # %bb.2: # %bb26.preheader +; X86-NEXT: movl $0, %edx +; X86-NEXT: je .LBB3_3 +; X86-NEXT: # %bb.1: # %bb26.preheader ; X86-NEXT: xorl %ebx, %ebx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: .p2align 4 -; X86-NEXT: .LBB3_3: # %bb26 +; X86-NEXT: .LBB3_2: # %bb26 ; X86-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl (%edi,%ebx,8), %ebp @@ -424,12 +426,8 @@ define <1 x i64> @test3(ptr %a, ptr %b, i32 %count) nounwind { ; X86-NEXT: movl %esi, %ecx ; X86-NEXT: incl %ebx ; X86-NEXT: cmpl %esi, %ebx -; X86-NEXT: jb .LBB3_3 -; X86-NEXT: jmp .LBB3_4 -; X86-NEXT: .LBB3_1: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: .LBB3_4: # %bb31 +; X86-NEXT: jb .LBB3_2 +; X86-NEXT: .LBB3_3: # %bb31 ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll index 05acbc8e90101..024640420e499 100644 --- a/llvm/test/CodeGen/X86/pr2659.ll +++ b/llvm/test/CodeGen/X86/pr2659.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=i686-apple-darwin9.4.0 -disable-branch-fold | FileCheck %s ; PR2659 @@ -5,6 +6,37 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i686-apple-darwin9.4.0" define i32 @binomial(i32 %n, i32 %k) nounwind { +; CHECK-LABEL: binomial: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: cmpl %esi, %ecx +; CHECK-NEXT: jbe LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %ifthen +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: jmp LBB0_5 +; CHECK-NEXT: LBB0_2: ## %forcond.preheader +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: je LBB0_5 +; CHECK-NEXT: ## %bb.3: ## %forbody.preheader +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: LBB0_4: ## %forbody +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: imull %esi, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %edi +; CHECK-NEXT: incl %edi +; CHECK-NEXT: decl %esi +; CHECK-NEXT: cmpl %ecx, %edi +; CHECK-NEXT: jbe LBB0_4 +; CHECK-NEXT: LBB0_5: ## %afterfor +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl entry: %cmp = icmp ugt i32 %k, %n ; [#uses=1] br i1 %cmp, label %ifthen, label %forcond.preheader @@ -13,18 +45,8 @@ forcond.preheader: ; preds = %entry %cmp44 = icmp eq i32 %k, 0 ; [#uses=1] br i1 %cmp44, label %afterfor, label %forbody -; CHECK: %forcond.preheader -; CHECK: testl -; CHECK-NOT: xorl -; CHECK-NOT: movl -; CHECK-NOT: LBB -; CHECK: je ; There should be no moves required in the for loop body. -; CHECK: %forbody{{$}} -; CHECK-NOT: mov -; CHECK: jbe -; CHECK: movl $1 ifthen: ; preds = %entry ret i32 0 diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index c3c96e8228797..188270efe8a72 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -25,126 +25,123 @@ define dso_local void @fn() { ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: # implicit-def: $ecx ; CHECK-NEXT: # implicit-def: $edi -; CHECK-NEXT: # implicit-def: $dh +; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $al ; CHECK-NEXT: # kill: killed $al ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: .LBB0_14: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_18 Depth 2 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB0_3 ; CHECK-NEXT: # %bb.2: # %if.then ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movl $.str, (%esp) +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload +; CHECK-NEXT: jmp .LBB0_5 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl a -; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload -; CHECK-NEXT: cmpb $8, %dh -; CHECK-NEXT: jg .LBB0_7 +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: cmpb $8, %dl +; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.4: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: jne .LBB0_14 +; CHECK-NEXT: jmp .LBB0_5 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: # %bb.5: # %for.cond35 +; CHECK-NEXT: .LBB0_5: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_6 -; CHECK-NEXT: .LBB0_10: # %af +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.6: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: .LBB0_16: # %if.end39 +; CHECK-NEXT: jne .LBB0_7 +; CHECK-NEXT: .LBB0_15: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_18 -; CHECK-NEXT: # %bb.17: # %if.then41 +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: # %bb.16: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_18: # %for.end46 +; CHECK-NEXT: .LBB0_17: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $dh +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl +; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_19 -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_7: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_8 -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_6: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_19: # %for.cond47 +; CHECK-NEXT: .LBB0_18: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: .LBB0_8: # %ae +; CHECK-NEXT: jne .LBB0_18 +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_9 -; CHECK-NEXT: # %bb.12: # %if.end26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.11: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb %dh, %dh -; CHECK-NEXT: je .LBB0_15 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je .LBB0_14 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_15 -; CHECK-NEXT: # %bb.14: # %if.then31 +; CHECK-NEXT: jne .LBB0_14 +; CHECK-NEXT: # %bb.13: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_15 +; CHECK-NEXT: jmp .LBB0_14 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_9: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl ; CHECK-NEXT: # kill: killed $cl -; CHECK-NEXT: # implicit-def: $dl +; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: jmp .LBB0_5 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/probe-stack-eflags.ll b/llvm/test/CodeGen/X86/probe-stack-eflags.ll index cc1839bba7e89..92a6ab2b7bf6a 100644 --- a/llvm/test/CodeGen/X86/probe-stack-eflags.ll +++ b/llvm/test/CodeGen/X86/probe-stack-eflags.ll @@ -14,9 +14,10 @@ define i32 @f(i32 %a, i32 %b) #0 { ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_1 -; CHECK-NEXT: # %bb.2: # %bb16.i +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb16.i ; CHECK-NEXT: sets %bl ; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: sets %bpl @@ -28,10 +29,7 @@ define i32 @f(i32 %a, i32 %b) #0 { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: xorb $1, %bl ; CHECK-NEXT: movzbl %bl, %eax -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: .LBB0_3: # %exit2 +; CHECK-NEXT: .LBB0_2: # %exit2 ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/taildup-heapallocsite.ll b/llvm/test/CodeGen/X86/taildup-heapallocsite.ll index 967e125f81352..459c8e28f9d3d 100644 --- a/llvm/test/CodeGen/X86/taildup-heapallocsite.ll +++ b/llvm/test/CodeGen/X86/taildup-heapallocsite.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -tail-dup-placement-threshold=4 | FileCheck %s ; Based on test case from PR43695: @@ -35,13 +36,12 @@ cond.end: ; preds = %entry, %cond.true ; CHECK-LABEL: taildupit: # @taildupit ; CHECK: testq -; CHECK: je +; CHECK: je [[CALL:.+]] +; CHECK: movl +; CHECK: [[CALL]]: ; CHECK: callq alloc ; CHECK-NEXT: [[L1:.Ltmp[0-9]+]] ; CHECK: jmp f2 # TAILCALL -; CHECK: callq alloc -; CHECK-NEXT: [[L3:.Ltmp[0-9]+]] -; CHECK: jmp f2 # TAILCALL ; CHECK-LABEL: .short 4423 # Record kind: S_GPROC32_ID ; CHECK: .short 4446 # Record kind: S_HEAPALLOCSITE @@ -49,11 +49,6 @@ cond.end: ; preds = %entry, %cond.true ; CHECK-NEXT: .secidx [[L0]] ; CHECK-NEXT: .short [[L1]]-[[L0]] ; CHECK-NEXT: .long 3 -; CHECK: .short 4446 # Record kind: S_HEAPALLOCSITE -; CHECK-NEXT: .secrel32 [[L2:.Ltmp[0-9]+]] -; CHECK-NEXT: .secidx [[L2]] -; CHECK-NEXT: .short [[L3]]-[[L2]] -; CHECK-NEXT: .long 3 declare dso_local ptr @alloc(i32) diff --git a/llvm/test/CodeGen/X86/testb-je-fusion.ll b/llvm/test/CodeGen/X86/testb-je-fusion.ll index d4a2525744da3..72ee675b23861 100644 --- a/llvm/test/CodeGen/X86/testb-je-fusion.ll +++ b/llvm/test/CodeGen/X86/testb-je-fusion.ll @@ -52,26 +52,24 @@ if.end: define i32 @macrofuse_cmp_je(i32 %flags, ptr %p) nounwind { ; NOFUSION-LABEL: macrofuse_cmp_je: ; NOFUSION: # %bb.0: # %entry +; NOFUSION-NEXT: xorl %eax, %eax ; NOFUSION-NEXT: cmpl $512, %edi # imm = 0x200 ; NOFUSION-NEXT: movb $1, (%rsi) -; NOFUSION-NEXT: je .LBB1_1 -; NOFUSION-NEXT: # %bb.2: # %if.then +; NOFUSION-NEXT: je .LBB1_2 +; NOFUSION-NEXT: # %bb.1: # %if.then ; NOFUSION-NEXT: movl $1, %eax -; NOFUSION-NEXT: retq -; NOFUSION-NEXT: .LBB1_1: -; NOFUSION-NEXT: xorl %eax, %eax +; NOFUSION-NEXT: .LBB1_2: # %if.end ; NOFUSION-NEXT: retq ; ; FUSION-LABEL: macrofuse_cmp_je: ; FUSION: # %bb.0: # %entry +; FUSION-NEXT: xorl %eax, %eax ; FUSION-NEXT: movb $1, (%rsi) ; FUSION-NEXT: cmpl $512, %edi # imm = 0x200 -; FUSION-NEXT: je .LBB1_1 -; FUSION-NEXT: # %bb.2: # %if.then +; FUSION-NEXT: je .LBB1_2 +; FUSION-NEXT: # %bb.1: # %if.then ; FUSION-NEXT: movl $1, %eax -; FUSION-NEXT: retq -; FUSION-NEXT: .LBB1_1: -; FUSION-NEXT: xorl %eax, %eax +; FUSION-NEXT: .LBB1_2: # %if.end ; FUSION-NEXT: retq entry: %sub = sub i32 %flags, 512 diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index f007b316b92b2..a2b80e027eb30 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1381,24 +1381,22 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax ; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4 -; ENABLE-NEXT: LBB16_5: ## %for.inc +; ENABLE-NEXT: LBB16_4: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_5 -; ENABLE-NEXT: ## %bb.6: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_4 +; ENABLE-NEXT: ## %bb.5: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1424,24 +1422,22 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax ; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4 -; DISABLE-NEXT: LBB16_5: ## %for.inc +; DISABLE-NEXT: LBB16_4: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_5 -; DISABLE-NEXT: ## %bb.6: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_4 +; DISABLE-NEXT: ## %bb.5: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx diff --git a/llvm/test/DebugInfo/COFF/pieces.ll b/llvm/test/DebugInfo/COFF/pieces.ll index 8e62ad0093aa3..e826fa85cac41 100644 --- a/llvm/test/DebugInfo/COFF/pieces.ll +++ b/llvm/test/DebugInfo/COFF/pieces.ll @@ -37,12 +37,12 @@ ; ASM-LABEL: loop_csr: # @loop_csr ; ASM: #DEBUG_VALUE: loop_csr:o <- [DW_OP_LLVM_fragment 0 32] 0 ; ASM: #DEBUG_VALUE: loop_csr:o <- [DW_OP_LLVM_fragment 32 32] 0 -; ASM: # %bb.2: # %for.body.preheader +; ASM: # %bb.1: # %for.body.preheader ; ASM: xorl %edi, %edi ; ASM: xorl %esi, %esi ; ASM: [[oy_ox_start:\.Ltmp[0-9]+]]: ; ASM: .p2align 4 -; ASM: .LBB0_3: # %for.body +; ASM: .LBB0_2: # %for.body ; ASM: #DEBUG_VALUE: loop_csr:o <- [DW_OP_LLVM_fragment 0 32] $edi ; ASM: #DEBUG_VALUE: loop_csr:o <- [DW_OP_LLVM_fragment 32 32] $esi ; ASM: .cv_loc 0 1 13 11 # t.c:13:11 @@ -58,19 +58,11 @@ ; ASM: [[oy_start:\.Ltmp[0-9]+]]: ; ASM: #DEBUG_VALUE: loop_csr:o <- [DW_OP_LLVM_fragment 32 32] $esi ; ASM: cmpl n(%rip), %eax -; ASM: jl .LBB0_3 -; ASM: [[loopskip_start:\.Ltmp[0-9]+]]: -; ASM: #DEBUG_VALUE: loop_csr:o <- [DW_OP_LLVM_fragment 0 32] 0 -; ASM: xorl %esi, %esi -; ASM: xorl %edi, %edi +; ASM: jl .LBB0_2 ; ASM: [[oy_end:\.Ltmp[0-9]+]]: ; ASM: addl %edi, %esi ; ASM: movl %esi, %eax -; XXX FIXME: the debug value line after loopskip_start should be repeated -; because both fields of 'o' are zero flowing into this block. However, it -; appears livedebugvalues doesn't account for fragments. - ; ASM-LABEL: pad_right: # @pad_right ; ASM: movq %rcx, %rax ; ASM: [[pad_right_tmp:\.Ltmp[0-9]+]]: