Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
"If the branch threshold is higher than this threshold, we allow "
"speculative execution of up to 1 instruction to avoid branching to "
"splitted critical edge"),
cl::init(40), cl::Hidden);
cl::init(35), cl::Hidden);

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
"machine-sink-load-instrs-threshold",
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -632,20 +632,18 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A,
;
; CHECK-GI-LABEL: red_mla_dup_ext_u8_s8_s16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cbz w2, .LBB5_3
; CHECK-GI-NEXT: mov w8, wzr
; CHECK-GI-NEXT: cbz w2, .LBB5_9
; CHECK-GI-NEXT: // %bb.1: // %for.body.preheader
; CHECK-GI-NEXT: cmp w2, #16
; CHECK-GI-NEXT: mov w8, w2
; CHECK-GI-NEXT: b.hs .LBB5_4
; CHECK-GI-NEXT: b.hs .LBB5_3
; CHECK-GI-NEXT: // %bb.2:
; CHECK-GI-NEXT: mov w10, #0 // =0x0
; CHECK-GI-NEXT: mov x9, xzr
; CHECK-GI-NEXT: fmov s0, w10
; CHECK-GI-NEXT: b .LBB5_8
; CHECK-GI-NEXT: .LBB5_3:
; CHECK-GI-NEXT: mov w0, wzr
; CHECK-GI-NEXT: ret
; CHECK-GI-NEXT: .LBB5_4: // %vector.ph
; CHECK-GI-NEXT: b .LBB5_7
; CHECK-GI-NEXT: .LBB5_3: // %vector.ph
; CHECK-GI-NEXT: lsl w9, w1, #8
; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
Expand All @@ -654,7 +652,7 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A,
; CHECK-GI-NEXT: dup v2.8h, w9
; CHECK-GI-NEXT: and x9, x8, #0xfffffff0
; CHECK-GI-NEXT: mov x11, x9
; CHECK-GI-NEXT: .LBB5_5: // %vector.body
; CHECK-GI-NEXT: .LBB5_4: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldp d3, d4, [x10, #-8]
; CHECK-GI-NEXT: subs x11, x11, #16
Expand All @@ -663,29 +661,31 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A,
; CHECK-GI-NEXT: ushll v4.8h, v4.8b, #0
; CHECK-GI-NEXT: mla v0.8h, v2.8h, v3.8h
; CHECK-GI-NEXT: mla v1.8h, v2.8h, v4.8h
; CHECK-GI-NEXT: b.ne .LBB5_5
; CHECK-GI-NEXT: // %bb.6: // %middle.block
; CHECK-GI-NEXT: b.ne .LBB5_4
; CHECK-GI-NEXT: // %bb.5: // %middle.block
; CHECK-GI-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: cmp x9, x8
; CHECK-GI-NEXT: addv h0, v0.8h
; CHECK-GI-NEXT: b.ne .LBB5_8
; CHECK-GI-NEXT: // %bb.7:
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: b.ne .LBB5_7
; CHECK-GI-NEXT: // %bb.6:
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov w0, w8
; CHECK-GI-NEXT: ret
; CHECK-GI-NEXT: .LBB5_8: // %for.body.preheader1
; CHECK-GI-NEXT: .LBB5_7: // %for.body.preheader1
; CHECK-GI-NEXT: sxtb w10, w1
; CHECK-GI-NEXT: sub x8, x8, x9
; CHECK-GI-NEXT: sub x11, x8, x9
; CHECK-GI-NEXT: add x9, x0, x9
; CHECK-GI-NEXT: .LBB5_9: // %for.body
; CHECK-GI-NEXT: .LBB5_8: // %for.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrb w11, [x9], #1
; CHECK-GI-NEXT: ldrb w8, [x9], #1
; CHECK-GI-NEXT: fmov w12, s0
; CHECK-GI-NEXT: subs x8, x8, #1
; CHECK-GI-NEXT: mul w11, w11, w10
; CHECK-GI-NEXT: add w0, w11, w12, uxth
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: b.ne .LBB5_9
; CHECK-GI-NEXT: // %bb.10: // %for.cond.cleanup
; CHECK-GI-NEXT: subs x11, x11, #1
; CHECK-GI-NEXT: mul w8, w8, w10
; CHECK-GI-NEXT: add w8, w8, w12, uxth
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: b.ne .LBB5_8
; CHECK-GI-NEXT: .LBB5_9: // %for.cond.cleanup
; CHECK-GI-NEXT: mov w0, w8
; CHECK-GI-NEXT: ret
entry:
%conv2 = sext i8 %B to i16
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/swifterror.ll
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) {
; CHECK-APPLE-NEXT: .cfi_def_cfa w29, 16
; CHECK-APPLE-NEXT: .cfi_offset w30, -8
; CHECK-APPLE-NEXT: .cfi_offset w29, -16
; CHECK-APPLE-NEXT: movi d0, #0000000000000000
; CHECK-APPLE-NEXT: cbz w0, LBB3_2
; CHECK-APPLE-NEXT: ; %bb.1: ; %gen_error
; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10
Expand All @@ -420,10 +421,7 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) {
; CHECK-APPLE-NEXT: fmov s0, #1.00000000
; CHECK-APPLE-NEXT: mov w8, #1 ; =0x1
; CHECK-APPLE-NEXT: strb w8, [x0, #8]
; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-APPLE-NEXT: ret
; CHECK-APPLE-NEXT: LBB3_2:
; CHECK-APPLE-NEXT: movi d0, #0000000000000000
; CHECK-APPLE-NEXT: LBB3_2: ; %common.ret
; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-APPLE-NEXT: ret
;
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
; CHECK-NEXT: s_mov_b32 s0, 1
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
Expand Down Expand Up @@ -330,15 +331,12 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: .LBB1_2: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s0, 1
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_cbranch_scc1 .LBB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
; CHECK-NEXT: s_sub_i32 s0, 0, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
Expand All @@ -358,7 +356,7 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: ; return to shader part epilog
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
; CHECK-NEXT: s_mov_b32 s7, 1
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
Expand Down Expand Up @@ -322,15 +323,12 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: .LBB1_2: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s7, 1
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_cbranch_scc1 .LBB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
; CHECK-NEXT: s_sub_i32 s0, 0, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
Expand All @@ -348,7 +346,7 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: ; return to shader part epilog
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
; CHECK-NEXT: s_mov_b32 s6, 1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_mov_b32_e32 v0, s3
Expand Down Expand Up @@ -318,15 +319,12 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: .LBB1_2: ; %Flow
; CHECK-NEXT: s_xor_b32 s1, s6, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_cbranch_scc1 .LBB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
; CHECK-NEXT: s_sub_i32 s1, 0, s2
; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
Expand All @@ -345,7 +343,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: ; return to shader part epilog
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
; CHECK-NEXT: s_mov_b32 s6, 1
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_mov_b32_e32 v0, s3
Expand Down Expand Up @@ -314,15 +315,12 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: .LBB1_2: ; %Flow
; CHECK-NEXT: s_xor_b32 s1, s6, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_cbranch_scc1 .LBB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
; CHECK-NEXT: s_sub_i32 s1, 0, s2
; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
Expand All @@ -339,7 +337,7 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: ; return to shader part epilog
Expand Down
14 changes: 5 additions & 9 deletions llvm/test/CodeGen/AMDGPU/artificial-terminators.mir
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,14 @@ body: |
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.2(0x50000000)
; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.2(0x50000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[V_ADD_U32_e64_3]], [[S_MOV_B32_1]], implicit $exec
; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[V_CMP_LT_I32_e64_]], implicit-def $scc
; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_XOR_B32_]]
; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[V_CMP_LT_I32_e64_]]
; CHECK-NEXT: S_BRANCH %bb.4
; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_XOR_B32_]]
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
Expand All @@ -64,7 +60,7 @@ body: |
; CHECK-NEXT: S_BRANCH %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY3]], %bb.5, [[S_OR_B32_]], %bb.2, [[S_OR_B32_]], %bb.3
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY3]], %bb.1, [[S_OR_B32_]], %bb.2, [[S_OR_B32_]], %bb.3
; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[PHI]], implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
bb.0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,44 +9,34 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT: s_load_dwordx8 s[36:43], s[8:9], 0x0
; CHECK-NEXT: s_load_dwordx8 s[20:27], s[8:9], 0x0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s12, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s40, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
; CHECK-NEXT: s_cmp_eq_u32 s42, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
; CHECK-NEXT: s_cmp_lg_u32 s43, 0
; CHECK-NEXT: s_mov_b32 s17, 0
; CHECK-NEXT: s_cselect_b32 s12, -1, 0
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: s_mov_b32 s36, 0
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_6
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_mov_b32 s14, s12
; CHECK-NEXT: s_mov_b32 s15, s12
; CHECK-NEXT: s_mov_b32 s13, s12
; CHECK-NEXT: s_mov_b64 s[38:39], s[14:15]
; CHECK-NEXT: s_mov_b64 s[36:37], s[12:13]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s24, 0
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_mov_b64 s[38:39], s[22:23]
; CHECK-NEXT: s_mov_b64 s[36:37], s[20:21]
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s41, 0
; CHECK-NEXT: s_mov_b32 s36, 1.0
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
; CHECK-NEXT: .LBB0_2: ; %if.end13.i.i
; CHECK-NEXT: s_mov_b32 s37, s36
; CHECK-NEXT: s_mov_b32 s38, s36
; CHECK-NEXT: s_cmp_eq_u32 s26, 0
; CHECK-NEXT: s_mov_b32 s39, s36
; CHECK-NEXT: s_cbranch_scc1 .LBB0_6
; CHECK-NEXT: ; %bb.3: ; %if.else251.i.i
; CHECK-NEXT: s_cmp_lg_u32 s27, 0
; CHECK-NEXT: s_mov_b32 s17, 0
; CHECK-NEXT: s_cselect_b32 s12, -1, 0
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_8
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_mov_b32 s36, 0
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
; CHECK-NEXT: s_cbranch_vccnz .LBB0_6
; CHECK-NEXT: .LBB0_5: ; %if.end273.i.i
; CHECK-NEXT: s_add_u32 s12, s8, 40
; CHECK-NEXT: s_addc_u32 s13, s9, 0
; CHECK-NEXT: s_getpc_b64 s[18:19]
Expand All @@ -72,13 +62,13 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s37, s36
; CHECK-NEXT: s_mov_b32 s38, s36
; CHECK-NEXT: s_mov_b32 s39, s36
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
; CHECK-NEXT: .LBB0_6: ; %if.end294.i.i
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
; CHECK-NEXT: .LBB0_7: ; %kernel_direct_lighting.exit
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
; CHECK-NEXT: v_mov_b32_e32 v0, s36
; CHECK-NEXT: v_mov_b32_e32 v4, 0
Expand All @@ -88,6 +78,16 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB0_8: ; %if.then263.i.i
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s25, 0
; CHECK-NEXT: s_mov_b32 s36, 1.0
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
; CHECK-NEXT: s_mov_b32 s37, s36
; CHECK-NEXT: s_mov_b32 s38, s36
; CHECK-NEXT: s_mov_b32 s39, s36
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: s_branch .LBB0_6
entry:
%cmp5.i.i = icmp eq i32 %cmp5.i.i.arg, 0
br i1 %cmp5.i.i, label %if.end13.i.i, label %kernel_direct_lighting.exit
Expand Down
Loading