Skip to content

Commit b60caca

Browse files
committed
Fix AMDGPU tests
1 parent 70d14b5 commit b60caca

File tree

6 files changed

+219
-263
lines changed

6 files changed

+219
-263
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll

Lines changed: 87 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -480,29 +480,58 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
480480
; GFX906-LABEL: v8i8_phi_chain:
481481
; GFX906: ; %bb.0: ; %entry
482482
; GFX906-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
483-
; GFX906-NEXT: v_lshlrev_b32_e32 v3, 3, v0
484-
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
485-
; GFX906-NEXT: s_xor_b64 s[0:1], vcc, -1
483+
; GFX906-NEXT: v_lshlrev_b32_e32 v5, 3, v0
484+
; GFX906-NEXT: v_cmp_le_u32_e32 vcc, 15, v0
485+
; GFX906-NEXT: v_cmp_gt_u32_e64 s[0:1], 7, v0
486+
; GFX906-NEXT: s_or_b64 s[2:3], vcc, s[0:1]
486487
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
487-
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[8:9]
488-
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
488+
; GFX906-NEXT: global_load_dwordx2 v[3:4], v5, s[8:9]
489+
; GFX906-NEXT: global_load_dwordx2 v[1:2], v5, s[10:11]
490+
; GFX906-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
489491
; GFX906-NEXT: s_cbranch_execz .LBB8_2
490-
; GFX906-NEXT: ; %bb.1: ; %bb.1
491-
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[10:11]
492-
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
493-
; GFX906-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
494-
; GFX906-NEXT: s_and_b64 s[4:5], exec, vcc
495-
; GFX906-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
496-
; GFX906-NEXT: .LBB8_2: ; %Flow
497-
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
498-
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
499-
; GFX906-NEXT: s_cbranch_execz .LBB8_4
500-
; GFX906-NEXT: ; %bb.3: ; %bb.2
501-
; GFX906-NEXT: v_mov_b32_e32 v0, 0
492+
; GFX906-NEXT: ; %bb.1: ; %bb.2
493+
; GFX906-NEXT: s_waitcnt vmcnt(1)
494+
; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v3
495+
; GFX906-NEXT: v_lshrrev_b32_e32 v7, 8, v4
496+
; GFX906-NEXT: v_lshrrev_b32_e32 v9, 24, v4
502497
; GFX906-NEXT: s_waitcnt vmcnt(0)
498+
; GFX906-NEXT: v_lshrrev_b32_e32 v10, 8, v1
499+
; GFX906-NEXT: v_lshrrev_b32_e32 v13, 8, v2
500+
; GFX906-NEXT: v_lshrrev_b32_e32 v15, 24, v2
501+
; GFX906-NEXT: v_lshrrev_b32_e32 v5, 16, v3
502+
; GFX906-NEXT: v_lshrrev_b32_e32 v6, 24, v3
503+
; GFX906-NEXT: v_lshrrev_b32_e32 v8, 16, v4
504+
; GFX906-NEXT: v_lshrrev_b32_e32 v11, 16, v1
505+
; GFX906-NEXT: v_lshrrev_b32_e32 v12, 24, v1
506+
; GFX906-NEXT: v_lshrrev_b32_e32 v14, 16, v2
507+
; GFX906-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
508+
; GFX906-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
509+
; GFX906-NEXT: v_cndmask_b32_e32 v4, v13, v7, vcc
510+
; GFX906-NEXT: v_cndmask_b32_e32 v7, v15, v9, vcc
511+
; GFX906-NEXT: v_mov_b32_e32 v9, 8
512+
; GFX906-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
513+
; GFX906-NEXT: v_cndmask_b32_e32 v3, v11, v5, vcc
514+
; GFX906-NEXT: v_cndmask_b32_e32 v5, v12, v6, vcc
515+
; GFX906-NEXT: v_cndmask_b32_e32 v6, v14, v8, vcc
516+
; GFX906-NEXT: v_mov_b32_e32 v8, 0xff
517+
; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
518+
; GFX906-NEXT: v_and_or_b32 v0, v1, v8, v0
519+
; GFX906-NEXT: v_and_b32_e32 v1, 0xff, v3
520+
; GFX906-NEXT: v_and_b32_e32 v3, 0xff, v5
521+
; GFX906-NEXT: v_lshlrev_b32_e32 v1, 16, v1
522+
; GFX906-NEXT: v_lshlrev_b32_e32 v3, 24, v3
523+
; GFX906-NEXT: v_or3_b32 v1, v0, v1, v3
524+
; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
525+
; GFX906-NEXT: v_and_or_b32 v0, v2, v8, v0
526+
; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v6
527+
; GFX906-NEXT: v_and_b32_e32 v3, 0xff, v7
528+
; GFX906-NEXT: v_lshlrev_b32_e32 v2, 16, v2
529+
; GFX906-NEXT: v_lshlrev_b32_e32 v3, 24, v3
530+
; GFX906-NEXT: v_or3_b32 v2, v0, v2, v3
531+
; GFX906-NEXT: v_mov_b32_e32 v0, 0
503532
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[12:13]
504-
; GFX906-NEXT: .LBB8_4: ; %bb.3
505-
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
533+
; GFX906-NEXT: .LBB8_2: ; %bb.3
534+
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
506535
; GFX906-NEXT: v_mov_b32_e32 v0, 0
507536
; GFX906-NEXT: s_waitcnt vmcnt(0)
508537
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[14:15]
@@ -535,29 +564,50 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
535564
; GFX906: ; %bb.0: ; %entry
536565
; GFX906-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
537566
; GFX906-NEXT: v_lshlrev_b32_e32 v5, 3, v0
538-
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
567+
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
568+
; GFX906-NEXT: v_cmp_gt_u32_e64 s[0:1], 15, v0
569+
; GFX906-NEXT: s_and_b64 s[2:3], s[0:1], vcc
539570
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
540-
; GFX906-NEXT: global_load_dwordx2 v[3:4], v5, s[8:9]
571+
; GFX906-NEXT: global_load_dwordx2 v[1:2], v5, s[8:9]
572+
; GFX906-NEXT: global_load_dwordx2 v[3:4], v5, s[10:11]
573+
; GFX906-NEXT: s_mov_b64 vcc, s[0:1]
574+
; GFX906-NEXT: v_mov_b32_e32 v6, 8
575+
; GFX906-NEXT: v_mov_b32_e32 v5, 0xff
576+
; GFX906-NEXT: s_waitcnt vmcnt(1)
577+
; GFX906-NEXT: v_lshrrev_b32_e32 v7, 8, v1
541578
; GFX906-NEXT: s_waitcnt vmcnt(0)
542-
; GFX906-NEXT: v_mov_b32_e32 v1, v3
543-
; GFX906-NEXT: v_mov_b32_e32 v2, v4
544-
; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
545-
; GFX906-NEXT: s_cbranch_execz .LBB9_4
546-
; GFX906-NEXT: ; %bb.1: ; %bb.1
547-
; GFX906-NEXT: global_load_dwordx2 v[1:2], v5, s[10:11]
548-
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
549-
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
550-
; GFX906-NEXT: s_cbranch_execz .LBB9_3
551-
; GFX906-NEXT: ; %bb.2: ; %bb.2
579+
; GFX906-NEXT: v_lshrrev_b32_e32 v9, 8, v3
580+
; GFX906-NEXT: v_lshrrev_b32_e32 v8, 8, v2
581+
; GFX906-NEXT: v_lshrrev_b32_e32 v10, 8, v4
582+
; GFX906-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1]
583+
; GFX906-NEXT: v_cndmask_b32_sdwa v9, v1, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
584+
; GFX906-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[0:1]
585+
; GFX906-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[0:1]
586+
; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
587+
; GFX906-NEXT: v_and_b32_e32 v9, 0xff, v9
588+
; GFX906-NEXT: v_cndmask_b32_e64 v11, v2, v4, s[0:1]
589+
; GFX906-NEXT: v_cndmask_b32_sdwa v10, v1, v3, vcc dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3
590+
; GFX906-NEXT: v_lshlrev_b32_sdwa v6, v6, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
591+
; GFX906-NEXT: v_and_or_b32 v0, v0, v5, v7
592+
; GFX906-NEXT: v_lshlrev_b32_e32 v7, 16, v9
593+
; GFX906-NEXT: v_cndmask_b32_sdwa v8, v2, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
594+
; GFX906-NEXT: v_and_or_b32 v6, v11, v5, v6
595+
; GFX906-NEXT: v_or3_b32 v5, v0, v7, v10
596+
; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v8
597+
; GFX906-NEXT: v_lshlrev_b32_e32 v0, 16, v0
598+
; GFX906-NEXT: v_cndmask_b32_sdwa v7, v2, v4, vcc dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3
599+
; GFX906-NEXT: v_or3_b32 v6, v6, v0, v7
600+
; GFX906-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
601+
; GFX906-NEXT: s_cbranch_execz .LBB9_2
602+
; GFX906-NEXT: ; %bb.1: ; %bb.2
603+
; GFX906-NEXT: v_mov_b32_e32 v6, v4
552604
; GFX906-NEXT: v_mov_b32_e32 v0, 0
553-
; GFX906-NEXT: global_store_dwordx2 v0, v[3:4], s[12:13]
554-
; GFX906-NEXT: .LBB9_3: ; %Flow
555-
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
556-
; GFX906-NEXT: .LBB9_4: ; %bb.3
605+
; GFX906-NEXT: v_mov_b32_e32 v5, v3
606+
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[12:13]
607+
; GFX906-NEXT: .LBB9_2: ; %bb.3
557608
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
558609
; GFX906-NEXT: v_mov_b32_e32 v0, 0
559-
; GFX906-NEXT: s_waitcnt vmcnt(0)
560-
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[14:15]
610+
; GFX906-NEXT: global_store_dwordx2 v0, v[5:6], s[14:15]
561611
; GFX906-NEXT: s_endpgm
562612
entry:
563613
%idx = call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll

Lines changed: 26 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,73 +12,59 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
1212
; CHECK-NEXT: s_load_dwordx8 s[48:55], s[8:9], 0x0
1313
; CHECK-NEXT: s_add_u32 s0, s0, s17
1414
; CHECK-NEXT: s_addc_u32 s1, s1, 0
15-
; CHECK-NEXT: s_mov_b32 s12, 0
1615
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1716
; CHECK-NEXT: s_cmp_lg_u32 s52, 0
18-
; CHECK-NEXT: s_cbranch_scc1 .LBB0_9
19-
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
20-
; CHECK-NEXT: s_cmp_eq_u32 s54, 0
2117
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
22-
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
23-
; CHECK-NEXT: s_cmp_lg_u32 s55, 0
24-
; CHECK-NEXT: s_mov_b32 s17, 0
25-
; CHECK-NEXT: s_cselect_b32 s12, -1, 0
26-
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
27-
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
28-
; CHECK-NEXT: ; %bb.3:
29-
; CHECK-NEXT: s_mov_b32 s18, 0
30-
; CHECK-NEXT: s_branch .LBB0_6
31-
; CHECK-NEXT: .LBB0_4:
32-
; CHECK-NEXT: s_mov_b32 s14, s12
33-
; CHECK-NEXT: s_mov_b32 s15, s12
34-
; CHECK-NEXT: s_mov_b32 s13, s12
35-
; CHECK-NEXT: s_mov_b64 s[50:51], s[14:15]
36-
; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
37-
; CHECK-NEXT: s_branch .LBB0_8
38-
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
18+
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
3919
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
40-
; CHECK-NEXT: s_mov_b32 s18, 1.0
41-
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
42-
; CHECK-NEXT: .LBB0_6: ; %Flow
43-
; CHECK-NEXT: s_mov_b32 s48, 1.0
44-
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
20+
; CHECK-NEXT: s_cmp_lg_u32 s55, 0
21+
; CHECK-NEXT: s_cselect_b32 s17, -1, 0
22+
; CHECK-NEXT: s_or_b32 s12, s17, s12
23+
; CHECK-NEXT: s_cmp_lg_u32 s54, 0
24+
; CHECK-NEXT: s_cselect_b32 s13, -1, 0
25+
; CHECK-NEXT: s_and_b32 s18, s13, exec_lo
26+
; CHECK-NEXT: s_cselect_b32 s48, 1.0, 0
27+
; CHECK-NEXT: s_and_b32 s12, s13, s12
4528
; CHECK-NEXT: s_mov_b32 s49, s48
4629
; CHECK-NEXT: s_mov_b32 s50, s48
30+
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
4731
; CHECK-NEXT: s_mov_b32 s51, s48
48-
; CHECK-NEXT: s_cbranch_vccnz .LBB0_8
49-
; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i
32+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_3
33+
; CHECK-NEXT: ; %bb.2: ; %if.end273.i.i
5034
; CHECK-NEXT: s_add_u32 s12, s8, 40
5135
; CHECK-NEXT: s_addc_u32 s13, s9, 0
52-
; CHECK-NEXT: s_getpc_b64 s[20:21]
53-
; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_@gotpcrel32@lo+4
54-
; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_@gotpcrel32@hi+12
36+
; CHECK-NEXT: s_getpc_b64 s[18:19]
37+
; CHECK-NEXT: s_add_u32 s18, s18, _Z3dotDv3_fS_@gotpcrel32@lo+4
38+
; CHECK-NEXT: s_addc_u32 s19, s19, _Z3dotDv3_fS_@gotpcrel32@hi+12
39+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 1.0, 0, s17
40+
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
41+
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0x7fc00000, 0, s17
5542
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
56-
; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
57-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
58-
; CHECK-NEXT: v_add_f32_e64 v1, s17, s18
43+
; CHECK-NEXT: v_lshlrev_b32_e32 v5, 10, v1
5944
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
6045
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
46+
; CHECK-NEXT: v_add_f32_e32 v1, v4, v3
6147
; CHECK-NEXT: s_mov_b32 s12, s14
62-
; CHECK-NEXT: v_or3_b32 v31, v0, v3, v2
63-
; CHECK-NEXT: v_mov_b32_e32 v0, v1
64-
; CHECK-NEXT: v_mov_b32_e32 v1, 0
48+
; CHECK-NEXT: v_or3_b32 v31, v0, v5, v2
6549
; CHECK-NEXT: v_mov_b32_e32 v2, 0
6650
; CHECK-NEXT: s_mov_b32 s13, s15
51+
; CHECK-NEXT: v_mov_b32_e32 v0, v1
52+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
6753
; CHECK-NEXT: s_mov_b32 s14, s16
6854
; CHECK-NEXT: s_mov_b32 s48, 0
6955
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
70-
; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21]
56+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
7157
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
7258
; CHECK-NEXT: s_mov_b32 s49, s48
7359
; CHECK-NEXT: s_mov_b32 s50, s48
7460
; CHECK-NEXT: s_mov_b32 s51, s48
75-
; CHECK-NEXT: .LBB0_8: ; %if.end294.i.i
61+
; CHECK-NEXT: .LBB0_3: ; %if.end294.i.i
7662
; CHECK-NEXT: v_mov_b32_e32 v0, 0
7763
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
7864
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
7965
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
8066
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
81-
; CHECK-NEXT: .LBB0_9: ; %kernel_direct_lighting.exit
67+
; CHECK-NEXT: .LBB0_4: ; %kernel_direct_lighting.exit
8268
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
8369
; CHECK-NEXT: v_mov_b32_e32 v0, s48
8470
; CHECK-NEXT: v_mov_b32_e32 v4, 0

llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

Lines changed: 35 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -20,50 +20,48 @@ define amdgpu_ps void @main(i32 %0, float %1) {
2020
; ISA: ; %bb.0: ; %start
2121
; ISA-NEXT: v_readfirstlane_b32 s0, v0
2222
; ISA-NEXT: s_mov_b32 m0, s0
23-
; ISA-NEXT: s_mov_b32 s10, 0
23+
; ISA-NEXT: s_mov_b64 s[4:5], 0
2424
; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
2525
; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
26-
; ISA-NEXT: s_mov_b64 s[0:1], 0
27-
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
28-
; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
29-
; ISA-NEXT: s_branch .LBB0_3
30-
; ISA-NEXT: .LBB0_1: ; %Flow1
31-
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
32-
; ISA-NEXT: s_or_b64 exec, exec, s[4:5]
33-
; ISA-NEXT: s_mov_b64 s[8:9], 0
34-
; ISA-NEXT: s_mov_b64 s[4:5], s[6:7]
35-
; ISA-NEXT: .LBB0_2: ; %Flow
36-
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
37-
; ISA-NEXT: s_and_b64 s[6:7], exec, s[4:5]
38-
; ISA-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1]
39-
; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
40-
; ISA-NEXT: s_and_b64 s[6:7], s[8:9], exec
41-
; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
42-
; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
43-
; ISA-NEXT: s_cbranch_execz .LBB0_6
44-
; ISA-NEXT: .LBB0_3: ; %loop
26+
; ISA-NEXT: v_mov_b32_e32 v1, 0
27+
; ISA-NEXT: ; implicit-def: $sgpr10_sgpr11
28+
; ISA-NEXT: ; implicit-def: $sgpr8_sgpr9
29+
; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7
30+
; ISA-NEXT: s_branch .LBB0_2
31+
; ISA-NEXT: .LBB0_1: ; %Flow
32+
; ISA-NEXT: ; in Loop: Header=BB0_2 Depth=1
33+
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
34+
; ISA-NEXT: s_and_b64 s[0:1], exec, s[8:9]
35+
; ISA-NEXT: s_or_b64 s[4:5], s[0:1], s[4:5]
36+
; ISA-NEXT: s_andn2_b64 s[0:1], s[6:7], exec
37+
; ISA-NEXT: s_and_b64 s[2:3], s[10:11], exec
38+
; ISA-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3]
39+
; ISA-NEXT: s_andn2_b64 exec, exec, s[4:5]
40+
; ISA-NEXT: s_cbranch_execz .LBB0_4
41+
; ISA-NEXT: .LBB0_2: ; %loop
4542
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
46-
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
47-
; ISA-NEXT: s_mov_b64 s[6:7], -1
48-
; ISA-NEXT: s_cmp_lt_u32 s10, 32
49-
; ISA-NEXT: s_mov_b64 s[8:9], -1
50-
; ISA-NEXT: s_cbranch_scc0 .LBB0_2
51-
; ISA-NEXT: ; %bb.4: ; %endif1
52-
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
53-
; ISA-NEXT: s_and_saveexec_b64 s[4:5], vcc
43+
; ISA-NEXT: v_cmp_lt_u32_e64 s[0:1], 31, v1
44+
; ISA-NEXT: v_cmp_gt_u32_e64 s[2:3], 32, v1
45+
; ISA-NEXT: s_andn2_b64 s[10:11], s[10:11], exec
46+
; ISA-NEXT: s_and_b64 s[0:1], s[0:1], exec
47+
; ISA-NEXT: s_and_b64 s[2:3], s[2:3], vcc
48+
; ISA-NEXT: s_or_b64 s[8:9], s[8:9], exec
49+
; ISA-NEXT: s_or_b64 s[10:11], s[10:11], s[0:1]
50+
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
5451
; ISA-NEXT: s_cbranch_execz .LBB0_1
55-
; ISA-NEXT: ; %bb.5: ; %endif2
56-
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
57-
; ISA-NEXT: s_add_i32 s10, s10, 1
58-
; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
52+
; ISA-NEXT: ; %bb.3: ; %endif2
53+
; ISA-NEXT: ; in Loop: Header=BB0_2 Depth=1
54+
; ISA-NEXT: v_add_u32_e32 v1, 1, v1
55+
; ISA-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
56+
; ISA-NEXT: s_andn2_b64 s[10:11], s[10:11], exec
5957
; ISA-NEXT: s_branch .LBB0_1
60-
; ISA-NEXT: .LBB0_6: ; %Flow2
61-
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
58+
; ISA-NEXT: .LBB0_4: ; %Flow2
59+
; ISA-NEXT: s_or_b64 exec, exec, s[4:5]
6260
; ISA-NEXT: v_mov_b32_e32 v1, 0
63-
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
64-
; ISA-NEXT: ; %bb.7: ; %if1
61+
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[6:7]
62+
; ISA-NEXT: ; %bb.5: ; %if1
6563
; ISA-NEXT: v_sqrt_f32_e32 v1, v0
66-
; ISA-NEXT: ; %bb.8: ; %endloop
64+
; ISA-NEXT: ; %bb.6: ; %endloop
6765
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
6866
; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm
6967
; ISA-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/jump-address.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s
22

3-
; CHECK: JUMP @6
3+
; CHECK: JUMP @3
44
; CHECK: EXPORT
55
; CHECK-NOT: EXPORT
66

llvm/test/CodeGen/AMDGPU/predicates.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,8 @@ ENDIF:
4545
}
4646

4747
; CHECK-LABEL: {{^}}nested_if:
48-
; CHECK: ALU_PUSH_BEFORE
49-
; CHECK: JUMP
50-
; CHECK: POP
51-
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec
48+
; CHECK: ALU
49+
; CHECK: CNDGT_INT
5250
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
5351
; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
5452
define amdgpu_kernel void @nested_if(ptr addrspace(1) %out, i32 %in) {

0 commit comments

Comments
 (0)