Skip to content

Commit 99da17d

Browse files
Update tests
1 parent 53965f2 commit 99da17d

File tree

116 files changed

+13743
-7860
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

116 files changed

+13743
-7860
lines changed

llvm/test/CodeGen/AMDGPU/add_i1.ll

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
23
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
34
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
@@ -6,6 +7,20 @@
67
; GFX9: v_xor_b32_e32
78
; GFX10: v_xor_b32_e32
89
define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
10+
; GFX9-LABEL: add_var_var_i1:
11+
; GFX9: ; %bb.0:
12+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
13+
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
14+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
15+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
16+
; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3] glc
17+
; GFX9-NEXT: s_waitcnt vmcnt(0)
18+
; GFX9-NEXT: global_load_ubyte v2, v0, s[6:7] glc
19+
; GFX9-NEXT: s_waitcnt vmcnt(0)
20+
; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2
21+
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
22+
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
23+
; GFX9-NEXT: s_endpgm
924
%a = load volatile i1, ptr addrspace(1) %in0
1025
%b = load volatile i1, ptr addrspace(1) %in1
1126
%add = add i1 %a, %b
@@ -17,6 +32,17 @@ define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1
1732
; GFX9: s_xor_b64
1833
; GFX10: s_xor_b32
1934
define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
35+
; GFX9-LABEL: add_var_imm_i1:
36+
; GFX9: ; %bb.0:
37+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
38+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
39+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
40+
; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3] glc
41+
; GFX9-NEXT: s_waitcnt vmcnt(0)
42+
; GFX9-NEXT: v_not_b32_e32 v1, v1
43+
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
44+
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
45+
; GFX9-NEXT: s_endpgm
2046
%a = load volatile i1, ptr addrspace(1) %in
2147
%add = add i1 %a, 1
2248
store i1 %add, ptr addrspace(1) %out
@@ -28,6 +54,44 @@ define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1
2854
; GFX9: s_xor_b64
2955
; GFX10: s_xor_b32
3056
define amdgpu_kernel void @add_i1_cf(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
57+
; GFX9-LABEL: add_i1_cf:
58+
; GFX9: ; %bb.0: ; %entry
59+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
60+
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
61+
; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
62+
; GFX9-NEXT: ; implicit-def: $sgpr4_sgpr5
63+
; GFX9-NEXT: s_and_saveexec_b64 s[6:7], vcc
64+
; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
65+
; GFX9-NEXT: s_cbranch_execz .LBB2_2
66+
; GFX9-NEXT: ; %bb.1: ; %else
67+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
68+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
69+
; GFX9-NEXT: global_load_ubyte v0, v0, s[8:9] glc
70+
; GFX9-NEXT: s_waitcnt vmcnt(0)
71+
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
72+
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
73+
; GFX9-NEXT: .LBB2_2: ; %Flow
74+
; GFX9-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
75+
; GFX9-NEXT: s_cbranch_execz .LBB2_4
76+
; GFX9-NEXT: ; %bb.3: ; %if
77+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
78+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
79+
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] glc
80+
; GFX9-NEXT: s_waitcnt vmcnt(0)
81+
; GFX9-NEXT: s_andn2_b64 s[2:3], s[4:5], exec
82+
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
83+
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
84+
; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec
85+
; GFX9-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5]
86+
; GFX9-NEXT: .LBB2_4: ; %endif
87+
; GFX9-NEXT: s_or_b64 exec, exec, s[6:7]
88+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
89+
; GFX9-NEXT: v_not_b32_e32 v1, v1
90+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
91+
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
92+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
93+
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
94+
; GFX9-NEXT: s_endpgm
3195
entry:
3296
%tid = call i32 @llvm.amdgcn.workitem.id.x()
3397
%d_cmp = icmp ult i32 %tid, 16
@@ -49,3 +113,6 @@ endif:
49113
}
50114

51115
declare i32 @llvm.amdgcn.workitem.id.x()
116+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
117+
; GCN: {{.*}}
118+
; GFX10: {{.*}}

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -565,8 +565,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
565565
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
566566
; GFX908-NEXT: .LBB3_2: ; %bb9
567567
; GFX908-NEXT: ; =>This Loop Header: Depth=1
568-
; GFX908-NEXT: ; Child Loop BB3_5 Depth 2
569-
; GFX908-NEXT: s_mov_b64 s[18:19], -1
568+
; GFX908-NEXT: ; Child Loop BB3_6 Depth 2
569+
; GFX908-NEXT: s_mov_b64 s[22:23], -1
570570
; GFX908-NEXT: s_mov_b64 vcc, s[0:1]
571571
; GFX908-NEXT: s_cbranch_vccz .LBB3_10
572572
; GFX908-NEXT: ; %bb.3: ; %bb14
@@ -597,18 +597,25 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
597597
; GFX908-NEXT: s_add_i32 s13, s22, s13
598598
; GFX908-NEXT: s_mul_i32 s9, s6, s9
599599
; GFX908-NEXT: s_add_i32 s13, s13, s23
600-
; GFX908-NEXT: s_branch .LBB3_5
600+
; GFX908-NEXT: s_branch .LBB3_6
601601
; GFX908-NEXT: .LBB3_4: ; %bb58
602-
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
602+
; GFX908-NEXT: ; in Loop: Header=BB3_6 Depth=2
603603
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
604604
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
605605
; GFX908-NEXT: s_add_u32 s20, s20, s4
606606
; GFX908-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[2:3]
607607
; GFX908-NEXT: s_addc_u32 s21, s21, s5
608608
; GFX908-NEXT: s_mov_b64 s[22:23], 0
609+
; GFX908-NEXT: .LBB3_5: ; %Flow18
610+
; GFX908-NEXT: ; in Loop: Header=BB3_6 Depth=2
611+
; GFX908-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[22:23]
612+
; GFX908-NEXT: v_readfirstlane_b32 s22, v12
613+
; GFX908-NEXT: s_not_b32 s22, s22
614+
; GFX908-NEXT: s_bitcmp1_b32 s22, 0
615+
; GFX908-NEXT: s_cselect_b64 s[22:23], -1, 0
609616
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[24:25]
610-
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
611-
; GFX908-NEXT: .LBB3_5: ; %bb16
617+
; GFX908-NEXT: s_cbranch_vccz .LBB3_10
618+
; GFX908-NEXT: .LBB3_6: ; %bb16
612619
; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
613620
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
614621
; GFX908-NEXT: s_add_u32 s22, s20, s9
@@ -625,9 +632,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
625632
; GFX908-NEXT: ds_read_b64 v[14:15], v0
626633
; GFX908-NEXT: s_and_b64 vcc, exec, s[2:3]
627634
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
628-
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
629-
; GFX908-NEXT: ; %bb.6: ; %bb51
630-
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
635+
; GFX908-NEXT: s_cbranch_vccnz .LBB3_8
636+
; GFX908-NEXT: ; %bb.7: ; %bb51
637+
; GFX908-NEXT: ; in Loop: Header=BB3_6 Depth=2
631638
; GFX908-NEXT: v_cvt_f32_f16_sdwa v22, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
632639
; GFX908-NEXT: v_cvt_f32_f16_e32 v21, v21
633640
; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
@@ -649,21 +656,20 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
649656
; GFX908-NEXT: v_add_f32_e32 v10, v10, v12
650657
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
651658
; GFX908-NEXT: s_branch .LBB3_4
652-
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
659+
; GFX908-NEXT: .LBB3_8: ; in Loop: Header=BB3_6 Depth=2
653660
; GFX908-NEXT: s_mov_b64 s[22:23], s[18:19]
654661
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
655662
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
656-
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
663+
; GFX908-NEXT: ; %bb.9: ; in Loop: Header=BB3_6 Depth=2
657664
; GFX908-NEXT: s_mov_b64 s[22:23], -1
658665
; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
659666
; GFX908-NEXT: ; implicit-def: $sgpr20_sgpr21
660-
; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
661-
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
662-
; GFX908-NEXT: s_xor_b64 s[18:19], s[22:23], -1
667+
; GFX908-NEXT: s_mov_b64 s[24:25], -1
668+
; GFX908-NEXT: s_branch .LBB3_5
663669
; GFX908-NEXT: .LBB3_10: ; %Flow19
664670
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
665671
; GFX908-NEXT: s_mov_b64 s[2:3], -1
666-
; GFX908-NEXT: s_and_b64 vcc, exec, s[18:19]
672+
; GFX908-NEXT: s_and_b64 vcc, exec, s[22:23]
667673
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
668674
; GFX908-NEXT: ; %bb.11: ; %bb12
669675
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
@@ -730,8 +736,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
730736
; GFX90A-NEXT: s_cbranch_vccz .LBB3_12
731737
; GFX90A-NEXT: .LBB3_2: ; %bb9
732738
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
733-
; GFX90A-NEXT: ; Child Loop BB3_5 Depth 2
734-
; GFX90A-NEXT: s_mov_b64 s[18:19], -1
739+
; GFX90A-NEXT: ; Child Loop BB3_6 Depth 2
740+
; GFX90A-NEXT: s_mov_b64 s[22:23], -1
735741
; GFX90A-NEXT: s_mov_b64 vcc, s[0:1]
736742
; GFX90A-NEXT: s_cbranch_vccz .LBB3_10
737743
; GFX90A-NEXT: ; %bb.3: ; %bb14
@@ -758,18 +764,25 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
758764
; GFX90A-NEXT: s_add_i32 s13, s22, s13
759765
; GFX90A-NEXT: s_mul_i32 s9, s6, s9
760766
; GFX90A-NEXT: s_add_i32 s13, s13, s23
761-
; GFX90A-NEXT: s_branch .LBB3_5
767+
; GFX90A-NEXT: s_branch .LBB3_6
762768
; GFX90A-NEXT: .LBB3_4: ; %bb58
763-
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
769+
; GFX90A-NEXT: ; in Loop: Header=BB3_6 Depth=2
764770
; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
765771
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
766772
; GFX90A-NEXT: s_add_u32 s20, s20, s4
767773
; GFX90A-NEXT: s_addc_u32 s21, s21, s5
768774
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[4:5]
769775
; GFX90A-NEXT: s_mov_b64 s[22:23], 0
776+
; GFX90A-NEXT: .LBB3_5: ; %Flow18
777+
; GFX90A-NEXT: ; in Loop: Header=BB3_6 Depth=2
778+
; GFX90A-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[22:23]
779+
; GFX90A-NEXT: v_readfirstlane_b32 s22, v14
780+
; GFX90A-NEXT: s_not_b32 s22, s22
781+
; GFX90A-NEXT: s_bitcmp1_b32 s22, 0
782+
; GFX90A-NEXT: s_cselect_b64 s[22:23], -1, 0
770783
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[24:25]
771-
; GFX90A-NEXT: s_cbranch_vccz .LBB3_9
772-
; GFX90A-NEXT: .LBB3_5: ; %bb16
784+
; GFX90A-NEXT: s_cbranch_vccz .LBB3_10
785+
; GFX90A-NEXT: .LBB3_6: ; %bb16
773786
; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1
774787
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
775788
; GFX90A-NEXT: s_add_u32 s22, s20, s9
@@ -787,9 +800,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
787800
; GFX90A-NEXT: s_and_b64 vcc, exec, s[2:3]
788801
; GFX90A-NEXT: ; kill: killed $sgpr22 killed $sgpr23
789802
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
790-
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
791-
; GFX90A-NEXT: ; %bb.6: ; %bb51
792-
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
803+
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_8
804+
; GFX90A-NEXT: ; %bb.7: ; %bb51
805+
; GFX90A-NEXT: ; in Loop: Header=BB3_6 Depth=2
793806
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v23, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
794807
; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v21
795808
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v21, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
@@ -803,21 +816,20 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
803816
; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17]
804817
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
805818
; GFX90A-NEXT: s_branch .LBB3_4
806-
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
819+
; GFX90A-NEXT: .LBB3_8: ; in Loop: Header=BB3_6 Depth=2
807820
; GFX90A-NEXT: s_mov_b64 s[22:23], s[18:19]
808821
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
809822
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
810-
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
823+
; GFX90A-NEXT: ; %bb.9: ; in Loop: Header=BB3_6 Depth=2
811824
; GFX90A-NEXT: s_mov_b64 s[22:23], -1
812825
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
813826
; GFX90A-NEXT: ; implicit-def: $sgpr20_sgpr21
814-
; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
815-
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
816-
; GFX90A-NEXT: s_xor_b64 s[18:19], s[22:23], -1
827+
; GFX90A-NEXT: s_mov_b64 s[24:25], -1
828+
; GFX90A-NEXT: s_branch .LBB3_5
817829
; GFX90A-NEXT: .LBB3_10: ; %Flow19
818830
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
819831
; GFX90A-NEXT: s_mov_b64 s[2:3], -1
820-
; GFX90A-NEXT: s_and_b64 vcc, exec, s[18:19]
832+
; GFX90A-NEXT: s_and_b64 vcc, exec, s[22:23]
821833
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
822834
; GFX90A-NEXT: ; %bb.11: ; %bb12
823835
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1

0 commit comments

Comments
 (0)