77define amdgpu_kernel void @select_ptr_crash_i64_flat (i32 %tmp , [8 x i32 ], ptr %ptr0 , [8 x i32 ], ptr %ptr1 , [8 x i32 ], ptr addrspace (1 ) %ptr2 ) {
88; GCN-LABEL: select_ptr_crash_i64_flat:
99; GCN: ; %bb.0:
10- ; GCN-NEXT: s_load_dword s6, s[8:9], 0x0
11- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x28
12- ; GCN-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x50
13- ; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x78
1410; GCN-NEXT: s_add_i32 s12, s12, s17
1511; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
12+ ; GCN-NEXT: s_load_dword s2, s[8:9], 0x0
13+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x78
14+ ; GCN-NEXT: s_add_u32 s4, s8, 40
15+ ; GCN-NEXT: s_addc_u32 s3, s9, 0
16+ ; GCN-NEXT: s_add_u32 s5, s8, 0x50
17+ ; GCN-NEXT: s_addc_u32 s6, s9, 0
1618; GCN-NEXT: s_waitcnt lgkmcnt(0)
17- ; GCN-NEXT: s_cmp_eq_u32 s6, 0
18- ; GCN-NEXT: s_cselect_b32 s0, s0, s2
19- ; GCN-NEXT: s_cselect_b32 s1, s1, s3
20- ; GCN-NEXT: v_mov_b32_e32 v0, s0
21- ; GCN-NEXT: v_mov_b32_e32 v1, s1
22- ; GCN-NEXT: s_add_u32 s0, s0, 4
19+ ; GCN-NEXT: s_cmp_eq_u32 s2, 0
20+ ; GCN-NEXT: s_cselect_b32 s3, s3, s6
21+ ; GCN-NEXT: s_cselect_b32 s2, s4, s5
22+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
2323; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
24- ; GCN-NEXT: s_addc_u32 s1, s1, 0
24+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
25+ ; GCN-NEXT: v_mov_b32_e32 v0, s2
26+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
27+ ; GCN-NEXT: s_add_u32 s2, s2, 4
2528; GCN-NEXT: flat_load_dword v0, v[0:1]
26- ; GCN-NEXT: v_mov_b32_e32 v2, s1
27- ; GCN-NEXT: v_mov_b32_e32 v1, s0
29+ ; GCN-NEXT: s_addc_u32 s3, s3, 0
30+ ; GCN-NEXT: v_mov_b32_e32 v1, s2
31+ ; GCN-NEXT: v_mov_b32_e32 v2, s3
2832; GCN-NEXT: flat_load_dword v1, v[1:2]
29- ; GCN-NEXT: v_mov_b32_e32 v2, s4
30- ; GCN-NEXT: v_mov_b32_e32 v3, s5
33+ ; GCN-NEXT: v_mov_b32_e32 v3, s1
34+ ; GCN-NEXT: v_mov_b32_e32 v2, s0
3135; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3236; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
3337; GCN-NEXT: s_endpgm
@@ -45,25 +49,28 @@ define amdgpu_kernel void @select_ptr_crash_i64_flat(i32 %tmp, [8 x i32], ptr %p
4549define amdgpu_kernel void @select_ptr_crash_i64_global (i32 %tmp , [8 x i32 ], ptr addrspace (1 ) %ptr0 , [8 x i32 ], ptr addrspace (1 ) %ptr1 , [8 x i32 ], ptr addrspace (1 ) %ptr2 ) {
4650; GCN-LABEL: select_ptr_crash_i64_global:
4751; GCN: ; %bb.0:
48- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x28
49- ; GCN-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x50
50- ; GCN-NEXT: s_load_dword s6, s[8:9], 0x0
51- ; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x78
5252; GCN-NEXT: s_add_i32 s12, s12, s17
5353; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
54+ ; GCN-NEXT: s_load_dword s2, s[8:9], 0x0
55+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x78
56+ ; GCN-NEXT: s_add_u32 s4, s8, 40
57+ ; GCN-NEXT: s_addc_u32 s3, s9, 0
58+ ; GCN-NEXT: s_add_u32 s5, s8, 0x50
59+ ; GCN-NEXT: s_addc_u32 s6, s9, 0
5460; GCN-NEXT: s_waitcnt lgkmcnt(0)
55- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
61+ ; GCN-NEXT: s_cmp_eq_u32 s2, 0
62+ ; GCN-NEXT: s_cselect_b32 s3, s3, s6
63+ ; GCN-NEXT: s_cselect_b32 s2, s4, s5
5664; GCN-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
57- ; GCN-NEXT: s_cmp_eq_u32 s6, 0
58- ; GCN-NEXT: v_mov_b32_e32 v2, s4
59- ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
60- ; GCN-NEXT: v_mov_b32_e32 v3, s5
61- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
62- ; GCN-NEXT: s_cselect_b32 s1, s1, s3
63- ; GCN-NEXT: s_cselect_b32 s0, s0, s2
6465; GCN-NEXT: v_mov_b32_e32 v0, s0
66+ ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
6567; GCN-NEXT: v_mov_b32_e32 v1, s1
66- ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
68+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
69+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
70+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
71+ ; GCN-NEXT: v_mov_b32_e32 v2, s2
72+ ; GCN-NEXT: v_mov_b32_e32 v3, s3
73+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
6774; GCN-NEXT: s_endpgm
6875 %tmp2 = icmp eq i32 %tmp , 0
6976 %tmp3 = load i64 , ptr addrspace (1 ) %ptr0 , align 8
@@ -78,22 +85,18 @@ define amdgpu_kernel void @select_ptr_crash_i64_local(i32 %tmp, ptr addrspace(3)
7885; GCN: ; %bb.0:
7986; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
8087; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
81- ; GCN-NEXT: s_mov_b32 m0, -1
8288; GCN-NEXT: s_add_i32 s12, s12, s17
8389; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
90+ ; GCN-NEXT: s_mov_b32 m0, -1
8491; GCN-NEXT: s_waitcnt lgkmcnt(0)
85- ; GCN-NEXT: v_mov_b32_e32 v0, s1
86- ; GCN-NEXT: v_mov_b32_e32 v2, s2
87- ; GCN-NEXT: ds_read_b64 v[0:1], v0
88- ; GCN-NEXT: ds_read_b64 v[2:3], v2
8992; GCN-NEXT: s_cmp_eq_u32 s0, 0
90- ; GCN-NEXT: s_cselect_b64 vcc, -1, 0
91- ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
92- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
93- ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
94- ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
93+ ; GCN-NEXT: s_cselect_b32 s0, s1, s2
94+ ; GCN-NEXT: v_mov_b32_e32 v0, s0
95+ ; GCN-NEXT: ds_read_b64 v[0:1], v0
9596; GCN-NEXT: v_mov_b32_e32 v2, s4
97+ ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
9698; GCN-NEXT: v_mov_b32_e32 v3, s5
99+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
97100; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
98101; GCN-NEXT: s_endpgm
99102 %tmp2 = icmp eq i32 %tmp , 0
@@ -112,22 +115,20 @@ define amdgpu_kernel void @select_ptr_crash_i64_local_offsets(i32 %tmp, ptr addr
112115; GCN: ; %bb.0:
113116; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
114117; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
115- ; GCN-NEXT: s_mov_b32 m0, -1
116118; GCN-NEXT: s_add_i32 s12, s12, s17
117119; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
120+ ; GCN-NEXT: s_mov_b32 m0, -1
118121; GCN-NEXT: s_waitcnt lgkmcnt(0)
119- ; GCN-NEXT: v_mov_b32_e32 v0, s1
120- ; GCN-NEXT: v_mov_b32_e32 v2, s2
121- ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128
122- ; GCN-NEXT: ds_read_b64 v[2:3], v2 offset:512
122+ ; GCN-NEXT: s_addk_i32 s1, 0x80
123+ ; GCN-NEXT: s_addk_i32 s2, 0x200
123124; GCN-NEXT: s_cmp_eq_u32 s0, 0
124- ; GCN-NEXT: s_cselect_b64 vcc, -1, 0
125- ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
126- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
127- ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
128- ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
125+ ; GCN-NEXT: s_cselect_b32 s0, s1, s2
126+ ; GCN-NEXT: v_mov_b32_e32 v0, s0
127+ ; GCN-NEXT: ds_read_b64 v[0:1], v0
129128; GCN-NEXT: v_mov_b32_e32 v2, s4
129+ ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
130130; GCN-NEXT: v_mov_b32_e32 v3, s5
131+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
131132; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
132133; GCN-NEXT: s_endpgm
133134 %tmp2 = icmp eq i32 %tmp , 0
0 commit comments