@@ -65,52 +65,52 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
65
65
;
66
66
; GFX9V4-LABEL: addrspacecast:
67
67
; GFX9V4: ; %bb.0:
68
- ; GFX9V4-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
68
+ ; GFX9V4-NEXT: s_load_dwordx2 s[4:5 ], s[8:9], 0x0
69
69
; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s12, s17
70
70
; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
71
- ; GFX9V4-NEXT: s_mov_b64 s[2:3 ], src_private_base
72
- ; GFX9V4-NEXT: s_mov_b64 s[4:5 ], src_shared_base
71
+ ; GFX9V4-NEXT: s_mov_b64 s[0:1 ], src_private_base
72
+ ; GFX9V4-NEXT: s_mov_b64 s[2:3 ], src_shared_base
73
73
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
74
- ; GFX9V4-NEXT: s_mov_b32 s2, s0
75
- ; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
74
+ ; GFX9V4-NEXT: s_mov_b32 s0, s4
75
+ ; GFX9V4-NEXT: s_cmp_lg_u32 s4, -1
76
+ ; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[0:1], 0
77
+ ; GFX9V4-NEXT: s_mov_b32 s2, s5
78
+ ; GFX9V4-NEXT: s_cmp_lg_u32 s5, -1
79
+ ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
76
80
; GFX9V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
77
- ; GFX9V4-NEXT: s_mov_b32 s4, s1
78
- ; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
79
- ; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
80
- ; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
81
81
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
82
- ; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
82
+ ; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
83
83
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
84
84
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
85
- ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
85
+ ; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
86
86
; GFX9V4-NEXT: v_mov_b32_e32 v2, 2
87
- ; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
87
+ ; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
88
88
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
89
89
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
90
90
; GFX9V4-NEXT: s_endpgm
91
91
;
92
92
; GFX9V5-LABEL: addrspacecast:
93
93
; GFX9V5: ; %bb.0:
94
- ; GFX9V5-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
94
+ ; GFX9V5-NEXT: s_load_dwordx2 s[4:5 ], s[8:9], 0x0
95
95
; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s12, s17
96
96
; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
97
- ; GFX9V5-NEXT: s_mov_b64 s[2:3 ], src_private_base
98
- ; GFX9V5-NEXT: s_mov_b64 s[4:5 ], src_shared_base
97
+ ; GFX9V5-NEXT: s_mov_b64 s[0:1 ], src_private_base
98
+ ; GFX9V5-NEXT: s_mov_b64 s[2:3 ], src_shared_base
99
99
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
100
- ; GFX9V5-NEXT: s_mov_b32 s2, s0
101
- ; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
100
+ ; GFX9V5-NEXT: s_mov_b32 s0, s4
101
+ ; GFX9V5-NEXT: s_cmp_lg_u32 s4, -1
102
+ ; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[0:1], 0
103
+ ; GFX9V5-NEXT: s_mov_b32 s2, s5
104
+ ; GFX9V5-NEXT: s_cmp_lg_u32 s5, -1
105
+ ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
102
106
; GFX9V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
103
- ; GFX9V5-NEXT: s_mov_b32 s4, s1
104
- ; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
105
- ; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
106
- ; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
107
107
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
108
- ; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
108
+ ; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
109
109
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
110
110
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
111
- ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
111
+ ; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
112
112
; GFX9V5-NEXT: v_mov_b32_e32 v2, 2
113
- ; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
113
+ ; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
114
114
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
115
115
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
116
116
; GFX9V5-NEXT: s_endpgm
@@ -150,10 +150,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
150
150
;
151
151
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
152
152
; GFX9V4: ; %bb.0:
153
- ; GFX9V4-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
154
- ; GFX9V4-NEXT: s_mov_b64 s[2:3 ], src_shared_base
153
+ ; GFX9V4-NEXT: s_load_dwordx2 s[2:3 ], s[8:9], 0x0
154
+ ; GFX9V4-NEXT: s_mov_b64 s[0:1 ], src_shared_base
155
155
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
156
- ; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
156
+ ; GFX9V4-NEXT: s_cmp_eq_u32 s3, s1
157
157
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
158
158
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
159
159
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -162,10 +162,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
162
162
;
163
163
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
164
164
; GFX9V5: ; %bb.0:
165
- ; GFX9V5-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
166
- ; GFX9V5-NEXT: s_mov_b64 s[2:3 ], src_shared_base
165
+ ; GFX9V5-NEXT: s_load_dwordx2 s[2:3 ], s[8:9], 0x0
166
+ ; GFX9V5-NEXT: s_mov_b64 s[0:1 ], src_shared_base
167
167
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
168
- ; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
168
+ ; GFX9V5-NEXT: s_cmp_eq_u32 s3, s1
169
169
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
170
170
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
171
171
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
@@ -206,10 +206,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
206
206
;
207
207
; GFX9V4-LABEL: llvm_amdgcn_is_private:
208
208
; GFX9V4: ; %bb.0:
209
- ; GFX9V4-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
210
- ; GFX9V4-NEXT: s_mov_b64 s[2:3 ], src_private_base
209
+ ; GFX9V4-NEXT: s_load_dwordx2 s[2:3 ], s[8:9], 0x0
210
+ ; GFX9V4-NEXT: s_mov_b64 s[0:1 ], src_private_base
211
211
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
212
- ; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
212
+ ; GFX9V4-NEXT: s_cmp_eq_u32 s3, s1
213
213
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
214
214
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
215
215
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -218,10 +218,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
218
218
;
219
219
; GFX9V5-LABEL: llvm_amdgcn_is_private:
220
220
; GFX9V5: ; %bb.0:
221
- ; GFX9V5-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
222
- ; GFX9V5-NEXT: s_mov_b64 s[2:3 ], src_private_base
221
+ ; GFX9V5-NEXT: s_load_dwordx2 s[2:3 ], s[8:9], 0x0
222
+ ; GFX9V5-NEXT: s_mov_b64 s[0:1 ], src_private_base
223
223
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
224
- ; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
224
+ ; GFX9V5-NEXT: s_cmp_eq_u32 s3, s1
225
225
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
226
226
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
227
227
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
0 commit comments