@@ -65,6 +65,160 @@ define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3)
6565 ret ptr addrspace (3 ) %masked
6666}
6767
; Test: llvm.ptrmask on a ptr addrspace(7) value with a run-time i32 mask,
; default calling convention (arguments are not marked inreg).
; The expected output for both check prefixes is a single v_and_b32 of v4
; with v5; no other register is modified before the return.
; NOTE(review): presumably v4 holds the 32-bit offset part of the pointer and
; v5 the mask -- confirm against the AMDGPU calling convention.
; NOTE(review): the check lines look tool-generated; regenerate them rather
; than editing by hand -- confirm against the file header.
68+ define ptr addrspace (7 ) @v_ptrmask_buffer_fat_ptr_variable_i32 (ptr addrspace (7 ) %ptr , i32 %mask ) {
69+ ; GCN-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
70+ ; GCN: ; %bb.0:
71+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v5
73+ ; GCN-NEXT: s_setpc_b64 s[30:31]
74+ ;
75+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
76+ ; GFX10PLUS: ; %bb.0:
77+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v5
79+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
80+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 %mask )
81+ ret ptr addrspace (7 ) %masked
82+ }
83+
; Test: llvm.ptrmask on a ptr addrspace(7) value with the constant i32 mask -8
; (clears the low three bits of the masked value), default calling convention.
; The expected output for both check prefixes is a single v_and_b32 of v4
; with the inline constant -8.
; NOTE(review): presumably v4 holds the 32-bit offset part of the pointer --
; confirm against the AMDGPU calling convention.
84+ define ptr addrspace (7 ) @v_ptrmask_buffer_fat_ptr_i32_neg8 (ptr addrspace (7 ) %ptr ) {
85+ ; GCN-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
86+ ; GCN: ; %bb.0:
87+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+ ; GCN-NEXT: v_and_b32_e32 v4, -8, v4
89+ ; GCN-NEXT: s_setpc_b64 s[30:31]
90+ ;
91+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
92+ ; GFX10PLUS: ; %bb.0:
93+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, -8, v4
95+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
96+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 -8 )
97+ ret ptr addrspace (7 ) %masked
98+ }
99+
; Test: llvm.ptrmask on a ptr addrspace(7) value with a run-time i32 mask,
; using the amdgpu_ps calling convention with both arguments marked inreg.
; The expected output for both check prefixes contains one s_and_b32
; (s4 = s6 & s7); every other instruction is an s_mov_b32 register copy
; that moves s2..s5 down into s0..s3 (using s8 as a temporary for s4).
; NOTE(review): presumably s6 is the incoming offset and s7 the mask, with the
; copies only re-packing the unmodified resource part into the return
; registers -- confirm against the amdgpu_ps argument/return assignment.
100+ define amdgpu_ps ptr addrspace (7 ) @s_ptrmask_buffer_fat_ptr_variable_i32 (ptr addrspace (7 ) inreg %ptr , i32 inreg %mask ) {
101+ ; GCN-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
102+ ; GCN: ; %bb.0:
103+ ; GCN-NEXT: s_mov_b32 s8, s4
104+ ; GCN-NEXT: s_mov_b32 s1, s3
105+ ; GCN-NEXT: s_mov_b32 s0, s2
106+ ; GCN-NEXT: s_and_b32 s4, s6, s7
107+ ; GCN-NEXT: s_mov_b32 s2, s8
108+ ; GCN-NEXT: s_mov_b32 s3, s5
109+ ; GCN-NEXT: ; return to shader part epilog
110+ ;
111+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
112+ ; GFX10PLUS: ; %bb.0:
113+ ; GFX10PLUS-NEXT: s_mov_b32 s8, s4
114+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
115+ ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
116+ ; GFX10PLUS-NEXT: s_and_b32 s4, s6, s7
117+ ; GFX10PLUS-NEXT: s_mov_b32 s2, s8
118+ ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
119+ ; GFX10PLUS-NEXT: ; return to shader part epilog
120+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 %mask )
121+ ret ptr addrspace (7 ) %masked
122+ }
123+
; Test: llvm.ptrmask on a ptr addrspace(7) value with the constant i32 mask -8,
; using the amdgpu_ps calling convention with the pointer marked inreg.
; The expected output for both check prefixes contains one s_and_b32
; (s4 = s6 & -8); every other instruction is an s_mov_b32 register copy
; that moves s2..s5 down into s0..s3 (using s7 as a temporary for s4).
; NOTE(review): presumably s6 is the incoming offset part of the pointer --
; confirm against the amdgpu_ps argument/return assignment.
124+ define amdgpu_ps ptr addrspace (7 ) @s_ptrmask_buffer_fat_ptr_i32_neg8 (ptr addrspace (7 ) inreg %ptr ) {
125+ ; GCN-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
126+ ; GCN: ; %bb.0:
127+ ; GCN-NEXT: s_mov_b32 s7, s4
128+ ; GCN-NEXT: s_mov_b32 s1, s3
129+ ; GCN-NEXT: s_mov_b32 s0, s2
130+ ; GCN-NEXT: s_and_b32 s4, s6, -8
131+ ; GCN-NEXT: s_mov_b32 s2, s7
132+ ; GCN-NEXT: s_mov_b32 s3, s5
133+ ; GCN-NEXT: ; return to shader part epilog
134+ ;
135+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
136+ ; GFX10PLUS: ; %bb.0:
137+ ; GFX10PLUS-NEXT: s_mov_b32 s7, s4
138+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
139+ ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
140+ ; GFX10PLUS-NEXT: s_and_b32 s4, s6, -8
141+ ; GFX10PLUS-NEXT: s_mov_b32 s2, s7
142+ ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
143+ ; GFX10PLUS-NEXT: ; return to shader part epilog
144+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 -8 )
145+ ret ptr addrspace (7 ) %masked
146+ }
147+
; Test: llvm.ptrmask on a ptr addrspace(8) value with a run-time i128 mask,
; default calling convention (arguments are not marked inreg).
; The expected output is four 32-bit ANDs pairing v0..v3 with v4..v7, i.e.
; the 128-bit mask is applied one dword at a time.
; The two check prefixes expect the same four instructions but in a different
; order (v1, v0, v3, v2 for the first prefix; v0, v1, v2, v3 for the second).
; NOTE(review): presumably v0..v3 hold the pointer and v4..v7 the mask --
; confirm against the AMDGPU calling convention.
148+ define ptr addrspace (8 ) @v_ptrmask_buffer_resource_variable_i128 (ptr addrspace (8 ) %ptr , i128 %mask ) {
149+ ; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128:
150+ ; GCN: ; %bb.0:
151+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v5
153+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v4
154+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v7
155+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v6
156+ ; GCN-NEXT: s_setpc_b64 s[30:31]
157+ ;
158+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128:
159+ ; GFX10PLUS: ; %bb.0:
160+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
162+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
163+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
164+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
165+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
166+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 %mask )
167+ ret ptr addrspace (8 ) %masked
168+ }
169+
; Test: llvm.ptrmask on a ptr addrspace(8) value with the constant i128 mask
; -8, default calling convention.
; The expected output for both check prefixes is a single v_and_b32 of v0
; with -8. That is consistent with the mask value: as an i128, -8 has every
; bit above bit 2 set, so only the lowest dword can change under the AND.
; NOTE(review): the function name contains "variable" although the mask here
; is a constant; the name is kept as-is because the LABEL checks depend on it.
170+ define ptr addrspace (8 ) @v_ptrmask_buffer_resource_variable_i128_neg8 (ptr addrspace (8 ) %ptr ) {
171+ ; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
172+ ; GCN: ; %bb.0:
173+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174+ ; GCN-NEXT: v_and_b32_e32 v0, -8, v0
175+ ; GCN-NEXT: s_setpc_b64 s[30:31]
176+ ;
177+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
178+ ; GFX10PLUS: ; %bb.0:
179+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, -8, v0
181+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 -8 )
183+ ret ptr addrspace (8 ) %masked
184+ }
185+
; Test: llvm.ptrmask on a ptr addrspace(8) value with a run-time i128 mask,
; using the amdgpu_ps calling convention with both arguments marked inreg.
; The expected output for both check prefixes is two 64-bit scalar ANDs:
; s[0:1] = s[2:3] & s[6:7] and s[2:3] = s[4:5] & s[8:9].
; NOTE(review): presumably s[2:5] hold the incoming pointer and s[6:9] the
; mask -- confirm against the amdgpu_ps argument/return assignment.
186+ define amdgpu_ps ptr addrspace (8 ) @s_ptrmask_buffer_resource_variable_i128 (ptr addrspace (8 ) inreg %ptr , i128 inreg %mask ) {
187+ ; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128:
188+ ; GCN: ; %bb.0:
189+ ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
190+ ; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
191+ ; GCN-NEXT: ; return to shader part epilog
192+ ;
193+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128:
194+ ; GFX10PLUS: ; %bb.0:
195+ ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
196+ ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
197+ ; GFX10PLUS-NEXT: ; return to shader part epilog
198+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 %mask )
199+ ret ptr addrspace (8 ) %masked
200+ }
201+
; Test: llvm.ptrmask on a ptr addrspace(8) value with the constant i128 mask
; -8, using the amdgpu_ps calling convention with the pointer marked inreg.
; The expected output for both check prefixes contains one s_and_b32
; (s0 = s2 & -8); the remaining three instructions are s_mov_b32 copies of
; s3, s4 and s5 into s1, s2 and s3. As an i128, -8 has every bit above bit 2
; set, so only the lowest dword can change under the AND.
; NOTE(review): the function name contains "variable" although the mask here
; is a constant; the name is kept as-is because the LABEL checks depend on it.
202+ define amdgpu_ps ptr addrspace (8 ) @s_ptrmask_buffer_resource_variable_i128_neg8 (ptr addrspace (8 ) inreg %ptr ) {
203+ ; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
204+ ; GCN: ; %bb.0:
205+ ; GCN-NEXT: s_mov_b32 s1, s3
206+ ; GCN-NEXT: s_and_b32 s0, s2, -8
207+ ; GCN-NEXT: s_mov_b32 s2, s4
208+ ; GCN-NEXT: s_mov_b32 s3, s5
209+ ; GCN-NEXT: ; return to shader part epilog
210+ ;
211+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
212+ ; GFX10PLUS: ; %bb.0:
213+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
214+ ; GFX10PLUS-NEXT: s_and_b32 s0, s2, -8
215+ ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
216+ ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
217+ ; GFX10PLUS-NEXT: ; return to shader part epilog
218+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 -8 )
219+ ret ptr addrspace (8 ) %masked
220+ }
221+
; Declarations of the llvm.ptrmask overloads for addrspace(3)/i32 and
; addrspace(1)/i64; both carry attribute group #0.
; NOTE(review): the tests in this file also call @llvm.ptrmask.p7.i32 and
; @llvm.ptrmask.p8.i128; no declaration for them is visible in this part of
; the file -- confirm they are declared elsewhere or that the IR parser in use
; declares intrinsics implicitly.
68222declare ptr addrspace (3 ) @llvm.ptrmask.p3.i32 (ptr addrspace (3 ), i32 ) #0
69223declare ptr addrspace (1 ) @llvm.ptrmask.p1.i64 (ptr addrspace (1 ), i64 ) #0
70224
0 commit comments