@@ -187,3 +187,117 @@ v8i test_gather(v8b mask, v8i idx, int *ptr) {
 void test_scatter(v8b mask, v8i val, v8i idx, int *ptr) {
   __builtin_masked_scatter(mask, val, idx, ptr);
 }
+
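+// The i8 mask parameter is coerced back to <8 x i1>, and the load is emitted
+// as @llvm.masked.load.v8i32.p42, preserving the pointer's address space 42.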
+// CHECK-LABEL: define dso_local <8 x i32> @test_load_as(
+// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(42), align 8
+// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1
+// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1
+// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i1> [[MASK1]] to i8
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT: store ptr addrspace(42) [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(42), ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p42(ptr addrspace(42) [[TMP2]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
+// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]]
+//
+v8i test_load_as(v8b mask, int __attribute__((address_space(42))) *ptr) {
+  return __builtin_masked_load(mask, ptr);
+}
+
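+// The masked store keeps the pointer's address space as well: the value is
+// written through @llvm.masked.store.v8i32.p42 under the <8 x i1> mask.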
+// CHECK-LABEL: define dso_local void @test_store_as(
+// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr addrspace(42) noundef [[P:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr addrspace(42), align 8
+// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1
+// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1
+// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT: [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1
+// CHECK-NEXT: store <8 x i32> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT: store ptr addrspace(42) [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(42), ptr [[P_ADDR]], align 8
+// CHECK-NEXT: call void @llvm.masked.store.v8i32.p42(<8 x i32> [[TMP3]], ptr addrspace(42) [[TMP4]], i32 4, <8 x i1> [[TMP2]])
+// CHECK-NEXT: ret void
+//
+void test_store_as(v8b m, v8i v, int __attribute__((address_space(42))) *p) {
+  __builtin_masked_store(m, v, p);
+}
+
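+// For the gather, the scalar base pointer and the i32 index vector feed a
+// vector GEP, producing the <8 x ptr addrspace(42)> operand of the intrinsic.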
+// CHECK-LABEL: define dso_local <8 x i32> @test_gather_as(
+// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(42), align 8
+// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1
+// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1
+// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT: [[IDX:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[MASK1]] to i8
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT: store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT: store ptr addrspace(42) [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(42), ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr addrspace(42) [[TMP4]], <8 x i32> [[TMP3]]
+// CHECK-NEXT: [[MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p42(<8 x ptr addrspace(42)> [[TMP5]], i32 4, <8 x i1> [[TMP2]], <8 x i32> poison)
+// CHECK-NEXT: ret <8 x i32> [[MASKED_GATHER]]
+//
+v8i test_gather_as(v8b mask, v8i idx, int __attribute__((address_space(42))) *ptr) {
+  return __builtin_masked_gather(mask, idx, ptr);
+}
+
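+// Scatter likewise forms a <8 x ptr addrspace(42)> vector with a GEP and
+// stores the elements through @llvm.masked.scatter.v8i32.v8p42.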
+// CHECK-LABEL: define dso_local void @test_scatter_as(
+// CHECK-SAME: i8 noundef [[MASK_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP1:%.*]], ptr addrspace(42) noundef [[PTR:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MASK:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[MASK_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT: [[IDX_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(42), align 8
+// CHECK-NEXT: store i8 [[MASK_COERCE]], ptr [[MASK]], align 1
+// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[MASK]], align 1
+// CHECK-NEXT: [[MASK1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT: [[VAL:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT: [[IDX:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i1> [[MASK1]] to i8
+// CHECK-NEXT: store i8 [[TMP2]], ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT: store <8 x i32> [[VAL]], ptr [[VAL_ADDR]], align 32
+// CHECK-NEXT: store <8 x i32> [[IDX]], ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT: store ptr addrspace(42) [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[MASK_ADDR]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VAL_ADDR]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[IDX_ADDR]], align 32
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(42), ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr addrspace(42) [[TMP6]], <8 x i32> [[TMP4]]
+// CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p42(<8 x i32> [[TMP5]], <8 x ptr addrspace(42)> [[TMP7]], i32 4, <8 x i1> [[TMP3]])
+// CHECK-NEXT: ret void
+//
+void test_scatter_as(v8b mask, v8i val, v8i idx, int __attribute__((address_space(42))) *ptr) {
+  __builtin_masked_scatter(mask, val, idx, ptr);
+}