@@ -23,6 +23,7 @@ dims_2d_t test_dims_2d_t (unsigned int size1, int inc1, int inc2) {
2323 return dims_2d_t (size1,inc1, inc2);
2424}
2525
26+ //
2627// CHECK-COMMON-LABEL: @_Z15test2_dims_2d_tjiii(
2728// CHECK-COMMON-NEXT: entry:
2829// CHECK-COMMON-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DIMS_2D_T:%.*]] poison, i32 [[SIZE1:%.*]], 0
@@ -224,16 +225,18 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t&
224225 return add_2d_byte (a,off,size1,count1,inc1);
225226}
226227
227- // AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
228+ // AIE2-LABEL: @_Z15test_add_2d_ptrPDv16_iR9dims_2d_t(
228229// AIE2-NEXT: entry:
229230// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
230231// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
231232// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
232233// AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
233234// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
234235// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
235- // AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
236- // AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
236+ // AIE2-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
237+ // AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[MUL_I_I]] to i20
238+ // AIE2-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
239+ // AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[MUL1_I_I]] to i20
237240// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
238241// AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
239242// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
@@ -244,14 +247,60 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t&
244247// AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
245248// AIE2-NEXT: ret ptr [[TMP11]]
246249//
247- // AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
250+ // AIE2P-LABEL: @_Z15test_add_2d_ptrPDv16_iR9dims_2d_t(
248251// AIE2P-NEXT: entry:
249252// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
250253// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
251254// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
252255// AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
253256// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
254257// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
258+ // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
259+ // AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[MUL_I_I]] to i20
260+ // AIE2P-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
261+ // AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[MUL1_I_I]] to i20
262+ // AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
263+ // AIE2P-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
264+ // AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
265+ // AIE2P-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2p.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
266+ // AIE2P-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
267+ // AIE2P-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
268+ // AIE2P-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
269+ // AIE2P-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
270+ // AIE2P-NEXT: ret ptr [[TMP11]]
271+ //
272+ v16int32* test_add_2d_ptr (v16int32* a, dims_2d_t &params){
273+ return add_2d_ptr (a,params);
274+ }
275+
276+ // AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
277+ // AIE2-NEXT: entry:
278+ // AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
279+ // AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14]]
280+ // AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16]]
281+ // AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
282+ // AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
283+ // AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17]]
284+ // AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
285+ // AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
286+ // AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
287+ // AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
288+ // AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
289+ // AIE2-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
290+ // AIE2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
291+ // AIE2-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
292+ // AIE2-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
293+ // AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
294+ // AIE2-NEXT: ret ptr [[TMP11]]
295+ //
296+ // AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
297+ // AIE2P-NEXT: entry:
298+ // AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
299+ // AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14]]
300+ // AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16]]
301+ // AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
302+ // AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
303+ // AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17]]
255304// AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
256305// AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
257306// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
@@ -268,6 +317,80 @@ v16int32* test_add_2d_byte(v16int32* a, dims_2d_t &params){
268317 return add_2d_byte (a,params);
269318}
270319
320+ // AIE2-LABEL: @_Z15test_add_3d_ptrPDv16_iR9dims_3d_t(
321+ // AIE2-NEXT: entry:
322+ // AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
323+ // AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
324+ // AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
325+ // AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
326+ // AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
327+ // AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
328+ // AIE2-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
329+ // AIE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
330+ // AIE2-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
331+ // AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
332+ // AIE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
333+ // AIE2-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
334+ // AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[MUL_I_I]] to i20
335+ // AIE2-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
336+ // AIE2-NEXT: [[TMP6:%.*]] = trunc i32 [[MUL1_I_I]] to i20
337+ // AIE2-NEXT: [[MUL2_I_I:%.*]] = shl i32 [[TMP4]], 6
338+ // AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[MUL2_I_I]] to i20
339+ // AIE2-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
340+ // AIE2-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
341+ // AIE2-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
342+ // AIE2-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
343+ // AIE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
344+ // AIE2-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
345+ // AIE2-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
346+ // AIE2-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
347+ // AIE2-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
348+ // AIE2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
349+ // AIE2-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
350+ // AIE2-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
351+ // AIE2-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
352+ // AIE2-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
353+ // AIE2-NEXT: ret ptr [[TMP19]]
354+ //
355+ // AIE2P-LABEL: @_Z15test_add_3d_ptrPDv16_iR9dims_3d_t(
356+ // AIE2P-NEXT: entry:
357+ // AIE2P-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
358+ // AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
359+ // AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
360+ // AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
361+ // AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
362+ // AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
363+ // AIE2P-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
364+ // AIE2P-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
365+ // AIE2P-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
366+ // AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
367+ // AIE2P-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
368+ // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
369+ // AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[MUL_I_I]] to i20
370+ // AIE2P-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
371+ // AIE2P-NEXT: [[TMP6:%.*]] = trunc i32 [[MUL1_I_I]] to i20
372+ // AIE2P-NEXT: [[MUL2_I_I:%.*]] = shl i32 [[TMP4]], 6
373+ // AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[MUL2_I_I]] to i20
374+ // AIE2P-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
375+ // AIE2P-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
376+ // AIE2P-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
377+ // AIE2P-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
378+ // AIE2P-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
379+ // AIE2P-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
380+ // AIE2P-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
381+ // AIE2P-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
382+ // AIE2P-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
383+ // AIE2P-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
384+ // AIE2P-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
385+ // AIE2P-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
386+ // AIE2P-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
387+ // AIE2P-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
388+ // AIE2P-NEXT: ret ptr [[TMP19]]
389+ //
390+ v16int32* test_add_3d_ptr (v16int32* a, dims_3d_t &params){
391+ return add_3d_ptr (a,params);
392+ }
393+
271394// AIE2-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t(
272395// AIE2-NEXT: entry:
273396// AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
0 commit comments