Skip to content

Commit adcfd3b

Browse files
[AIE2][AIE2P] Add missing add_2d_ptr/add_3d_ptr intrinsics
The add_2d_ptr/add_3d_ptr overloads that take a dims_2d_t/dims_3d_t parameter struct directly were previously omitted. This commit adds them for both AIE2 and AIE2P, along with clang lowering tests.
1 parent eed184b commit adcfd3b

File tree

3 files changed

+149
-4
lines changed

3 files changed

+149
-4
lines changed

clang/lib/Headers/aie2p_addr.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,22 @@ dims_3d_from_steps(unsigned int size1, int step1, unsigned int size2, int step2,
136136
count1, count2);
137137
};
138138

139+
template <typename T> INTRINSIC(T *) add_2d_ptr(T *a, dims_2d_t &params) {
140+
return add_2d_ptr(a, params.inc2, params.num1, (addr_t &)params.count1,
141+
params.inc1);
142+
}
143+
139144
template <typename T> INTRINSIC(T *) add_2d_byte(T *a, dims_2d_t &params) {
140145
return add_2d_byte(a, params.inc2, params.num1, (addr_t &)params.count1,
141146
params.inc1);
142147
}
143148

149+
template <typename T> INTRINSIC(T *) add_3d_ptr(T *a, dims_3d_t &params) {
150+
return add_3d_ptr(a, params.inc3, params.num1, (addr_t &)params.count1,
151+
params.inc1, params.num2, (addr_t &)params.count2,
152+
params.inc2);
153+
}
154+
144155
template <typename T> INTRINSIC(T *) add_3d_byte(T *a, dims_3d_t &params) {
145156
return add_3d_byte(a, params.inc3, params.num1, (addr_t &)params.count1,
146157
params.inc1, params.num2, (addr_t &)params.count2,

clang/lib/Headers/aiev2_addr.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,11 +264,22 @@ load_lut_2x_float(const void *lut1, const void *lut2, v16uint32 offset,
264264
(v32bfloat16)insert(v2, 1, (v16bfloat16)read_lut64_3(lut1, lut2, offset));
265265
}
266266

267+
template <typename T> INTRINSIC(T *) add_2d_ptr(T *a, dims_2d_t &params) {
268+
return add_2d_ptr(a, params.inc2, params.num1, (addr_t &)params.count1,
269+
params.inc1);
270+
}
271+
267272
template <typename T> INTRINSIC(T *) add_2d_byte(T *a, dims_2d_t &params) {
268273
return add_2d_byte(a, params.inc2, params.num1, (addr_t &)params.count1,
269274
params.inc1);
270275
}
271276

277+
template <typename T> INTRINSIC(T *) add_3d_ptr(T *a, dims_3d_t &params) {
278+
return add_3d_ptr(a, params.inc3, params.num1, (addr_t &)params.count1,
279+
params.inc1, params.num2, (addr_t &)params.count2,
280+
params.inc2);
281+
}
282+
272283
template <typename T> INTRINSIC(T *) add_3d_byte(T *a, dims_3d_t &params) {
273284
return add_3d_byte(a, params.inc3, params.num1, (addr_t &)params.count1,
274285
params.inc1, params.num2, (addr_t &)params.count2,

clang/test/CodeGen/aie/aie-addr-intrinsic.cpp

Lines changed: 127 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dims_2d_t test_dims_2d_t (unsigned int size1, int inc1, int inc2) {
2323
return dims_2d_t(size1,inc1, inc2);
2424
}
2525

26+
//
2627
// CHECK-COMMON-LABEL: @_Z15test2_dims_2d_tjiii(
2728
// CHECK-COMMON-NEXT: entry:
2829
// CHECK-COMMON-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DIMS_2D_T:%.*]] poison, i32 [[SIZE1:%.*]], 0
@@ -224,16 +225,18 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t&
224225
return add_2d_byte(a,off,size1,count1,inc1);
225226
}
226227

227-
// AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
228+
// AIE2-LABEL: @_Z15test_add_2d_ptrPDv16_iR9dims_2d_t(
228229
// AIE2-NEXT: entry:
229230
// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
230231
// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
231232
// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
232233
// AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
233234
// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
234235
// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
235-
// AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
236-
// AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
236+
// AIE2-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
237+
// AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[MUL_I_I]] to i20
238+
// AIE2-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
239+
// AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[MUL1_I_I]] to i20
237240
// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
238241
// AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
239242
// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
@@ -244,14 +247,60 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t&
244247
// AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
245248
// AIE2-NEXT: ret ptr [[TMP11]]
246249
//
247-
// AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
250+
// AIE2P-LABEL: @_Z15test_add_2d_ptrPDv16_iR9dims_2d_t(
248251
// AIE2P-NEXT: entry:
249252
// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
250253
// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
251254
// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
252255
// AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
253256
// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
254257
// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
258+
// AIE2P-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
259+
// AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[MUL_I_I]] to i20
260+
// AIE2P-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
261+
// AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[MUL1_I_I]] to i20
262+
// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
263+
// AIE2P-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
264+
// AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
265+
// AIE2P-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2p.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
266+
// AIE2P-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
267+
// AIE2P-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
268+
// AIE2P-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
269+
// AIE2P-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
270+
// AIE2P-NEXT: ret ptr [[TMP11]]
271+
//
272+
v16int32* test_add_2d_ptr(v16int32* a, dims_2d_t &params){
273+
return add_2d_ptr(a,params);
274+
}
275+
276+
// AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
277+
// AIE2-NEXT: entry:
278+
// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
279+
// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14]]
280+
// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16]]
281+
// AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
282+
// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
283+
// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17]]
284+
// AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
285+
// AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
286+
// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
287+
// AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
288+
// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
289+
// AIE2-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
290+
// AIE2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
291+
// AIE2-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
292+
// AIE2-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
293+
// AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
294+
// AIE2-NEXT: ret ptr [[TMP11]]
295+
//
296+
// AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
297+
// AIE2P-NEXT: entry:
298+
// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
299+
// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14]]
300+
// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16]]
301+
// AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
302+
// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
303+
// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17]]
255304
// AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
256305
// AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
257306
// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
@@ -268,6 +317,80 @@ v16int32* test_add_2d_byte(v16int32* a, dims_2d_t &params){
268317
return add_2d_byte(a,params);
269318
}
270319

320+
// AIE2-LABEL: @_Z15test_add_3d_ptrPDv16_iR9dims_3d_t(
321+
// AIE2-NEXT: entry:
322+
// AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
323+
// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
324+
// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
325+
// AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
326+
// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
327+
// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
328+
// AIE2-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
329+
// AIE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
330+
// AIE2-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
331+
// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
332+
// AIE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
333+
// AIE2-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
334+
// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[MUL_I_I]] to i20
335+
// AIE2-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
336+
// AIE2-NEXT: [[TMP6:%.*]] = trunc i32 [[MUL1_I_I]] to i20
337+
// AIE2-NEXT: [[MUL2_I_I:%.*]] = shl i32 [[TMP4]], 6
338+
// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[MUL2_I_I]] to i20
339+
// AIE2-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
340+
// AIE2-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
341+
// AIE2-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
342+
// AIE2-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
343+
// AIE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
344+
// AIE2-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
345+
// AIE2-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
346+
// AIE2-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
347+
// AIE2-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
348+
// AIE2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
349+
// AIE2-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
350+
// AIE2-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
351+
// AIE2-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
352+
// AIE2-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
353+
// AIE2-NEXT: ret ptr [[TMP19]]
354+
//
355+
// AIE2P-LABEL: @_Z15test_add_3d_ptrPDv16_iR9dims_3d_t(
356+
// AIE2P-NEXT: entry:
357+
// AIE2P-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
358+
// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
359+
// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
360+
// AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
361+
// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
362+
// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
363+
// AIE2P-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
364+
// AIE2P-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
365+
// AIE2P-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
366+
// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
367+
// AIE2P-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
368+
// AIE2P-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP0]], 6
369+
// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[MUL_I_I]] to i20
370+
// AIE2P-NEXT: [[MUL1_I_I:%.*]] = shl i32 [[TMP2]], 6
371+
// AIE2P-NEXT: [[TMP6:%.*]] = trunc i32 [[MUL1_I_I]] to i20
372+
// AIE2P-NEXT: [[MUL2_I_I:%.*]] = shl i32 [[TMP4]], 6
373+
// AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[MUL2_I_I]] to i20
374+
// AIE2P-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
375+
// AIE2P-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
376+
// AIE2P-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
377+
// AIE2P-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
378+
// AIE2P-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
379+
// AIE2P-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
380+
// AIE2P-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
381+
// AIE2P-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
382+
// AIE2P-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
383+
// AIE2P-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
384+
// AIE2P-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
385+
// AIE2P-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
386+
// AIE2P-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
387+
// AIE2P-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
388+
// AIE2P-NEXT: ret ptr [[TMP19]]
389+
//
390+
v16int32* test_add_3d_ptr(v16int32* a, dims_3d_t &params){
391+
return add_3d_ptr(a,params);
392+
}
393+
271394
// AIE2-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t(
272395
// AIE2-NEXT: entry:
273396
// AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16

0 commit comments

Comments
 (0)