Skip to content

Commit bc5adb0

Browse files
committed
opencl: add missing add_id.cl
1 parent 02721cc commit bc5adb0

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
3+
//------------------------------------------------------------------------------
4+
// add_id
5+
//------------------------------------------------------------------------------
6+
// Adds a row of src1, selected through an index read from src2, to a row of
// src0 and stores the float sums in dst.
//
// Every buffer is passed as a byte pointer together with a byte offset that is
// applied before any access. For the work-group with ids (g0, g1):
//   - the row index is the int stored in src2 at byte offset g0*sizeof(int) + g1*nb21
//   - the src0 row starts at byte offset g0*nb01 + g1*nb02
//   - the src1 row starts at byte offset index*nb11
//   - the dst row starts at byte offset g0*(ne0*sizeof(float)) + g1*(ne1*ne0*sizeof(float))
// The work-items of a group share the ne0 elements of the row, each starting at
// its local id in dimension 0 and advancing by the local size in dimension 0.
kernel void kernel_add_id(
        global char * src0,
        ulong         offset0,
        global char * src1,
        ulong         offset1,
        global char * src2,
        ulong         offset2,
        global char * dst,
        ulong         offsetd,
        ulong         nb01,
        ulong         nb02,
        ulong         nb11,
        ulong         nb21,
        int           ne0,
        int           ne1
) {
    // Buffer bases after applying the caller-supplied byte offsets.
    global char * lhs_base = src0 + offset0;
    global char * rhs_base = src1 + offset1;
    global char * ids_base = src2 + offset2;
    global char * out_base = dst  + offsetd;

    // Work-group ids, narrowed to int exactly as the index math below expects.
    const int g0 = get_group_id(0);
    const int g1 = get_group_id(1);

    // Index of the src1 row to add for this (g0, g1) pair.
    global const int * id_ptr = (global const int *)(ids_base + g0*sizeof(int) + g1*nb21);
    const int rhs_row = *id_ptr;

    // dst strides are derived from ne0/ne1 rather than passed in.
    const size_t out_nb1 = ne0 * sizeof(float);
    const size_t out_nb2 = ne1 * out_nb1;

    global float * out = (global float *)(out_base + g0*out_nb1 + g1*out_nb2);
    global float * lhs = (global float *)(lhs_base + g0*nb01    + g1*nb02);
    global float * rhs = (global float *)(rhs_base + rhs_row*nb11);

    // Each work-item handles elements lid, lid + step, lid + 2*step, ...
    const size_t step = get_local_size(0);
    int col = get_local_id(0);
    while (col < ne0) {
        out[col] = lhs[col] + rhs[col];
        col += step;
    }
}

0 commit comments

Comments
 (0)