@@ -7,7 +7,6 @@ string opencl_c_container() { return R( // ########################## begin of O
77kernel void kernel_double (global float * data) {
88 double x = (double )get_global_id (0 );
99 double y = (double )get_local_id (0 );
10- #pragma unroll
1110 for (uint i=0u ; i<128u ; i++) {
1211 x = fma (y, x, y);
1312 y = fma (x, y, x);
@@ -19,7 +18,6 @@ kernel void kernel_double(global float* data) {
1918kernel void kernel_float (global float * data) {
2019 float x = (float )get_global_id (0 );
2120 float y = (float )get_local_id (0 );
22- #pragma unroll
2321 for (uint i=0u ; i<512u ; i++) {
2422 x = fma (y, x, y);
2523 y = fma (x, y, x);
@@ -31,7 +29,6 @@ kernel void kernel_float(global float* data) {
3129kernel void kernel_half (global float * data) {
3230 half2 x = (half2)((float )get_global_id (0 ), (float )get_local_id (0 ));
3331 half2 y = (half2)((float )get_local_id (0 ), (float )get_global_id (0 ));
34- #pragma unroll
3532 for (uint i=0u ; i<512u ; i++) {
3633 x = fma (y, x, y);
3734 y = fma (x, y, x);
@@ -43,7 +40,6 @@ kernel void kernel_half(global float* data) {
4340kernel void kernel_long (global float * data) {
4441 long x = (long )get_global_id (0 );
4542 long y = (long )get_local_id (0 );
46- #pragma unroll
4743 for (uint i=0u ; i<8u ; i++) {
4844 x = (y*x)+y;
4945 y = (x*y)+x;
@@ -54,7 +50,6 @@ kernel void kernel_long(global float* data) {
5450kernel void kernel_int (global float * data) {
5551 int x = get_global_id (0 );
5652 int y = get_local_id (0 );
57- #pragma unroll
5853 for (uint i=0u ; i<512u ; i++) {
5954 x = (y*x)+y;
6055 y = (x*y)+x;
@@ -65,7 +60,6 @@ kernel void kernel_int(global float* data) {
6560kernel void kernel_short (global float * data) {
6661 short2 x = as_short2 ((int )get_global_id (0 ));
6762 short2 y = as_short2 ((int )get_local_id (0 ));
68- #pragma unroll
6963 for (uint i=0u ; i<128u ; i++) {
7064 x = (y*x)+y;
7165 y = (x*y)+x;
@@ -76,7 +70,6 @@ kernel void kernel_short(global float* data) {
7670kernel void kernel_char (global float * data) {
7771 char4 x = as_char4 ((int )get_global_id (0 ));
7872 char4 y = as_char4 ((int )get_local_id (0 ));
79- #pragma unroll
8073 for (uint i=0u ; i<64u ; i++) {
8174 x = (y*x)+y;
8275 y = (x*y)+x;
@@ -88,25 +81,21 @@ kernel void kernel_char(global float* data) {
8881
// Write benchmark kernel: work-item gid stores 0.0f to data[row*def_N+gid] for every row in [0, def_M).
// For a fixed row, neighbouring work-items address neighbouring floats.
// def_M and def_N are macros supplied outside this kernel; presumably the buffer holds def_M*def_N floats (confirm on the host side).
kernel void kernel_coalesced_write(global float* data) {
	const uint gid = get_global_id(0);
	uint row = 0u;
	while(row<def_M) {
		data[row*def_N+gid] = 0.0f; // coalesced write
		row++;
	}
}
// Read benchmark kernel: work-item gid sums data[row*def_N+gid] over row in [0, def_M) and stores the sum to data[gid].
// For a fixed row, neighbouring work-items read neighbouring floats.
// The final store keeps the loads observable; presumably this prevents the compiler from discarding the loop (confirm).
kernel void kernel_coalesced_read(global float* data) {
	const uint gid = get_global_id(0);
	float sum = 0.0f;
	for(uint row=0u; row<def_M; row++) {
		sum += data[row*def_N+gid]; // coalesced read
	}
	data[gid] = sum;
}
// Write benchmark kernel with a strided pattern: work-item gid stores 0.0f to the def_M consecutive floats starting at data[gid*def_M].
// Neighbouring work-items are therefore def_M elements apart at every loop step.
// def_M is a macro supplied outside this kernel.
kernel void kernel_misaligned_write(global float* data) {
	const uint gid = get_global_id(0);
	uint col = 0u;
	while(col<def_M) {
		data[gid*def_M+col] = 0.0f; // misaligned write
		col++;
	}
}
// Read benchmark kernel with a strided pattern: work-item gid sums the def_M consecutive floats starting at data[gid*def_M] and stores the sum to data[gid].
// Neighbouring work-items are def_M elements apart at every loop step.
// NOTE(review): data[gid] may also be read by another work-item in the same launch; the stored value looks like it only keeps the loads alive - confirm nothing consumes it.
kernel void kernel_misaligned_read(global float* data) {
	const uint gid = get_global_id(0);
	float sum = 0.0f;
	for(uint col=0u; col<def_M; col++) {
		sum += data[gid*def_M+col]; // misaligned read
	}
	data[gid] = sum;
}
0 commit comments