File tree Expand file tree Collapse file tree 3 files changed +9
-9
lines changed Expand file tree Collapse file tree 3 files changed +9
-9
lines changed Original file line number Diff line number Diff line change @@ -62,7 +62,7 @@ __local double Bs[BLOCK_SIZE*BLOCK_SIZE]; \n
62
62
__local double workspace[BLOCK_SIZE]; \n // workspace used to store the current working column
63
63
64
64
// load A
65
- # pragma unroll\n
65
+ _Pragma ( " unroll" ) \n
66
66
for (i = 0 ; i < BLOCK_SIZE; i++)\n
67
67
{ \n
68
68
if (tx >= i && gx < na)\n
@@ -138,7 +138,7 @@ for (i = BLOCK_SIZE - 2; i >= 0; i--) {\n
138
138
139
139
txw = (tx - i - 1 ); \n
140
140
141
- # pragma unroll\n
141
+ _Pragma ( " unroll" ) \n
142
142
for (j = 0 ; j < BLOCK_SIZE - i - 1 ; j++)\n
143
143
Ystx += switcher*(*(Bw + j*BLOCK_SIZE + txw)*x[j]); \n
144
144
@@ -163,7 +163,7 @@ for (i = BLOCK_SIZE - 2; i >= 0; i--) {\n
163
163
}\n
164
164
165
165
// write back A
166
- # pragma unroll\n
166
+ _Pragma ( " unroll" ) \n
167
167
for (i = 0 ; i < BLOCK_SIZE; i++)\n
168
168
*(d_dinvA + i*NB + tx) = Bs[i*BLOCK_SIZE + tx]; \n
169
169
}\n
Original file line number Diff line number Diff line change @@ -59,7 +59,7 @@ uint na)\n
59
59
__local double workspace[BLOCK_SIZE]; \n // workspace used to store the current working column
60
60
61
61
// load A
62
- # pragma unroll \n
62
+ _Pragma ( " unroll" ) \n
63
63
for ( i=0 ; i < BLOCK_SIZE; i++ )\n
64
64
{\n
65
65
if (tx <= i && i+bx*BLOCK_SIZE < na )\n
@@ -111,7 +111,7 @@ uint na)\n
111
111
workspace[tx] = *(Bs+i*BLOCK_SIZE+tx);\n
112
112
y = Bs+i*BLOCK_SIZE;\n
113
113
114
- # pragma unroll\n
114
+ _Pragma ( " unroll" ) \n
115
115
// for( j=tx; j < i; j++ )
116
116
for ( j=0 ; j < i; j++ )\n
117
117
{\n
@@ -139,7 +139,7 @@ uint na)\n
139
139
}\n
140
140
141
141
// write back A
142
- # pragma unroll\n
142
+ _Pragma ( " unroll" ) \n
143
143
for ( i=0 ; i < BLOCK_SIZE; i++ )\n
144
144
{\n
145
145
*(d_dinvA+i*NB+tx) = Bs[i*BLOCK_SIZE+tx];\n
Original file line number Diff line number Diff line change @@ -59,7 +59,7 @@ __local double Bs[BLOCK_SIZE*BLOCK_SIZE]; \n
59
59
__local double workspace[BLOCK_SIZE];\n // workspace used to store the current working column
60
60
61
61
// load A \n
62
- # pragma unroll\n
62
+ _Pragma ( " unroll" ) \n
63
63
for (i = 0 ; i < BLOCK_SIZE; i++)\n
64
64
{ \n
65
65
if (tx <= i && i + bx*BLOCK_SIZE < na)\n
@@ -112,7 +112,7 @@ for (i = 0; i < BLOCK_SIZE; i++) {\n
112
112
workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
113
113
y = Bs + i*BLOCK_SIZE; \n
114
114
115
- # pragma unroll\n
115
+ _Pragma ( " unroll" ) \n
116
116
// for( j=tx; j < i; j++ )
117
117
for (j = 0 ; j < i; j++)\n
118
118
Ystx += switcher * (*(Bs + j*BLOCK_SIZE + tx)*workspace[j]); \n
@@ -138,7 +138,7 @@ for (i = 0; i < BLOCK_SIZE; i++) {\n
138
138
}\n
139
139
140
140
// write back A
141
- # pragma unroll\n
141
+ _Pragma ( " unroll" ) \n
142
142
for (i = 0 ; i < BLOCK_SIZE; i++)\n
143
143
*(d_dinvA + i*NB + tx) = Bs[i*BLOCK_SIZE + tx]; \n
144
144
You can’t perform that action at this time.
0 commit comments