File tree Expand file tree Collapse file tree 4 files changed +10
-3
lines changed Expand file tree Collapse file tree 4 files changed +10
-3
lines changed Original file line number Diff line number Diff line change @@ -101,6 +101,7 @@ else\n
101
101
}\n
102
102
}\n
103
103
104
+ barrier (CLK_LOCAL_MEM_FENCE);\n
104
105
/*
105
106
* the lower case
106
107
*/
@@ -135,6 +136,7 @@ for (i = BLOCK_SIZE - 2; i >= 0; i--) {\n
135
136
workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
136
137
x = workspace + i + 1 ; \n
137
138
y = Bs + i*BLOCK_SIZE; \n
139
+ barrier (CLK_LOCAL_MEM_FENCE);\n
138
140
139
141
txw = (tx - i - 1 ); \n
140
142
Original file line number Diff line number Diff line change @@ -94,6 +94,7 @@ uint na)\n
94
94
Bs[tx*BLOCK_SIZE+tx] = ONE / ( Bs[tx*BLOCK_SIZE+tx]) ;\n
95
95
}\n
96
96
}\n
97
+ barrier (CLK_LOCAL_MEM_FENCE);\n
97
98
98
99
/* the upper case */
99
100
for ( i=0 ; i < BLOCK_SIZE; i++ ) {\n
@@ -110,6 +111,7 @@ uint na)\n
110
111
// dtrmv
111
112
workspace[tx] = *(Bs+i*BLOCK_SIZE+tx);\n
112
113
y = Bs+i*BLOCK_SIZE;\n
114
+ barrier (CLK_LOCAL_MEM_FENCE);\n
113
115
114
116
_Pragma (" unroll" )\n
115
117
// for( j=tx; j < i; j++ )
Original file line number Diff line number Diff line change 94
94
Bs[tx*BLOCK_SIZE + tx] = ONE / (Bs[tx*BLOCK_SIZE + tx]); \n
95
95
}\n
96
96
}\n
97
+ barrier (CLK_LOCAL_MEM_FENCE);\n
97
98
98
99
99
100
/* the upper case */
@@ -111,6 +112,7 @@ for (i = 0; i < BLOCK_SIZE; i++) {\n
111
112
// dtrmv
112
113
workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
113
114
y = Bs + i*BLOCK_SIZE; \n
115
+ barrier (CLK_LOCAL_MEM_FENCE);\n
114
116
115
117
_Pragma (" unroll" )\n
116
118
// for( j=tx; j < i; j++ )
Original file line number Diff line number Diff line change @@ -1683,7 +1683,7 @@ clblasDtrsm(
1683
1683
const cl_event *eventWaitList,
1684
1684
cl_event *events)
1685
1685
{
1686
- /*
1686
+ # if 0
1687
1687
CHECK_QUEUES(numCommandQueues, commandQueues);
1688
1688
CHECK_EVENTS(numEventsInWaitList, eventWaitList);
1689
1689
@@ -1718,7 +1718,8 @@ clblasDtrsm(
1718
1718
functor->release();
1719
1719
1720
1720
return res;
1721
- */
1721
+
1722
+ #else
1722
1723
bool specialCaseHandled = false ;
1723
1724
1724
1725
// outer block size = 192
@@ -1780,7 +1781,7 @@ clblasDtrsm(
1780
1781
numEventsInWaitList,
1781
1782
eventWaitList,
1782
1783
events);
1783
-
1784
+ # endif
1784
1785
}
1785
1786
1786
1787
extern " C"
You can’t perform that action at this time.
0 commit comments