@@ -43,6 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43
43
#include "ddot_microk_sandy-2.c"
44
44
#endif
45
45
46
/*
 * RETURN_TYPE is the type through which a partial dot-product result is
 * stored: double when DSDOT is defined, FLOAT in every other build.
 */
#if defined(DSDOT)
#define RETURN_TYPE double
#else
#define RETURN_TYPE FLOAT
#endif
51
+
46
52
47
53
#ifndef HAVE_KERNEL_8
48
54
@@ -71,7 +77,7 @@ static void ddot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
71
77
72
78
#endif
73
79
74
- FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
80
+ static FLOAT dot_compute (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
75
81
{
76
82
BLASLONG i = 0 ;
77
83
BLASLONG ix = 0 ,iy = 0 ;
@@ -139,4 +145,63 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
139
145
140
146
}
141
147
148
+ #if defined(SMP )
149
+ static int dot_thread_function (BLASLONG n , BLASLONG dummy0 ,
150
+ BLASLONG dummy1 , FLOAT dummy2 , FLOAT * x , BLASLONG inc_x , FLOAT * y ,
151
+ BLASLONG inc_y , RETURN_TYPE * result , BLASLONG dummy3 )
152
+ {
153
+ * (RETURN_TYPE * )result = dot_compute (n , x , inc_x , y , inc_y );
154
+
155
+ return 0 ;
156
+ }
157
+
158
+ extern int blas_level1_thread_with_return_value (int mode , BLASLONG m , BLASLONG n ,
159
+ BLASLONG k , void * alpha , void * a , BLASLONG lda , void * b , BLASLONG ldb ,
160
+ void * c , BLASLONG ldc , int (* function )(), int nthreads );
161
+ #endif
142
162
163
+ FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
164
+ {
165
+ #if defined(SMP )
166
+ int nthreads ;
167
+ FLOAT dummy_alpha ;
168
+ #endif
169
+ FLOAT dot = 0.0 ;
170
+
171
+ #if defined(SMP )
172
+ nthreads = num_cpu_avail (1 );
173
+
174
+ if (inc_x == 0 || inc_y == 0 )
175
+ nthreads = 1 ;
176
+
177
+ if (n <= 10000 )
178
+ nthreads = 1 ;
179
+
180
+ if (nthreads == 1 ) {
181
+ dot = dot_compute (n , x , inc_x , y , inc_y );
182
+ } else {
183
+ int mode , i ;
184
+ char result [MAX_CPU_NUMBER * sizeof (double ) * 2 ];
185
+ RETURN_TYPE * ptr ;
186
+
187
+ #if !defined(DOUBLE )
188
+ mode = BLAS_SINGLE | BLAS_REAL ;
189
+ #else
190
+ mode = BLAS_DOUBLE | BLAS_REAL ;
191
+ #endif
192
+ blas_level1_thread_with_return_value (mode , n , 0 , 0 , & dummy_alpha ,
193
+ x , inc_x , y , inc_y , result , 0 ,
194
+ ( void * )dot_thread_function , nthreads );
195
+
196
+ ptr = (RETURN_TYPE * )result ;
197
+ for (i = 0 ; i < nthreads ; i ++ ) {
198
+ dot = dot + (* ptr );
199
+ ptr = (RETURN_TYPE * )(((char * )ptr ) + sizeof (double ) * 2 );
200
+ }
201
+ }
202
+ #else
203
+ dot = dot_compute (n , x , inc_x , y , inc_y );
204
+ #endif
205
+
206
+ return dot ;
207
+ }
0 commit comments