@@ -43,6 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4343#include "ddot_microk_sandy-2.c"
4444#endif
4545
/*
 * RETURN_TYPE is the element type of the per-thread partial results:
 * double when DSDOT is defined, otherwise the kernel's FLOAT type.
 */
#if defined(DSDOT)
#define RETURN_TYPE double
#else
#define RETURN_TYPE FLOAT
#endif
51+
4652
4753#ifndef HAVE_KERNEL_8
4854
@@ -71,7 +77,7 @@ static void ddot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
7177
7278#endif
7379
74- FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
80+ static FLOAT dot_compute (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
7581{
7682 BLASLONG i = 0 ;
7783 BLASLONG ix = 0 ,iy = 0 ;
@@ -139,4 +145,63 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
139145
140146}
141147
#if defined(SMP)
/*
 * Per-thread worker: computes the dot product of one (n, x, inc_x, y, inc_y)
 * sub-problem with dot_compute() and stores it in *result.
 *
 * The unused_* parameters are ignored here; they only pad the argument list
 * to the shape this callback is invoked with.  Always returns 0.
 */
static int dot_thread_function(BLASLONG n, BLASLONG unused_n, BLASLONG unused_k,
	FLOAT unused_alpha, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y,
	RETURN_TYPE *result, BLASLONG unused_ldc)
{
	RETURN_TYPE partial = dot_compute(n, x, inc_x, y, inc_y);

	*result = partial;
	return 0;
}

/*
 * Threaded level-1 dispatcher, defined elsewhere.  It is called below with
 * the worker above as `function` and a result buffer as `c`.
 */
extern int blas_level1_thread_with_return_value(int mode,
	BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
	void *a, BLASLONG lda,
	void *b, BLASLONG ldb,
	void *c, BLASLONG ldc,
	int (*function)(), int nthreads);
#endif
142162
163+ FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
164+ {
165+ #if defined(SMP )
166+ int nthreads ;
167+ FLOAT dummy_alpha ;
168+ #endif
169+ FLOAT dot = 0.0 ;
170+
171+ #if defined(SMP )
172+ nthreads = num_cpu_avail (1 );
173+
174+ if (inc_x == 0 || inc_y == 0 )
175+ nthreads = 1 ;
176+
177+ if (n <= 10000 )
178+ nthreads = 1 ;
179+
180+ if (nthreads == 1 ) {
181+ dot = dot_compute (n , x , inc_x , y , inc_y );
182+ } else {
183+ int mode , i ;
184+ char result [MAX_CPU_NUMBER * sizeof (double ) * 2 ];
185+ RETURN_TYPE * ptr ;
186+
187+ #if !defined(DOUBLE )
188+ mode = BLAS_SINGLE | BLAS_REAL ;
189+ #else
190+ mode = BLAS_DOUBLE | BLAS_REAL ;
191+ #endif
192+ blas_level1_thread_with_return_value (mode , n , 0 , 0 , & dummy_alpha ,
193+ x , inc_x , y , inc_y , result , 0 ,
194+ ( void * )dot_thread_function , nthreads );
195+
196+ ptr = (RETURN_TYPE * )result ;
197+ for (i = 0 ; i < nthreads ; i ++ ) {
198+ dot = dot + (* ptr );
199+ ptr = (RETURN_TYPE * )(((char * )ptr ) + sizeof (double ) * 2 );
200+ }
201+ }
202+ #else
203+ dot = dot_compute (n , x , inc_x , y , inc_y );
204+ #endif
205+
206+ return dot ;
207+ }
0 commit comments