1
1
/***************************************************************************
2
- Copyright (c) 2013-2016 , The OpenBLAS Project
2
+ Copyright (c) 2013-2017 , The OpenBLAS Project
3
3
All rights reserved.
4
4
Redistribution and use in source and binary forms, with or without
5
5
modification, are permitted provided that the following conditions are
@@ -66,42 +66,76 @@ static FLOAT sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y)
66
66
67
67
#endif
68
68
69
+ #if defined (DSDOT )
70
+ double CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
71
+ #else
69
72
FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
73
+ #endif
70
74
{
71
75
BLASLONG i = 0 ;
72
76
BLASLONG ix = 0 ,iy = 0 ;
77
+ double dot = 0.0 ;
73
78
74
- FLOAT dot = 0.0 ;
79
+ #if defined (DSDOT )
80
+ double mydot = 0.0 ;
81
+ FLOAT asmdot = 0.0 ;
82
+ #else
83
+ FLOAT mydot = 0.0 ;
84
+ #endif
85
+ BLASLONG n1 ;
75
86
76
87
if ( n <= 0 ) return (dot );
77
88
78
89
if ( (inc_x == 1 ) && (inc_y == 1 ) )
79
90
{
80
91
81
- BLASLONG n1 = n & -32 ;
92
+ n1 = n & ( BLASLONG )( -32 ) ;
82
93
83
94
if ( n1 )
84
- dot = sdot_kernel_16 (n1 , x , y );
85
-
95
+ #if defined(DSDOT )
96
+ {
97
+ FLOAT * x1 = x ;
98
+ FLOAT * y1 = y ;
99
+ BLASLONG n2 = 32 ;
100
+ while (i < n1 ) {
101
+ asmdot = sdot_kernel_16 (n2 , x1 , y1 );
102
+ mydot += (double )asmdot ;
103
+ asmdot = 0. ;
104
+ x1 += 32 ;
105
+ y1 += 32 ;
106
+ i += 32 ;
107
+ }
108
+ }
109
+ #else
110
+ mydot = sdot_kernel_16 (n1 , x , y );
111
+ #endif
86
112
i = n1 ;
87
113
while (i < n )
88
114
{
89
-
115
+ #if defined(DSDOT )
116
+ dot += (double )y [i ] * (double )x [i ] ;
117
+ #else
90
118
dot += y [i ] * x [i ] ;
119
+ #endif
91
120
i ++ ;
92
121
93
122
}
123
+
124
+ dot += mydot ;
94
125
return (dot );
95
126
96
127
97
128
}
98
129
99
- BLASLONG n1 = n & -2 ;
130
+ n1 = n & ( BLASLONG )( -2 ) ;
100
131
101
132
while (i < n1 )
102
133
{
103
-
134
+ #if defined (DSDOT )
135
+ dot += (double )y [iy ] * (double )x [ix ] + (double )y [iy + inc_y ] * (double )x [ix + inc_x ];
136
+ #else
104
137
dot += y [iy ] * x [ix ] + y [iy + inc_y ] * x [ix + inc_x ];
138
+ #endif
105
139
ix += inc_x * 2 ;
106
140
iy += inc_y * 2 ;
107
141
i += 2 ;
@@ -110,8 +144,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
110
144
111
145
while (i < n )
112
146
{
113
-
147
+ #if defined (DSDOT )
148
+ dot += (double )y [iy ] * (double )x [ix ] ;
149
+ #else
114
150
dot += y [iy ] * x [ix ] ;
151
+ #endif
115
152
ix += inc_x ;
116
153
iy += inc_y ;
117
154
i ++ ;
0 commit comments