Skip to content

Commit 5e3e91d

Browse files
authored
Split the microkernel workload into chunks of 32 floats for dsdot mode to limit loss of precision
1 parent 28c3fa8 commit 5e3e91d

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

kernel/x86_64/sdot.c

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
7878
BLASLONG ix=0,iy=0;
7979
double dot = 0.0 ;
8080

81+
#if defined (DSDOT)
82+
double mydot = 0.0;
83+
FLOAT asmdot = 0.0;
84+
#else
8185
FLOAT mydot=0.0;
86+
#endif
8287
BLASLONG n1;
8388

8489
if ( n <= 0 ) return(dot);
@@ -89,9 +94,23 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
8994
n1 = n & (BLASLONG)(-32);
9095

9196
if ( n1 )
97+
#if defined(DSDOT)
98+
{
99+
FLOAT *x1=x;
100+
FLOAT *y1=y;
101+
BLASLONG n2 = 32;
102+
while (i<n1) {
103+
sdot_kernel_16(n2, x1, y1 , &asmdot );
104+
mydot += (double)asmdot;
105+
asmdot=0.;
106+
x1+=32;
107+
y1+=32;
108+
i+=32;
109+
}
110+
}
111+
#else
92112
sdot_kernel_16(n1, x, y , &mydot );
93-
94-
113+
#endif
95114
i = n1;
96115
while(i < n)
97116
{
@@ -103,11 +122,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
103122
i++ ;
104123

105124
}
106-
#if defined(DSDOT)
107-
dot+=(double)mydot;
108-
#else
125+
109126
dot+=mydot;
110-
#endif
111127
return(dot);
112128

113129

0 commit comments

Comments
 (0)