@@ -109,27 +109,12 @@ public static void vectOuterMultAdd(double[] a, double[] b, double[] c, int ai,
109109 }
110110 }
111111 else {
112- //rest, not aligned to 4-blocks
113- final int bn = len1 %4 ;
114- for ( int i =0 , cix =ci ; i < bn ; i ++, cix +=len2 )
112+ for ( int i =0 , cix =ci ; i < len1 ; i ++, cix +=len2 )
115113 if ( a [ai +i ] != 0 )
116114 LibMatrixMult .vectMultiplyAdd (a [ai +i ], b , c , bi , cix , len2 );
117-
118- //unrolled 4-block (for fewer L1-dcache loads)
119- for ( int i =bn , cix =ci +bn *len2 ; i < len1 ; i +=4 , cix +=4 *len2 ) {
120- final int cix1 =cix , cix2 =cix +len2 , cix3 =cix +2 *len2 , cix4 =cix +3 *len2 ;
121- final double aval1 =a [ai +i ], aval2 =a [ai +i +1 ], aval3 =a [ai +i +2 ], aval4 =a [ai +i +3 ];
122- for ( int j =0 ; j <len2 ; j ++ ) {
123- final double bval = b [bi +j ];
124- c [cix1 + j ] += aval1 * bval ;
125- c [cix2 + j ] += aval2 * bval ;
126- c [cix3 + j ] += aval3 * bval ;
127- c [cix4 + j ] += aval4 * bval ;
128- }
129- }
130- }
115+ }
131116 }
132-
117+
133118 public static void vectOuterMultAdd (double [] a , double [] b , double [] c , int [] aix , int ai , int bi , int ci , int alen , int len1 , int len2 ) {
134119 if ( isFlipOuter (len1 , len2 ) ) {
135120 for ( int i =0 , cix =ci ; i < len2 ; i ++, cix +=len1 ) {
0 commit comments