Skip to content

Commit e40bbfe

Browse files
HubertKrawczyk authored and mboehm7 committed
[SYSTEMDS-3920] Vector API for Codegen Outer-Products
Closes #2349.
1 parent 67ff6be commit e40bbfe

File tree

1 file changed

+3
-18
lines changed

1 file changed

+3
-18
lines changed

src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -109,27 +109,12 @@ public static void vectOuterMultAdd(double[] a, double[] b, double[] c, int ai,
109109
}
110110
}
111111
else {
112-
//rest, not aligned to 4-blocks
113-
final int bn = len1%4;
114-
for( int i=0, cix=ci; i < bn; i++, cix+=len2 )
112+
for( int i=0, cix=ci; i < len1; i++, cix+=len2)
115113
if( a[ai+i] != 0 )
116114
LibMatrixMult.vectMultiplyAdd(a[ai+i], b, c, bi, cix, len2);
117-
118-
//unrolled 4-block (for fewer L1-dcache loads)
119-
for( int i=bn, cix=ci+bn*len2; i < len1; i+=4, cix+=4*len2 ) {
120-
final int cix1=cix, cix2=cix+len2, cix3=cix+2*len2, cix4=cix+3*len2;
121-
final double aval1=a[ai+i], aval2=a[ai+i+1], aval3=a[ai+i+2], aval4=a[ai+i+3];
122-
for( int j=0; j<len2; j++ ) {
123-
final double bval = b[bi+j];
124-
c[cix1 + j] += aval1 * bval;
125-
c[cix2 + j] += aval2 * bval;
126-
c[cix3 + j] += aval3 * bval;
127-
c[cix4 + j] += aval4 * bval;
128-
}
129-
}
130-
}
115+
}
131116
}
132-
117+
133118
public static void vectOuterMultAdd(double[] a, double[] b, double[] c, int[] aix, int ai, int bi, int ci, int alen, int len1, int len2) {
134119
if( isFlipOuter(len1, len2) ) {
135120
for( int i=0, cix=ci; i < len2; i++, cix+=len1 ) {

0 commit comments

Comments
 (0)