Skip to content

Commit cdc2e2c

Browse files
committed
[SYSTEMDS-3794] Fix multi-threaded sparse matrix-vector elementwise ops
There was a regression where all sparse matrix-vector elementwise operations were executed only single-threaded. This patch fixes the most important branch for sparse-safe matrix-vector operations, but in a subsequent task we also need to fix all the other cases. When running connected components on the Europe road network, the individual binary multiply operations improved by 10-20x on a box with 48 vcores. End-to-end, the entire components() invocation with 20 iterations improved from 282s (246s for b(*)) to 112s (75s for b(*)). The 10x improvements do not carry through fully because the output MCSR is converted to CSR when appending to the buffer pool (57s of 75s).
1 parent 4e00aa1 commit cdc2e2c

File tree

1 file changed

+23
-25
lines changed

1 file changed

+23
-25
lines changed

src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ public static MatrixBlock bincellOpScalar(MatrixBlock m1, MatrixBlock ret, Scala
184184
public static MatrixBlock bincellOp(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
185185
try{
186186

187-
// Timing time = new Timing(true);
187+
//Timing time = new Timing(true);
188188
isValidDimensionsBinary(m1, m2);
189189
op = replaceOpWithSparseSafeIfApplicable(m1, m2, op);
190190

@@ -212,11 +212,13 @@ public static MatrixBlock bincellOp(MatrixBlock m1, MatrixBlock m2, MatrixBlock
212212
int k = op.getNumThreads();
213213

214214
// fallback to sequential computation for specialized operations
215+
// TODO fix all variants to be feasible for multi-threading
215216
if(k <= 1 || m1.isEmpty() || m2.isEmpty()
216217
|| ret.getLength() < PAR_NUMCELL_THRESHOLD2
217218
|| ((op.sparseSafe || isSparseSafeDivideOrPow(op, m2))
218219
&& !(atype == BinaryAccessType.MATRIX_MATRIX
219-
|| atype.isMatrixVector() && isAllDense(m1, m2, ret)))
220+
|| (atype.isMatrixVector() && isAllDense(m1, m2, ret))
221+
|| (atype.isMatrixVector() && m1.sparse && (m2.sparse || ret.sparse))))
220222
|| !CommonThreadPool.useParallelismOnThread())
221223
{
222224
bincellOpMatrixSingle(m1, m2, ret, op, atype);
@@ -227,7 +229,7 @@ public static MatrixBlock bincellOp(MatrixBlock m1, MatrixBlock m2, MatrixBlock
227229

228230
if(ret.isEmptyBlock(false))
229231
ret.examSparsity(k);
230-
// System.out.println("BinCell " + op + " " + m1.getNumRows() + ", " + m1.getNumColumns() + ", " + m1.getNonZeros()
232+
//System.out.println("BinCell " + op + " " + m1.getNumRows() + ", " + m1.getNumColumns() + ", " + m1.getNonZeros()
231233
// + " -- " + m2.getNumRows() + ", " + m2.getNumColumns() + " " + m2.getNonZeros() + "\t\t" + time.stop());
232234

233235
return ret;
@@ -732,7 +734,7 @@ else if( m1.sparse && !m2.sparse && !m2.isEmpty() && !ret.sparse
732734
&& atype == BinaryAccessType.MATRIX_ROW_VECTOR)
733735
safeBinaryMVSparseDenseRow(m1, m2, ret, op);
734736
else if( m1.sparse ) //SPARSE m1
735-
safeBinaryMVSparseLeft(m1, m2, ret, op);
737+
return safeBinaryMVSparseLeft(m1, m2, ret, op, rl, ru);
736738
else if( !m1.sparse && !m2.sparse && ret.sparse && op.fn instanceof Multiply
737739
&& atype == BinaryAccessType.MATRIX_COL_VECTOR
738740
&& (long)m1.rlen * m2.clen < Integer.MAX_VALUE )
@@ -977,39 +979,39 @@ else if( !skipEmpty && m2.isEmptyBlock(false) && (op.fn instanceof Minus || op.f
977979
ret.nonZeros = nnz;
978980
}
979981

980-
private static void safeBinaryMVSparseLeft(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
982+
private static long safeBinaryMVSparseLeft(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret,
983+
BinaryOperator op, int rl, int ru)
984+
{
981985
boolean isMultiply = (op.fn instanceof Multiply);
982986
boolean skipEmpty = (isMultiply || isSparseSafeDivideOrPow(op, m2));
983987
BinaryAccessType atype = getBinaryAccessType(m1, m2);
984988

985989
// early abort on skip and empty
986990
if(skipEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false)))
987-
return; // skip entire empty block
988-
991+
return 0; // skip entire empty block
989992

990993
if(atype == BinaryAccessType.MATRIX_COL_VECTOR)
991-
safeBinaryMVSparseLeftColVector(m1, m2, ret, op);
994+
safeBinaryMVSparseLeftColVector(m1, m2, ret, op, rl, ru);
992995
else if(atype == BinaryAccessType.MATRIX_ROW_VECTOR)
993-
safeBinaryMVSparseLeftRowVector(m1, m2, ret, op);
994-
995-
ret.recomputeNonZeros();
996+
safeBinaryMVSparseLeftRowVector(m1, m2, ret, op, rl, ru);
996997

998+
return ret.recomputeNonZeros(rl, ru-1);
997999
}
9981000

999-
@SuppressWarnings("null")
1000-
private static void safeBinaryMVSparseLeftColVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
1001+
private static void safeBinaryMVSparseLeftColVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret,
1002+
BinaryOperator op, int rl, int ru)
1003+
{
10011004
final boolean isMultiply = (op.fn instanceof Multiply);
10021005
final boolean skipEmpty = (isMultiply || isSparseSafeDivideOrPow(op, m2));
10031006

1004-
final int rlen = m1.rlen;
10051007
final int clen = m1.clen;
10061008
final SparseBlock a = m1.sparseBlock;
10071009
final boolean aNull = a == null;
10081010
if(skipEmpty && a == null)
10091011
return;
10101012
if(ret.isInSparseFormat()){
10111013
final SparseBlockMCSR rb = (SparseBlockMCSR) ret.getSparseBlock();
1012-
for(int i = 0; i < rlen; i++) {
1014+
for(int i = rl; i < ru; i++) {
10131015
final double v2 = m2.get(i, 0);
10141016
final boolean emptyRow = !aNull ? a.isEmpty(i) : true;
10151017
if((skipEmpty && (emptyRow || v2 == 0)) // skip empty one side zero
@@ -1029,7 +1031,7 @@ else if(!fill)
10291031
}
10301032
else{
10311033
final DenseBlock db = ret.getDenseBlock();
1032-
for(int i = 0; i < rlen; i++) {
1034+
for(int i = rl; i < ru; i++) {
10331035
final double v2 = m2.get(i, 0);
10341036

10351037
final boolean emptyRow = !aNull ? a.isEmpty(i) : true;
@@ -1045,7 +1047,6 @@ else if(!fill)
10451047
safeBinaryMVSparseColVectorRowNoFill(a, i, db, v2, emptyRow, op);
10461048
else // GENERAL CASE
10471049
safeBinaryMVSparseColVectorRowWithFill(a, i, db, vz, v2, clen, emptyRow, op);
1048-
10491050
}
10501051
}
10511052
}
@@ -1141,18 +1142,17 @@ private static final void fillZeroValuesScalar( double v, DenseBlock ret,
11411142
ret.set(rpos, rpos + 1, cpos, len, v);
11421143
}
11431144

1144-
@SuppressWarnings("null")
1145-
private static void safeBinaryMVSparseLeftRowVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
1145+
private static void safeBinaryMVSparseLeftRowVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret,
1146+
BinaryOperator op, int rl, int ru)
1147+
{
11461148
boolean isMultiply = (op.fn instanceof Multiply);
11471149
boolean skipEmpty = (isMultiply || isSparseSafeDivideOrPow(op, m2));
11481150

1149-
int rlen = m1.rlen;
11501151
int clen = m1.clen;
11511152
SparseBlock a = m1.sparseBlock;
11521153
if(ret.isInSparseFormat()){
11531154
SparseBlock sb = ret.getSparseBlock();
1154-
long nnz = 0;
1155-
for(int i = 0; i < rlen; i++) {
1155+
for(int i = rl; i < ru; i++) {
11561156
if(skipEmpty && (a == null || a.isEmpty(i)))
11571157
continue; // skip empty rows
11581158
if(skipEmpty && ret.sparse)
@@ -1170,18 +1170,16 @@ private static void safeBinaryMVSparseLeftRowVector(MatrixBlock m1, MatrixBlock
11701170
double v2 = m2.get(0, aix[j]);
11711171
double v = op.fn.execute(avals[j], v2);
11721172
sb.append(i, aix[j], v);
1173-
nnz += v != 0 ? 1 : 0;
11741173
lastIx = aix[j];
11751174
}
11761175
}
11771176
// empty left
11781177
fillZeroValues(op, m2, ret, skipEmpty, i, lastIx + 1, clen);
11791178
}
1180-
ret.setNonZeros(nnz);
11811179
}
11821180
else{
11831181
DenseBlock db = ret.getDenseBlock();
1184-
for(int i = 0; i < rlen; i++){
1182+
for(int i = rl; i < ru; i++){
11851183
if(skipEmpty && (a == null || a.isEmpty(i)))
11861184
continue; // skip empty rows
11871185
if(skipEmpty && ret.sparse)

0 commit comments

Comments
 (0)