Skip to content

Commit 6166d07

Browse files
committed
added CLALibBinaryCellOp binary matrix-vector left op sparse task
1 parent 008c104 commit 6166d07

File tree

1 file changed

+68
-67
lines changed

1 file changed

+68
-67
lines changed

src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java

Lines changed: 68 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -400,7 +400,6 @@ private static void stackConstGroup(MatrixBlock m2, BinaryOperator op, boolean l
400400
private static MatrixBlock mvColCompressed(CompressedMatrixBlock m1, MatrixBlock m2, BinaryOperator op, boolean left)
401401
throws Exception {
402402

403-
// TODO: if left==true, m1 is the left side and a vector, m2 a matrix --> wrong out dims
404403
final int nCols = m1.getNumColumns();
405404
final int nRows = m1.getNumRows();
406405
m1 = morph(m1);
@@ -570,21 +569,15 @@ private static long binaryMVColSingleThreadDense(CompressedMatrixBlock m1, Matri
570569
boolean left, MatrixBlock ret) {
571570
final int nRows = m1.getNumRows();
572571
long nnz = 0;
573-
if(left)
574-
nnz += new BinaryMVColLeftTaskDense(m1, m2, ret, 0, nRows, op).call();
575-
else
576-
nnz += new BinaryMVColTaskDense(m1, m2, ret, 0, nRows, op).call();
572+
nnz += new BinaryMVColTaskDense(m1, m2, ret, 0, nRows, op, left).call();
577573
return nnz;
578574
}
579575

580576
private static long binaryMVColSingleThreadSparse(CompressedMatrixBlock m1, MatrixBlock m2, BinaryOperator op,
581577
boolean left, MatrixBlock ret) {
582578
final int nRows = m1.getNumRows();
583579
long nnz = 0;
584-
if(left)
585-
throw new NotImplementedException();
586-
else
587-
nnz += new BinaryMVColTaskSparse(m1, m2, ret, 0, nRows, op).call();
580+
nnz += new BinaryMVColTaskSparse(m1, m2, ret, 0, nRows, op, left).call();
588581
return nnz;
589582
}
590583

@@ -598,10 +591,7 @@ private static long binaryMVColMultiThreadDense(CompressedMatrixBlock m1, Matrix
598591
try {
599592
final ArrayList<Callable<Long>> tasks = new ArrayList<>();
600593
for(int i = 0; i < nRows; i += blkz) {
601-
if(left)
602-
tasks.add(new BinaryMVColLeftTaskDense(m1, m2, ret, i, Math.min(nRows, i + blkz), op));
603-
else
604-
tasks.add(new BinaryMVColTaskDense(m1, m2, ret, i, Math.min(nRows, i + blkz), op));
594+
tasks.add(new BinaryMVColTaskDense(m1, m2, ret, i, Math.min(nRows, i + blkz), op, left));
605595
}
606596
for(Future<Long> f : pool.invokeAll(tasks))
607597
nnz += f.get();
@@ -622,10 +612,7 @@ private static long binaryMVColMultiThreadSparse(CompressedMatrixBlock m1, Matri
622612
try {
623613
final ArrayList<Callable<Long>> tasks = new ArrayList<>();
624614
for(int i = 0; i < nRows; i += blkz) {
625-
if(left)
626-
throw new NotImplementedException();
627-
else
628-
tasks.add(new BinaryMVColTaskSparse(m1, m2, ret, i, Math.min(nRows, i + blkz), op));
615+
tasks.add(new BinaryMVColTaskSparse(m1, m2, ret, i, Math.min(nRows, i + blkz), op, left));
629616
}
630617
for(Future<Long> f : pool.invokeAll(tasks))
631618
nnz += f.get();
@@ -774,15 +761,17 @@ private static class BinaryMVColTaskDense implements Callable<Long> {
774761
private final MatrixBlock _m2;
775762
private final MatrixBlock _ret;
776763
private final BinaryOperator _op;
764+
private boolean _left;
777765

778766
protected BinaryMVColTaskDense(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru,
779-
BinaryOperator op) {
767+
BinaryOperator op, boolean left) {
780768
_m1 = m1;
781769
_m2 = m2;
782770
_ret = ret;
783771
_op = op;
784772
_rl = rl;
785773
_ru = ru;
774+
_left = left;
786775
}
787776

788777
@Override
@@ -792,8 +781,12 @@ public Long call() {
792781

793782
final AIterator[] its = getIterators(groups, _rl);
794783

795-
for(int r = _rl; r < _ru; r += _blklen)
796-
processBlock(r, Math.min(r + _blklen, _ru), groups, its);
784+
if(!_left)
785+
for(int r = _rl; r < _ru; r += _blklen)
786+
processBlock(r, Math.min(r + _blklen, _ru), groups, its);
787+
else
788+
for(int r = _rl; r < _ru; r += _blklen)
789+
processBlockLeft(r, Math.min(r + _blklen, _ru), groups, its);
797790

798791
return _ret.recomputeNonZeros(_rl, _ru - 1);
799792
}
@@ -805,6 +798,13 @@ private final void processBlock(final int rl, final int ru, final List<AColGroup
805798
processGenericDense(rl, ru);
806799
}
807800

801+
private final void processBlockLeft(final int rl, final int ru, final List<AColGroup> groups, final AIterator[] its) {
802+
// unsafe decompress, since we count nonzeros afterwards.
803+
final DenseBlock db = _ret.getDenseBlock();
804+
decompressToSubBlock(rl, ru, db, groups, its);
805+
processGenericDenseLeft(rl, ru);
806+
}
807+
808808
private final void processGenericDense(final int rl, final int ru) {
809809
final int ncol = _m1.getNumColumns();
810810
final DenseBlock rd = _ret.getDenseBlock();
@@ -818,11 +818,29 @@ private final void processGenericDense(final int rl, final int ru) {
818818
}
819819
}
820820

821+
private final void processGenericDenseLeft(final int rl, final int ru) {
822+
final int ncol = _m1.getNumColumns();
823+
final DenseBlock rd = _ret.getDenseBlock();
824+
// m2 is a vector therefore guaranteed continuous.
825+
final double[] _m2Dense = _m2.getDenseBlockValues();
826+
for(int row = rl; row < ru; row++) {
827+
final double[] retDense = rd.values(row);
828+
final int posR = rd.pos(row);
829+
final double vr = _m2Dense[row];
830+
processRowLeft(ncol, retDense, posR, vr);
831+
}
832+
}
833+
821834
private void processRow(final int ncol, final double[] ret, final int posR, final double vr) {
822835
for(int col = 0; col < ncol; col++)
823836
ret[posR + col] = _op.fn.execute(ret[posR + col], vr);
824837
}
825838

839+
private void processRowLeft(final int ncol, final double[] ret, final int posR, final double vr) {
840+
for(int col = 0; col < ncol; col++)
841+
ret[posR + col] = _op.fn.execute(vr,ret[posR + col]);
842+
}
843+
826844
}
827845

828846
private static class BinaryMVColTaskSparse implements Callable<Long> {
@@ -835,14 +853,17 @@ private static class BinaryMVColTaskSparse implements Callable<Long> {
835853

836854
private MatrixBlock tmp;
837855

856+
private boolean _left;
857+
838858
protected BinaryMVColTaskSparse(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru,
839-
BinaryOperator op) {
859+
BinaryOperator op, boolean left) {
840860
_m1 = m1;
841861
_m2 = m2;
842862
_ret = ret;
843863
_op = op;
844864
_rl = rl;
845865
_ru = ru;
866+
_left = left;
846867
}
847868

848869
@Override
@@ -851,10 +872,12 @@ public Long call() {
851872
final int _blklen = tmp.getNumRows();
852873
final List<AColGroup> groups = _m1.getColGroups();
853874
final AIterator[] its = getIterators(groups, _rl);
854-
855-
for(int r = _rl; r < _ru; r += _blklen)
856-
processBlock(r, Math.min(r + _blklen, _ru), groups, its);
857-
875+
if(!_left)
876+
for(int r = _rl; r < _ru; r += _blklen)
877+
processBlock(r, Math.min(r + _blklen, _ru), groups, its);
878+
else
879+
for(int r = _rl; r < _ru; r += _blklen)
880+
processBlockLeft(r, Math.min(r + _blklen, _ru), groups, its);
858881
return _ret.recomputeNonZeros(_rl, _ru - 1);
859882
}
860883

@@ -864,6 +887,12 @@ private final void processBlock(final int rl, final int ru, final List<AColGroup
864887
tmp.reset();
865888
}
866889

890+
private final void processBlockLeft(final int rl, final int ru, final List<AColGroup> groups, final AIterator[] its) {
891+
decompressToTmpBlock(rl, ru, tmp.getDenseBlock(), groups, its);
892+
processDenseLeft(rl, ru);
893+
tmp.reset();
894+
}
895+
867896
private final void processDense(final int rl, final int ru) {
868897
final int nCol = _m1.getNumColumns();
869898
final SparseBlock sb = _ret.getSparseBlock();
@@ -877,6 +906,20 @@ private final void processDense(final int rl, final int ru) {
877906

878907
}
879908
}
909+
910+
private final void processDenseLeft(final int rl, final int ru) {
911+
final int nCol = _m1.getNumColumns();
912+
final SparseBlock sb = _ret.getSparseBlock();
913+
final double[] _tmpDense = tmp.getDenseBlockValues();
914+
final double[] _m2Dense = _m2.getDenseBlockValues();
915+
for(int row = rl; row < ru; row++) {
916+
final double vr = _m2Dense[row];
917+
final int tmpOff = (row - rl) * nCol;
918+
for(int col = 0; col < nCol; col++)
919+
sb.append(row, col, _op.fn.execute(vr, _tmpDense[tmpOff + col]));
920+
921+
}
922+
}
880923
}
881924

882925
private static MatrixBlock allocateTempUncompressedBlock(int cols) {
@@ -1033,48 +1076,6 @@ private final void processRightDense(final int rl, final int ru) {
10331076
}
10341077
}
10351078

1036-
private static class BinaryMVColLeftTaskDense implements Callable<Long> {
1037-
private final int _rl;
1038-
private final int _ru;
1039-
private final CompressedMatrixBlock _m1;
1040-
private final MatrixBlock _m2;
1041-
private final MatrixBlock _ret;
1042-
private final BinaryOperator _op;
1043-
1044-
protected BinaryMVColLeftTaskDense(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru,
1045-
BinaryOperator op) {
1046-
_m1 = m1;
1047-
_m2 = m2;
1048-
_ret = ret;
1049-
_op = op;
1050-
_rl = rl;
1051-
_ru = ru;
1052-
}
1053-
1054-
@Override
1055-
public Long call() {
1056-
for(AColGroup g : _m1.getColGroups())
1057-
g.decompressToDenseBlock(_ret.getDenseBlock(), _rl, _ru);
1058-
1059-
// m2 is never sparse or empty. always dense here.
1060-
final int ncol = _m1.getNumColumns();
1061-
int offset = _rl * ncol;
1062-
double[] _retDense = _ret.getDenseBlockValues();
1063-
double[] _m2Dense = _m2.getDenseBlockValues();
1064-
for(int row = _rl; row < _ru; row++) {
1065-
double vr = _m2Dense[row];
1066-
for(int col = 0; col < ncol; col++) {
1067-
double v = _op.fn.execute(vr, _retDense[offset]);
1068-
_retDense[offset] = v;
1069-
offset++;
1070-
}
1071-
}
1072-
1073-
return _ret.recomputeNonZeros(_rl, _ru - 1);
1074-
1075-
}
1076-
}
1077-
10781079
private static abstract class BinaryMVRowTask implements Callable<AColGroup> {
10791080
protected final AColGroup _group;
10801081
protected final double[] _v;

0 commit comments

Comments
 (0)