@@ -400,7 +400,6 @@ private static void stackConstGroup(MatrixBlock m2, BinaryOperator op, boolean l
400400 private static MatrixBlock mvColCompressed (CompressedMatrixBlock m1 , MatrixBlock m2 , BinaryOperator op , boolean left )
401401 throws Exception {
402402
403- // TODO: if left==true, m1 is the left side and a vector, m2 a matrix --> wrong out dims
404403 final int nCols = m1 .getNumColumns ();
405404 final int nRows = m1 .getNumRows ();
406405 m1 = morph (m1 );
@@ -570,21 +569,15 @@ private static long binaryMVColSingleThreadDense(CompressedMatrixBlock m1, Matri
570569 boolean left , MatrixBlock ret ) {
571570 final int nRows = m1 .getNumRows ();
572571 long nnz = 0 ;
573- if (left )
574- nnz += new BinaryMVColLeftTaskDense (m1 , m2 , ret , 0 , nRows , op ).call ();
575- else
576- nnz += new BinaryMVColTaskDense (m1 , m2 , ret , 0 , nRows , op ).call ();
572+ nnz += new BinaryMVColTaskDense (m1 , m2 , ret , 0 , nRows , op , left ).call ();
577573 return nnz ;
578574 }
579575
580576 private static long binaryMVColSingleThreadSparse (CompressedMatrixBlock m1 , MatrixBlock m2 , BinaryOperator op ,
581577 boolean left , MatrixBlock ret ) {
582578 final int nRows = m1 .getNumRows ();
583579 long nnz = 0 ;
584- if (left )
585- throw new NotImplementedException ();
586- else
587- nnz += new BinaryMVColTaskSparse (m1 , m2 , ret , 0 , nRows , op ).call ();
580+ nnz += new BinaryMVColTaskSparse (m1 , m2 , ret , 0 , nRows , op , left ).call ();
588581 return nnz ;
589582 }
590583
@@ -598,10 +591,7 @@ private static long binaryMVColMultiThreadDense(CompressedMatrixBlock m1, Matrix
598591 try {
599592 final ArrayList <Callable <Long >> tasks = new ArrayList <>();
600593 for (int i = 0 ; i < nRows ; i += blkz ) {
601- if (left )
602- tasks .add (new BinaryMVColLeftTaskDense (m1 , m2 , ret , i , Math .min (nRows , i + blkz ), op ));
603- else
604- tasks .add (new BinaryMVColTaskDense (m1 , m2 , ret , i , Math .min (nRows , i + blkz ), op ));
594+ tasks .add (new BinaryMVColTaskDense (m1 , m2 , ret , i , Math .min (nRows , i + blkz ), op , left ));
605595 }
606596 for (Future <Long > f : pool .invokeAll (tasks ))
607597 nnz += f .get ();
@@ -622,10 +612,7 @@ private static long binaryMVColMultiThreadSparse(CompressedMatrixBlock m1, Matri
622612 try {
623613 final ArrayList <Callable <Long >> tasks = new ArrayList <>();
624614 for (int i = 0 ; i < nRows ; i += blkz ) {
625- if (left )
626- throw new NotImplementedException ();
627- else
628- tasks .add (new BinaryMVColTaskSparse (m1 , m2 , ret , i , Math .min (nRows , i + blkz ), op ));
615+ tasks .add (new BinaryMVColTaskSparse (m1 , m2 , ret , i , Math .min (nRows , i + blkz ), op , left ));
629616 }
630617 for (Future <Long > f : pool .invokeAll (tasks ))
631618 nnz += f .get ();
@@ -774,15 +761,17 @@ private static class BinaryMVColTaskDense implements Callable<Long> {
774761 private final MatrixBlock _m2 ;
775762 private final MatrixBlock _ret ;
776763 private final BinaryOperator _op ;
764+ private boolean _left ;
777765
/**
 * Creates a dense task for the row range starting at {@code rl} (inclusive) and ending at {@code ru}
 * (exclusive).
 *
 * @param m1   compressed matrix operand
 * @param m2   vector operand, read once per processed row
 * @param ret  output block the task writes into
 * @param rl   first row to process
 * @param ru   row at which processing stops
 * @param op   binary operator to apply
 * @param left if true the vector value is passed as the operator's first argument, otherwise as its
 *             second
 */
protected BinaryMVColTaskDense(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru,
	BinaryOperator op, boolean left) {
	// row range
	_rl = rl;
	_ru = ru;
	// operands and output
	_m1 = m1;
	_m2 = m2;
	_ret = ret;
	// operator and operand order
	_op = op;
	_left = left;
}
787776
788777 @ Override
@@ -792,8 +781,12 @@ public Long call() {
792781
793782 final AIterator [] its = getIterators (groups , _rl );
794783
795- for (int r = _rl ; r < _ru ; r += _blklen )
796- processBlock (r , Math .min (r + _blklen , _ru ), groups , its );
784+ if (!_left )
785+ for (int r = _rl ; r < _ru ; r += _blklen )
786+ processBlock (r , Math .min (r + _blklen , _ru ), groups , its );
787+ else
788+ for (int r = _rl ; r < _ru ; r += _blklen )
789+ processBlockLeft (r , Math .min (r + _blklen , _ru ), groups , its );
797790
798791 return _ret .recomputeNonZeros (_rl , _ru - 1 );
799792 }
@@ -805,6 +798,13 @@ private final void processBlock(final int rl, final int ru, final List<AColGroup
805798 processGenericDense (rl , ru );
806799 }
807800
801+ private final void processBlockLeft (final int rl , final int ru , final List <AColGroup > groups , final AIterator [] its ) {
802+ // unsafe decompress, since we count nonzeros afterwards.
803+ final DenseBlock db = _ret .getDenseBlock ();
804+ decompressToSubBlock (rl , ru , db , groups , its );
805+ processGenericDenseLeft (rl , ru );
806+ }
807+
808808 private final void processGenericDense (final int rl , final int ru ) {
809809 final int ncol = _m1 .getNumColumns ();
810810 final DenseBlock rd = _ret .getDenseBlock ();
@@ -818,11 +818,29 @@ private final void processGenericDense(final int rl, final int ru) {
818818 }
819819 }
820820
821+ private final void processGenericDenseLeft (final int rl , final int ru ) {
822+ final int ncol = _m1 .getNumColumns ();
823+ final DenseBlock rd = _ret .getDenseBlock ();
824+ // m2 is a vector therefore guaranteed continuous.
825+ final double [] _m2Dense = _m2 .getDenseBlockValues ();
826+ for (int row = rl ; row < ru ; row ++) {
827+ final double [] retDense = rd .values (row );
828+ final int posR = rd .pos (row );
829+ final double vr = _m2Dense [row ];
830+ processRowLeft (ncol , retDense , posR , vr );
831+ }
832+ }
833+
821834 private void processRow (final int ncol , final double [] ret , final int posR , final double vr ) {
822835 for (int col = 0 ; col < ncol ; col ++)
823836 ret [posR + col ] = _op .fn .execute (ret [posR + col ], vr );
824837 }
825838
839+ private void processRowLeft (final int ncol , final double [] ret , final int posR , final double vr ) {
840+ for (int col = 0 ; col < ncol ; col ++)
841+ ret [posR + col ] = _op .fn .execute (vr ,ret [posR + col ]);
842+ }
843+
826844 }
827845
828846 private static class BinaryMVColTaskSparse implements Callable <Long > {
@@ -835,14 +853,17 @@ private static class BinaryMVColTaskSparse implements Callable<Long> {
835853
836854 private MatrixBlock tmp ;
837855
856+ private boolean _left ;
857+
/**
 * Creates a sparse-output task for the row range starting at {@code rl} (inclusive) and ending at
 * {@code ru} (exclusive).
 *
 * @param m1   compressed matrix operand
 * @param m2   vector operand, read once per processed row
 * @param ret  output block the task writes into
 * @param rl   first row to process
 * @param ru   row at which processing stops
 * @param op   binary operator to apply
 * @param left if true the task takes its "left" processing path, where the vector value is the
 *             operator's first argument
 */
protected BinaryMVColTaskSparse(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru,
	BinaryOperator op, boolean left) {
	// row range
	_rl = rl;
	_ru = ru;
	// operands and output
	_m1 = m1;
	_m2 = m2;
	_ret = ret;
	// operator and operand order
	_op = op;
	_left = left;
}
847868
848869 @ Override
@@ -851,10 +872,12 @@ public Long call() {
851872 final int _blklen = tmp .getNumRows ();
852873 final List <AColGroup > groups = _m1 .getColGroups ();
853874 final AIterator [] its = getIterators (groups , _rl );
854-
855- for (int r = _rl ; r < _ru ; r += _blklen )
856- processBlock (r , Math .min (r + _blklen , _ru ), groups , its );
857-
875+ if (!_left )
876+ for (int r = _rl ; r < _ru ; r += _blklen )
877+ processBlock (r , Math .min (r + _blklen , _ru ), groups , its );
878+ else
879+ for (int r = _rl ; r < _ru ; r += _blklen )
880+ processBlockLeft (r , Math .min (r + _blklen , _ru ), groups , its );
858881 return _ret .recomputeNonZeros (_rl , _ru - 1 );
859882 }
860883
@@ -864,6 +887,12 @@ private final void processBlock(final int rl, final int ru, final List<AColGroup
864887 tmp .reset ();
865888 }
866889
890+ private final void processBlockLeft (final int rl , final int ru , final List <AColGroup > groups , final AIterator [] its ) {
891+ decompressToTmpBlock (rl , ru , tmp .getDenseBlock (), groups , its );
892+ processDenseLeft (rl , ru );
893+ tmp .reset ();
894+ }
895+
867896 private final void processDense (final int rl , final int ru ) {
868897 final int nCol = _m1 .getNumColumns ();
869898 final SparseBlock sb = _ret .getSparseBlock ();
@@ -877,6 +906,20 @@ private final void processDense(final int rl, final int ru) {
877906
878907 }
879908 }
909+
910+ private final void processDenseLeft (final int rl , final int ru ) {
911+ final int nCol = _m1 .getNumColumns ();
912+ final SparseBlock sb = _ret .getSparseBlock ();
913+ final double [] _tmpDense = tmp .getDenseBlockValues ();
914+ final double [] _m2Dense = _m2 .getDenseBlockValues ();
915+ for (int row = rl ; row < ru ; row ++) {
916+ final double vr = _m2Dense [row ];
917+ final int tmpOff = (row - rl ) * nCol ;
918+ for (int col = 0 ; col < nCol ; col ++)
919+ sb .append (row , col , _op .fn .execute (vr , _tmpDense [tmpOff + col ]));
920+
921+ }
922+ }
880923 }
881924
882925 private static MatrixBlock allocateTempUncompressedBlock (int cols ) {
@@ -1033,48 +1076,6 @@ private final void processRightDense(final int rl, final int ru) {
10331076 }
10341077 }
10351078
1036- private static class BinaryMVColLeftTaskDense implements Callable <Long > {
1037- private final int _rl ;
1038- private final int _ru ;
1039- private final CompressedMatrixBlock _m1 ;
1040- private final MatrixBlock _m2 ;
1041- private final MatrixBlock _ret ;
1042- private final BinaryOperator _op ;
1043-
1044- protected BinaryMVColLeftTaskDense (CompressedMatrixBlock m1 , MatrixBlock m2 , MatrixBlock ret , int rl , int ru ,
1045- BinaryOperator op ) {
1046- _m1 = m1 ;
1047- _m2 = m2 ;
1048- _ret = ret ;
1049- _op = op ;
1050- _rl = rl ;
1051- _ru = ru ;
1052- }
1053-
1054- @ Override
1055- public Long call () {
1056- for (AColGroup g : _m1 .getColGroups ())
1057- g .decompressToDenseBlock (_ret .getDenseBlock (), _rl , _ru );
1058-
1059- // m2 is never sparse or empty. always dense here.
1060- final int ncol = _m1 .getNumColumns ();
1061- int offset = _rl * ncol ;
1062- double [] _retDense = _ret .getDenseBlockValues ();
1063- double [] _m2Dense = _m2 .getDenseBlockValues ();
1064- for (int row = _rl ; row < _ru ; row ++) {
1065- double vr = _m2Dense [row ];
1066- for (int col = 0 ; col < ncol ; col ++) {
1067- double v = _op .fn .execute (vr , _retDense [offset ]);
1068- _retDense [offset ] = v ;
1069- offset ++;
1070- }
1071- }
1072-
1073- return _ret .recomputeNonZeros (_rl , _ru - 1 );
1074-
1075- }
1076- }
1077-
10781079 private static abstract class BinaryMVRowTask implements Callable <AColGroup > {
10791080 protected final AColGroup _group ;
10801081 protected final double [] _v ;
0 commit comments