3737import org .apache .sysds .runtime .compress .colgroup .ColGroupConst ;
3838import org .apache .sysds .runtime .compress .colgroup .ColGroupDDC ;
3939import org .apache .sysds .runtime .compress .colgroup .ColGroupEmpty ;
40- import org .apache .sysds .runtime .compress .colgroup .ColGroupUncompressed ;
40+ import org .apache .sysds .runtime .compress .colgroup .ColGroupUncompressedArray ;
4141import org .apache .sysds .runtime .compress .colgroup .dictionary .ADictionary ;
4242import org .apache .sysds .runtime .compress .colgroup .dictionary .Dictionary ;
4343import org .apache .sysds .runtime .compress .colgroup .dictionary .IdentityDictionary ;
@@ -104,7 +104,9 @@ private MatrixBlock apply() throws Exception {
104104 final List <ColumnEncoderComposite > encoders = enc .getColumnEncoders ();
105105 final List <AColGroup > groups = isParallel () ? multiThread (encoders ) : singleThread (encoders );
106106 final int cols = shiftGroups (groups );
107- final MatrixBlock mb = new CompressedMatrixBlock (in .getNumRows (), cols , -1 , false , groups );
107+ final CompressedMatrixBlock mb = new CompressedMatrixBlock (in .getNumRows (), cols , -1 , false , groups );
108+
109+ combineUncompressed (mb );
108110 mb .setNonZeros (nnz .get ());
109111 logging (mb );
110112 return mb ;
@@ -193,7 +195,7 @@ private <T> AColGroup recodeToDummy(ColumnEncoderComposite c) throws Exception {
193195 if (containsNull && domain == 0 )
194196 return new ColGroupEmpty (ColIndexFactory .create (1 ));
195197 IColIndex colIndexes = ColIndexFactory .create (0 , domain );
196- if (domain == 1 && !containsNull ){
198+ if (domain == 1 && !containsNull ) {
197199 nnz .addAndGet (in .getNumRows ());
198200 return ColGroupConst .create (colIndexes , new double [] {1 });
199201 }
@@ -347,10 +349,10 @@ private <T> AColGroup recode(ColumnEncoderComposite c) throws Exception {
347349
348350 // int domain = c.getDomainSize();
349351 IColIndex colIndexes = ColIndexFactory .create (1 );
350- if (domain == 0 && containsNull ){
352+ if (domain == 0 && containsNull ) {
351353 return new ColGroupEmpty (colIndexes );
352354 }
353- if (domain == 1 && !containsNull ){
355+ if (domain == 1 && !containsNull ) {
354356 nnz .addAndGet (in .getNumRows ());
355357 return ColGroupConst .create (colIndexes , new double [] {1 });
356358 }
@@ -397,14 +399,7 @@ private <T> AColGroup passThroughNormal(ColumnEncoderComposite c, final IColInde
397399
398400 if (a .getValueType () != ValueType .BOOLEAN // if not booleans
399401 && (stats == null || !stats .shouldCompress || stats .valueType != a .getValueType ())) {
400- // stats.valueType;
401- double [] vals = (double []) a .changeType (ValueType .FP64 ).get ();
402-
403- MatrixBlock col = new MatrixBlock (a .size (), 1 , vals );
404- long nz = col .recomputeNonZeros (1 );
405-
406- nnz .addAndGet (nz );
407- return ColGroupUncompressed .create (colIndexes , col , false );
402+ return new ColGroupUncompressedArray (a , c ._colID - 1 ,colIndexes );
408403 }
409404 else {
410405 boolean containsNull = a .containsNull ();
@@ -532,10 +527,10 @@ private AColGroup hash(ColumnEncoderComposite c) {
532527 int domain = (int ) CEHash .getK ();
533528 boolean nulls = a .containsNull ();
534529 IColIndex colIndexes = ColIndexFactory .create (0 , 1 );
535- if (domain == 0 && nulls ){
530+ if (domain == 0 && nulls ) {
536531 return new ColGroupEmpty (colIndexes );
537532 }
538- if (domain == 1 && !nulls ){
533+ if (domain == 1 && !nulls ) {
539534 nnz .addAndGet (in .getNumRows ());
540535 return ColGroupConst .create (colIndexes , new double [] {1 });
541536 }
@@ -561,10 +556,10 @@ private AColGroup hashToDummy(ColumnEncoderComposite c) {
561556 int domain = (int ) CEHash .getK ();
562557 boolean nulls = a .containsNull ();
563558 IColIndex colIndexes = ColIndexFactory .create (0 , domain );
564- if (domain == 0 && nulls ){
559+ if (domain == 0 && nulls ) {
565560 return new ColGroupEmpty (ColIndexFactory .create (1 ));
566561 }
567- if (domain == 1 && !nulls ){
562+ if (domain == 1 && !nulls ) {
568563 nnz .addAndGet (in .getNumRows ());
569564 return ColGroupConst .create (colIndexes , new double [] {1 });
570565 }
@@ -609,6 +604,25 @@ private <T> void estimateRCDMapSize(ColumnEncoderComposite c) {
609604 c ._estNumDistincts = estDistCount ;
610605 }
611606
607+ private void combineUncompressed (CompressedMatrixBlock mb ) {
608+
609+ List <ColGroupUncompressedArray > ucg = new ArrayList <>();
610+ List <AColGroup > ret = new ArrayList <>();
611+ for (AColGroup g : mb .getColGroups ()) {
612+ if (g instanceof ColGroupUncompressedArray )
613+ ucg .add ((ColGroupUncompressedArray ) g );
614+ else
615+ ret .add (g );
616+ }
617+ ret .add (combine (ucg ));
618+ nnz .addAndGet (ret .get (ret .size ()-1 ).getNumberNonZeros (in .getNumRows ()));
619+ mb .allocateColGroupList (ret );
620+ }
621+
622+ private AColGroup combine (List <ColGroupUncompressedArray > ucg ) {
623+ throw new NotImplementedException ("Should combine " + ucg .size ());
624+ }
625+
612626 private void logging (MatrixBlock mb ) {
613627 if (LOG .isDebugEnabled ()) {
614628 LOG .debug (String .format ("Uncompressed transform encode Dense size: %16d" , mb .estimateSizeDenseInMemory ()));
0 commit comments