Skip to content

Commit 72a439b

Browse files
committed
Added various functions which can be implemented by decoding the lzw mapping sequentially. reverted ColGroupDDC formatting again. Reverted CompressedSizeInfoColGroup formatting and adding DDCLZW part for testing. Added various tests for which functionality in the testing pipeline need to be added in order to work.
1 parent a8735e1 commit 72a439b

File tree

4 files changed

+553
-74
lines changed

4 files changed

+553
-74
lines changed

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,8 @@ protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, i
9292
throw new DMLCompressionException("Invalid length of the data. is zero");
9393

9494
if(data.getUnique() != dict.getNumberOfValues(colIndexes.size()))
95-
throw new DMLCompressionException(
96-
"Invalid map to dict Map has:" + data.getUnique() + " while dict has " +
97-
dict.getNumberOfValues(colIndexes.size()));
95+
throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has "
96+
+ dict.getNumberOfValues(colIndexes.size()));
9897
int[] c = getCounts();
9998
if(c.length != dict.getNumberOfValues(colIndexes.size()))
10099
throw new DMLCompressionException("Invalid DDC Construction");
@@ -176,8 +175,8 @@ private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBl
176175
decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data);
177176
}
178177

179-
private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru,
180-
int offR, double[] values, AMapToData data) {
178+
private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR,
179+
double[] values, AMapToData data) {
181180
data.decompressToRange(c, rl, ru, offR, values);
182181

183182
}
@@ -376,17 +375,15 @@ private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock r
376375
return;
377376
else if(matrix.isInSparseFormat()) {
378377
if(cl != 0 || cu != _data.size())
379-
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl,
380-
cu);
378+
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu);
381379
else
382380
lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru);
383381
}
384382
else if(!matrix.getDenseBlock().isContiguous())
385-
lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru,
386-
cl, cu);
387-
else
388-
lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl,
383+
lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl,
389384
cu);
385+
else
386+
lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu);
390387
}
391388

392389
private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals,
@@ -541,8 +538,7 @@ private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, in
541538
lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu);
542539
}
543540

544-
private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl,
545-
int cu) {
541+
private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) {
546542
final DenseBlock db = result.getDenseBlock();
547543
final SparseBlock sb = matrix.getSparseBlock();
548544

@@ -622,8 +618,7 @@ public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, i
622618
}
623619

624620
@Override
625-
public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl,
626-
int cru) {
621+
public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) {
627622
if(_dict instanceof IdentityDictionary)
628623
identityRightDecompressingMult(right, ret, rl, ru, crl, cru);
629624
else
@@ -677,8 +672,7 @@ private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, i
677672
}
678673
}
679674

680-
final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k,
681-
int vLen, DoubleVector vVec) {
675+
final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) {
682676
vVec = vVec.broadcast(aa);
683677
final int offj = k * jd;
684678
final int end = endT + offj;
@@ -991,8 +985,8 @@ private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos,
991985
}
992986
}
993987

994-
private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2,
995-
int pos2, int cl, int cu) {
988+
private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2,
989+
int cl, int cu) {
996990
IdentityDictionary a = (IdentityDictionary) _dict;
997991

998992
final int firstCol = pos2 + _colIndexes.get(0);
@@ -1135,8 +1129,7 @@ public AColGroup convertToDeltaDDC() {
11351129
if(i == 0) {
11361130
rowDelta[j] = val;
11371131
prevRow[j] = val;
1138-
}
1139-
else {
1132+
} else {
11401133
rowDelta[j] = val - prevRow[j];
11411134
prevRow[j] = val;
11421135
}

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java

Lines changed: 121 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -37,31 +37,22 @@
3737
import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
3838
import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
3939
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
40-
import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex;
4140
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
4241
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
43-
import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator;
44-
import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
4542
import org.apache.sysds.runtime.compress.colgroup.scheme.DDCLZWScheme;
46-
import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme;
4743
import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
4844
import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
4945
import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
5046
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
51-
import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
5247
import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
5348
import org.apache.sysds.runtime.data.DenseBlock;
5449
import org.apache.sysds.runtime.data.SparseBlock;
5550
import org.apache.sysds.runtime.data.SparseBlockMCSR;
56-
import org.apache.sysds.runtime.data.SparseRow;
5751
import org.apache.sysds.runtime.functionobjects.*;
58-
import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
5952
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
6053
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
61-
import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
6254
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
6355
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
64-
import org.jboss.netty.handler.codec.compression.CompressionException;
6556
import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
6657
import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
6758

@@ -571,41 +562,86 @@ public AColGroup sliceRows(int rl, int ru) {
571562
catch(Exception e) {
572563
throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e);
573564
}
574-
575-
/*try {
576-
AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru);
577-
return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null);
578-
}
579-
catch(Exception e) {
580-
throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e);
581-
}*/
582565
}
583566

584567
@Override
585-
protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) {
586-
throw new NotImplementedException();
568+
protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) {
569+
LZWMappingIterator it = new LZWMappingIterator();
570+
for(int i = 0; i < rl; i++) {
571+
it.next();
572+
}
573+
574+
for(int i = rl; i < ru; i++) {
575+
final int vr = it.next();
576+
if(sb.isEmpty(vr))
577+
continue;
578+
final int apos = sb.pos(vr);
579+
final int alen = sb.size(vr) + apos;
580+
final int[] aix = sb.indexes(vr);
581+
final double[] aval = sb.values(vr);
582+
for(int j = apos; j < alen; j++) {
583+
final int rowOut = _colIndexes.get(aix[j]);
584+
final double[] c = db.values(rowOut);
585+
final int off = db.pos(rowOut);
586+
c[off + i] += aval[j];
587+
}
588+
}
587589
}
588590

589591
@Override
590592
protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) {
591-
throw new NotImplementedException();
593+
ColGroupDDC g = (ColGroupDDC) convertToDDC();
594+
g.decompressToDenseBlockTransposedDenseDictionary(db, rl, ru, dict); // Possible implementation with iterator.
595+
592596
}
593597

594598
@Override
595-
protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict,
596-
int nColOut) {
597-
throw new NotImplementedException();
599+
protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) {
600+
601+
int[] colCounts = _dict.countNNZZeroColumns(getCounts());
602+
for(int j = 0; j < _colIndexes.size(); j++)
603+
sbr.allocate(_colIndexes.get(j), colCounts[j]);
604+
605+
LZWMappingIterator it = new LZWMappingIterator(); // Replace data.getIndex withiterator.
606+
607+
for(int i = 0; i < _nRows; i++) {
608+
int di = it.next();
609+
if(sb.isEmpty(di))
610+
continue;
611+
612+
final int apos = sb.pos(di);
613+
final int alen = sb.size(di) + apos;
614+
final int[] aix = sb.indexes(di);
615+
final double[] aval = sb.values(di);
616+
617+
for(int j = apos; j < alen; j++) {
618+
sbr.append(_colIndexes.get(aix[j]), i, aval[apos]);
619+
}
620+
}
598621
}
599622

600623
@Override
601624
protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) {
602-
throw new NotImplementedException();
625+
ColGroupDDC g = (ColGroupDDC) convertToDDC();
626+
g.decompressToSparseBlockTransposedDenseDictionary(db, dict, nColOut); // Possible implementation with iterator.
603627
}
604628

605629
@Override
606630
protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC,
607631
SparseBlock sb) {
608-
throw new NotImplementedException();
632+
LZWMappingIterator it = new LZWMappingIterator();
633+
for(int i = 0; i < rl; i++) {
634+
it.next(); // Skip to rl.
635+
}
636+
637+
for(int r = rl, offT = rl + offR; r < ru; r++, offT++) {
638+
final int vr = it.next();
639+
if(sb.isEmpty(vr))
640+
continue;
641+
final double[] c = db.values(offT);
642+
final int off = db.pos(offT) + offC;
643+
_colIndexes.decompressToDenseFromSparse(sb, vr, off, c);
644+
}
609645
}
610646

611647
@Override
@@ -649,16 +685,45 @@ protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int
649685
@Override
650686
protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC,
651687
SparseBlock sb) {
652-
throw new NotImplementedException();
688+
LZWMappingIterator it = new LZWMappingIterator();
689+
for(int i = 0; i < rl; i++) {
690+
it.next();
691+
}
692+
693+
for(int r = rl, offT = rl + offR; r < ru; r++, offT++) {
694+
final int vr = it.next();
695+
if(sb.isEmpty(vr))
696+
continue;
697+
final int apos = sb.pos(vr);
698+
final int alen = sb.size(vr) + apos;
699+
final int[] aix = sb.indexes(vr);
700+
final double[] aval = sb.values(vr);
701+
for(int j = apos; j < alen; j++)
702+
ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]);
703+
}
653704
}
654705

655706
@Override
656707
protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC,
657708
double[] values) {
658-
throw new NotImplementedException();
709+
decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size());
659710
}
660711

661-
@Override
712+
protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC,
713+
double[] values, int nCol) {
714+
LZWMappingIterator it = new LZWMappingIterator();
715+
for(int i = 0; i < rl; i++) {
716+
it.next();
717+
}
718+
719+
for(int i = rl, offT = rl + offR; i < ru; i++, offT++) {
720+
final int rowIndex = it.next() * nCol;
721+
for(int j = 0; j < nCol; j++)
722+
ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]);
723+
}
724+
}
725+
726+
@Override // TODO: Implement! Pays of with LZW!
662727
public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) {
663728
convertToDDC().leftMultByMatrixNoPreAgg(matrix, result, rl, ru, cl, cu); // Fallback to DDC.
664729
}
@@ -681,12 +746,16 @@ public AColGroup unaryOperation(UnaryOperator op) {
681746

682747
@Override
683748
public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) {
684-
throw new NotImplementedException();
749+
IDictionary ret = _dict.binOpLeft(op, v, _colIndexes);
750+
751+
AMapToData data = decompressFull(_dataLZW, _nUnique, _nRows);
752+
return create(getColIndices(), ret, data, getCachedCounts());
685753
}
686754

687755
@Override
688756
public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) {
689-
throw new NotImplementedException();
757+
ColGroupDDC g = (ColGroupDDC) convertToDDC();
758+
return g.binaryRowOpRight(op, v, isRowSafe);
690759
}
691760

692761
public int[] appendDataLZWMap(int[] dataLZW) {
@@ -816,14 +885,31 @@ protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) {
816885
getCachedCounts());
817886
}
818887

819-
@Override
820-
protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) {
821-
throw new NotImplementedException();
888+
@Override // Correct ?
889+
public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) {
890+
final SparseBlock sb = selection.getSparseBlock();
891+
final SparseBlock retB = ret.getSparseBlock();
892+
for(int r = rl; r < ru; r++) {
893+
if(sb.isEmpty(r))
894+
continue;
895+
final int sPos = sb.pos(r);
896+
final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1
897+
decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0);
898+
}
822899
}
823900

824-
@Override
901+
@Override // Correct ?
825902
protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) {
826-
throw new NotImplementedException(); // We need to implement decompToDenseBlock first!
903+
// morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);;
904+
final SparseBlock sb = selection.getSparseBlock();
905+
final DenseBlock retB = ret.getDenseBlock();
906+
for(int r = rl; r < ru; r++) {
907+
if(sb.isEmpty(r))
908+
continue;
909+
final int sPos = sb.pos(r);
910+
final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1
911+
decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0);
912+
}
827913
}
828914

829915
@Override
@@ -906,12 +992,6 @@ protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) {
906992
c[rix] += preAgg[it.next()];
907993
}
908994

909-
/*protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) {
910-
AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru);
911-
for(int rix = rl; rix < ru; rix++)
912-
c[rix] += preAgg[data.getIndex(rix)];
913-
}*/
914-
915995
@Override
916996
protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) {
917997
final LZWMappingIterator it = new LZWMappingIterator();
@@ -922,14 +1002,6 @@ protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double
9221002
c[i] = builtin.execute(c[i], preAgg[it.next()]);
9231003
}
9241004

925-
/*@Override
926-
protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
927-
AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru);
928-
for(int rix = rl; rix < ru; rix++)
929-
c[rix] *= preAgg[data.getIndex(rix)];
930-
931-
}*/
932-
9331005
@Override
9341006
protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
9351007
final LZWMappingIterator it = new LZWMappingIterator();
@@ -940,4 +1012,3 @@ protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
9401012
c[rix] *= preAgg[it.next()];
9411013
}
9421014
}
943-

0 commit comments

Comments
 (0)