Skip to content

Commit 344ca0b

Browse files
committed
[MINOR] CLA Factory logging cleanup
1 parent 2e1147a commit 344ca0b

File tree

6 files changed

+62
-32
lines changed

6 files changed

+62
-32
lines changed

src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ public void allocateColGroup(AColGroup cg) {
226226
* @param colGroups new ColGroups in the MatrixBlock
227227
*/
228228
public void allocateColGroupList(List<AColGroup> colGroups) {
229+
cachedMemorySize = -1;
229230
_colGroups = colGroups;
230231
}
231232

@@ -351,7 +352,6 @@ public long recomputeNonZeros(int k) {
351352
List<Future<Long>> tasks = new ArrayList<>();
352353
for(AColGroup g : _colGroups)
353354
tasks.add(pool.submit(() -> g.getNumberNonZeros(rlen)));
354-
355355
long nnz = 0;
356356
for(Future<Long> t : tasks)
357357
nnz += t.get();
@@ -398,7 +398,6 @@ public long estimateSizeInMemory() {
398398
public long estimateCompressedSizeInMemory() {
399399

400400
if(cachedMemorySize <= -1L) {
401-
402401
long total = baseSizeInMemory();
403402
// take into consideration duplicate dictionaries
404403
Set<IDictionary> dicts = new HashSet<>();
@@ -413,7 +412,6 @@ public long estimateCompressedSizeInMemory() {
413412
}
414413
cachedMemorySize = total;
415414
return total;
416-
417415
}
418416
else
419417
return cachedMemorySize;
@@ -1002,6 +1000,10 @@ public MatrixBlock getUncompressed() {
10021000
return getUncompressed((String) null);
10031001
}
10041002

1003+
public MatrixBlock getUncompressed(int k){
1004+
return getUncompressed((String) null, k);
1005+
}
1006+
10051007
public MatrixBlock getUncompressed(String operation) {
10061008
return getUncompressed(operation,
10071009
ConfigurationManager.isParallelMatrixOperations() ? InfrastructureAnalyzer.getLocalParallelism() : 1);

src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory;
3838
import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed;
3939
import org.apache.sysds.runtime.compress.cost.ACostEstimate;
40+
import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
4041
import org.apache.sysds.runtime.compress.cost.CostEstimatorBuilder;
4142
import org.apache.sysds.runtime.compress.cost.CostEstimatorFactory;
4243
import org.apache.sysds.runtime.compress.cost.InstructionTypeCounter;
@@ -159,7 +160,7 @@ public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb,
159160
return compress(mb, k, compSettings, (WTreeRoot) null);
160161
}
161162

162-
public static Future<Void> compressAsync(ExecutionContext ec, String varName) {
163+
public static Future<Void> compressAsync(ExecutionContext ec, String varName) {
163164
return compressAsync(ec, varName, null);
164165
}
165166

@@ -168,7 +169,7 @@ public static Future<Void> compressAsync(ExecutionContext ec, String varName, In
168169
final ExecutorService pool = CommonThreadPool.get(); // We have to guarantee that a thread pool is allocated.
169170
return CompletableFuture.runAsync(() -> {
170171
// method call or code to be async
171-
try{
172+
try {
172173
CacheableData<?> data = ec.getCacheableData(varName);
173174
if(data instanceof MatrixObject) {
174175
MatrixObject mo = (MatrixObject) data;
@@ -178,10 +179,11 @@ public static Future<Void> compressAsync(ExecutionContext ec, String varName, In
178179
ExecutionContext.createCacheableData(mb);
179180
mo.acquireModify(mbc);
180181
mo.release();
182+
mbc.sum(); // calculate sum to forcefully materialize counts
181183
}
182184
}
183185
}
184-
finally{
186+
finally {
185187
pool.shutdown();
186188
}
187189
}, pool);
@@ -288,11 +290,16 @@ else if(mb instanceof CompressedMatrixBlock && ((CompressedMatrixBlock) mb).isOv
288290
_stats.originalSize = mb.getInMemorySize();
289291
_stats.originalCost = costEstimator.getCost(mb);
290292

293+
final double orgSum;
294+
if(CompressedMatrixBlock.debug)
295+
orgSum = mb.sum(k).getDouble(0, 0);
296+
else
297+
orgSum = 0;
291298
if(mb.isEmpty()) // empty input return empty compression
292299
return createEmpty();
293300

294301
res = new CompressedMatrixBlock(mb); // copy metadata and allocate soft reference
295-
302+
logInit();
296303
classifyPhase();
297304
if(compressionGroups == null)
298305
return abortCompression();
@@ -308,6 +315,12 @@ else if(mb instanceof CompressedMatrixBlock && ((CompressedMatrixBlock) mb).isOv
308315
if(res == null)
309316
return abortCompression();
310317

318+
if(CompressedMatrixBlock.debug) {
319+
final double afterComp = mb.sum(k).getDouble(0, 0);
320+
final double deltaSum = Math.abs(orgSum - afterComp);
321+
LOG.debug("compression Sum: Before:" + orgSum + " after: " + afterComp + " |delta|: " + deltaSum);
322+
}
323+
311324
return new ImmutablePair<>(res, _stats);
312325
}
313326

@@ -334,7 +347,8 @@ private void classifyPhase() {
334347
final double scale = Math.sqrt(nCols);
335348
final double threshold = _stats.estimatedCostCols / scale;
336349

337-
if(threshold < _stats.originalCost) {
350+
if(threshold < _stats.originalCost *
351+
((costEstimator instanceof ComputationCostEstimator) && !(mb instanceof CompressedMatrixBlock) ? 15 : 0.8)) {
338352
if(nCols > 1)
339353
coCodePhase();
340354
else // LOG a short cocode phase (since there is one column we don't cocode)
@@ -406,7 +420,7 @@ private void transposeHeuristics() {
406420
compSettings.transposed = false;
407421
break;
408422
default:
409-
compSettings.transposed = transposeHeuristics(compressionGroups.getNumberColGroups() , mb);
423+
compSettings.transposed = transposeHeuristics(compressionGroups.getNumberColGroups(), mb);
410424
}
411425
}
412426

@@ -442,20 +456,20 @@ private void finalizePhase() {
442456

443457
_stats.compressedSize = res.getInMemorySize();
444458
_stats.compressedCost = costEstimator.getCost(res.getColGroups(), res.getNumRows());
445-
446-
final double ratio = _stats.getRatio();
447-
final double denseRatio = _stats.getDenseRatio();
448-
449459
_stats.setColGroupsCounts(res.getColGroups());
450-
if(ratio < 1 && denseRatio < 100.0) {
460+
461+
if(_stats.compressedCost > _stats.originalCost) {
451462
LOG.info("--dense size: " + _stats.denseSize);
452463
LOG.info("--original size: " + _stats.originalSize);
453464
LOG.info("--compressed size: " + _stats.compressedSize);
454-
LOG.info("--compression ratio: " + ratio);
465+
LOG.info("--compression ratio: " + _stats.getRatio());
466+
LOG.info("--original Cost: " + _stats.originalCost);
467+
LOG.info("--Compressed Cost: " + _stats.compressedCost);
468+
LOG.info("--Cost Ratio: " + _stats.getCostRatio());
455469
LOG.debug("--col groups types " + _stats.getGroupsTypesString());
456470
LOG.debug("--col groups sizes " + _stats.getGroupsSizesString());
457471
logLengths();
458-
LOG.info("Abort block compression because compression ratio is less than 1.");
472+
LOG.info("Abort block compression because cost ratio is less than 1. ");
459473
res = null;
460474
setNextTimePhase(time.stop());
461475
DMLCompressionStatistics.addCompressionTime(getLastTimePhase(), phase);
@@ -472,9 +486,23 @@ private void finalizePhase() {
472486

473487
private Pair<MatrixBlock, CompressionStatistics> abortCompression() {
474488
LOG.warn("Compression aborted at phase: " + phase);
489+
if(mb instanceof CompressedMatrixBlock && mb.getInMemorySize() > _stats.denseSize) {
490+
MatrixBlock ucmb = ((CompressedMatrixBlock) mb).getUncompressed("Decompressing for abort: ", k);
491+
return new ImmutablePair<>(ucmb, _stats);
492+
}
475493
return new ImmutablePair<>(mb, _stats);
476494
}
477495

496+
private void logInit() {
497+
if(LOG.isDebugEnabled()) {
498+
LOG.debug("--Seed used for comp : " + compSettings.seed);
499+
LOG.debug(String.format("--number columns to compress: %10d", mb.getNumColumns()));
500+
LOG.debug(String.format("--number rows to compress : %10d", mb.getNumRows()));
501+
LOG.debug(String.format("--sparsity : %10.5f", mb.getSparsity()));
502+
LOG.debug(String.format("--nonZeros : %10d", mb.getNonZeros()));
503+
}
504+
}
505+
478506
private void logPhase() {
479507
setNextTimePhase(time.stop());
480508
DMLCompressionStatistics.addCompressionTime(getLastTimePhase(), phase);
@@ -486,7 +514,6 @@ private void logPhase() {
486514
else {
487515
switch(phase) {
488516
case 0:
489-
LOG.debug("--Seed used for comp : " + compSettings.seed);
490517
LOG.debug("--compression phase " + phase + " Classify : " + getLastTimePhase());
491518
LOG.debug("--Individual Columns Estimated Compression: " + _stats.estimatedSizeCols);
492519
if(mb instanceof CompressedMatrixBlock) {

src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,7 @@
3535
*/
3636
public class CompressionSettingsBuilder {
3737
private double samplingRatio;
38-
// private double samplePower = 0.6;
3938
private double samplePower = 0.65;
40-
// private double samplePower = 0.68;
41-
// private double samplePower = 0.7;
4239
private boolean allowSharedDictionary = false;
4340
private String transposeInput;
4441
private int seed = -1;

src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ public double getRatio() {
108108
return compressedSize == 0.0 ? Double.POSITIVE_INFINITY : (double) originalSize / compressedSize;
109109
}
110110

111+
public double getCostRatio() {
112+
return compressedSize == 0.0 ? Double.POSITIVE_INFINITY : (double) originalCost / compressedCost;
113+
}
114+
111115
public double getDenseRatio() {
112116
return compressedSize == 0.0 ? Double.POSITIVE_INFINITY : (double) denseSize / compressedSize;
113117
}
@@ -121,7 +125,7 @@ public String toString() {
121125
sb.append("\nCompressed Size : " + compressedSize);
122126
sb.append("\nCompressionRatio : " + getRatio());
123127
sb.append("\nDenseCompressionRatio : " + getDenseRatio());
124-
128+
125129
if(colGroupCounts != null) {
126130
sb.append("\nCompressionTypes : " + getGroupsTypesString());
127131
sb.append("\nCompressionGroupSizes : " + getGroupsSizesString());

src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDC.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
* This column group is handy in cases where sparse unsafe operations is executed on very sparse columns. Then the zeros
3535
* would be materialized in the group without any overhead.
3636
*/
37-
public abstract class ASDC extends AMorphingMMColGroup implements AOffsetsGroup , IContainDefaultTuple {
37+
public abstract class ASDC extends AMorphingMMColGroup implements AOffsetsGroup, IContainDefaultTuple {
3838
private static final long serialVersionUID = 769993538831949086L;
3939

4040
/** Sparse row indexes for the data */
@@ -62,7 +62,7 @@ public AOffset getOffsets() {
6262
@Override
6363
public final CompressedSizeInfoColGroup getCompressionInfo(int nRow) {
6464
EstimationFactors ef = new EstimationFactors(getNumValues(), _numRows, getNumberOffsets(), _dict.getSparsity());
65-
return new CompressedSizeInfoColGroup(_colIndexes, ef, nRow, getCompType(),getEncoding());
65+
return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), getEncoding());
6666
}
6767

6868
@Override
@@ -74,12 +74,12 @@ public ICLAScheme getCompressionScheme() {
7474
public AColGroup morph(CompressionType ct, int nRow) {
7575
if(ct == getCompType())
7676
return this;
77-
else if (ct == CompressionType.SDCFOR)
77+
else if(ct == CompressionType.SDCFOR)
7878
return this; // it does not make sense to change to FOR.
7979
else
8080
return super.morph(ct, nRow);
8181
}
82-
82+
8383
@Override
8484
protected boolean allowShallowIdentityRightMult() {
8585
return false;

src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDCZero.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ private final void leftMultByMatrixNoPreAggSingleRow(MatrixBlock mb, MatrixBlock
6767
AIterator it) {
6868
if(mb.isEmpty()) // early abort.
6969
return;
70-
70+
7171
final DenseBlock res = result.getDenseBlock();
7272
final double[] resV = res.values(r);
7373
final int offRet = res.pos(r);
@@ -108,11 +108,11 @@ private final void leftMultByMatrixNoPreAggSingleRowSparse(final SparseBlock sb,
108108
final int v = it.value();
109109
while(apos < alen && aix[apos] < v)
110110
apos++; // go though sparse block until offset start.
111-
if(cu < last)
111+
if(cu < last)
112112
leftMultByMatrixNoPreAggSingleRowSparseInside(v, it, apos, alen, aix, aval, resV, offRet, cu);
113-
else if(aix[alen - 1] < last)
113+
else if(aix[alen - 1] < last)
114114
leftMultByMatrixNoPreAggSingleRowSparseLessThan(v, it, apos, alen, aix, aval, resV, offRet);
115-
else
115+
else
116116
leftMultByMatrixNoPreAggSingleRowSparseTail(v, it, apos, alen, aix, aval, resV, offRet, cu, last);
117117
}
118118

@@ -245,7 +245,7 @@ public double[] getDefaultTuple() {
245245
@Override
246246
public final CompressedSizeInfoColGroup getCompressionInfo(int nRow) {
247247
EstimationFactors ef = new EstimationFactors(getNumValues(), _numRows, getNumberOffsets(), _dict.getSparsity());
248-
return new CompressedSizeInfoColGroup(_colIndexes, ef, nRow, getCompType(), getEncoding());
248+
return new CompressedSizeInfoColGroup(_colIndexes, ef, this.estimateInMemorySize(), getCompType(), getEncoding());
249249
}
250250

251251
@Override
@@ -257,12 +257,12 @@ public ICLAScheme getCompressionScheme() {
257257
public AColGroup morph(CompressionType ct, int nRow) {
258258
if(ct == getCompType())
259259
return this;
260-
else if (ct == CompressionType.SDCFOR)
260+
else if(ct == CompressionType.SDCFOR)
261261
return this; // it does not make sense to change to FOR.
262262
else
263263
return super.morph(ct, nRow);
264264
}
265-
265+
266266
@Override
267267
protected boolean allowShallowIdentityRightMult() {
268268
return true;

0 commit comments

Comments
 (0)