Skip to content

Commit 0e3cce1

Browse files
committed
[MINOR] Increase Memory Estimate for Frames
This commit increases the default memory estimate for frames. Previously, frames were estimated in the same way as matrices. The wrong estimate leads to problems on frames with more than Integer.MAX_VALUE rows. To improve it, this commit defaults to assuming 8-character strings in all cells of a frame that has not been read yet. Since there is no way of knowing whether the input frame contains longer strings, it is still a subpar estimate. However, it is an improvement over estimating everything as a dense double matrix. (The change happened because I encountered very incorrect estimates in BEWARE.)
1 parent 29b4d92 commit 0e3cce1

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

src/main/java/org/apache/sysds/hops/DataOp.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,8 @@ public boolean allowsAllExecTypes()
359359
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
360360
{
361361
double ret = 0;
362-
363-
if ( getDataType() == DataType.SCALAR )
362+
final DataType dt = getDataType();
363+
if ( dt == DataType.SCALAR )
364364
{
365365
switch( getValueType() )
366366
{
@@ -379,6 +379,11 @@ protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
379379
ret = 0;
380380
}
381381
}
382+
else if(dt == DataType.FRAME) {
383+
if(_op == OpOpData.PERSISTENTREAD || _op == OpOpData.TRANSIENTREAD) {
384+
ret = OptimizerUtils.estimateSizeExactFrame(dim1, dim2);
385+
}
386+
}
382387
else //MATRIX / FRAME
383388
{
384389
if( _op == OpOpData.PERSISTENTREAD

src/main/java/org/apache/sysds/hops/OptimizerUtils.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import org.apache.sysds.runtime.util.IndexRange;
6565
import org.apache.sysds.runtime.util.UtilFunctions;
6666
import org.apache.sysds.utils.stats.InfrastructureAnalyzer;
67+
import org.apache.sysds.utils.MemoryEstimates;
6768

6869
public class OptimizerUtils
6970
{
@@ -788,6 +789,15 @@ public static long estimateSizeExactSparsity(long nrows, long ncols, long nnz)
788789
double sp = getSparsity(nrows, ncols, nnz);
789790
return estimateSizeExactSparsity(nrows, ncols, sp);
790791
}
792+
793+
794+
public static long estimateSizeExactFrame(long nRows, long nCols){
795+
if(nRows > Integer.MAX_VALUE)
796+
return Long.MAX_VALUE;
797+
798+
// assuming String arrays and on average 8 characters per value.
799+
return (long)MemoryEstimates.stringArrayCost((int)nRows, 8) * nCols;
800+
}
791801

792802
/**
793803
* Estimates the footprint (in bytes) for an in-memory representation of a

0 commit comments

Comments
 (0)