Skip to content

Commit 5ac091a

Browse files
committed
[MINOR] Increase Memory Estimate for Frames
This commit increases the default memory estimate for frames. Previously, frames were estimated in the same way as matrices. The wrong estimate leads to problems on frames with more than Integer.MAX_VALUE rows. To improve it, this commit assumes 8-character strings in every cell of a frame that has not yet been read. Since there is no way of knowing whether the input frame contains longer strings, it is still a subpar estimate. However, it is an improvement over estimating everything as a dense double matrix. (The change happened because I encountered very incorrect estimates in BEWARE.) Closes #2158 Signed-off-by: Sebastian Baunsgaard <[email protected]>
1 parent 31aff0e commit 5ac091a

File tree

4 files changed

+63
-3
lines changed

4 files changed

+63
-3
lines changed

src/main/java/org/apache/sysds/hops/DataOp.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,8 @@ public boolean allowsAllExecTypes()
359359
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
360360
{
361361
double ret = 0;
362-
363-
if ( getDataType() == DataType.SCALAR )
362+
final DataType dt = getDataType();
363+
if ( dt == DataType.SCALAR )
364364
{
365365
switch( getValueType() )
366366
{
@@ -379,6 +379,11 @@ protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
379379
ret = 0;
380380
}
381381
}
382+
else if(dt == DataType.FRAME) {
383+
if(_op == OpOpData.PERSISTENTREAD || _op == OpOpData.TRANSIENTREAD) {
384+
ret = OptimizerUtils.estimateSizeExactFrame(dim1, dim2);
385+
}
386+
}
382387
else //MATRIX / FRAME
383388
{
384389
if( _op == OpOpData.PERSISTENTREAD

src/main/java/org/apache/sysds/hops/OptimizerUtils.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import org.apache.sysds.runtime.util.IndexRange;
6565
import org.apache.sysds.runtime.util.UtilFunctions;
6666
import org.apache.sysds.utils.stats.InfrastructureAnalyzer;
67+
import org.apache.sysds.utils.MemoryEstimates;
6768

6869
public class OptimizerUtils
6970
{
@@ -788,6 +789,18 @@ public static long estimateSizeExactSparsity(long nrows, long ncols, long nnz)
788789
double sp = getSparsity(nrows, ncols, nnz);
789790
return estimateSizeExactSparsity(nrows, ncols, sp);
790791
}
792+
793+
794+
public static long estimateSizeExactFrame(long nRows, long nCols){
795+
// Currently we do not support frames larger than INT.
796+
// Therefore, we estimate their size to be extremely large.
797+
// The large size force spark operations.
798+
if(nRows > Integer.MAX_VALUE)
799+
return Long.MAX_VALUE;
800+
801+
// assuming String arrays and on average 8 characters per value.
802+
return (long)MemoryEstimates.stringArrayCost((int)nRows, 8) * nCols;
803+
}
791804

792805
/**
793806
* Estimates the footprint (in bytes) for an in-memory representation of a

src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ public void test() {
8787
assertEquals(MemoryEstimates.doubleArrayCost(length), measure(arrayDouble), 0.2);
8888
break;
8989
default:
90-
System.out.println(arrayToMeasure.getClass().getSimpleName());
9190
throw new NotImplementedException(arrayToMeasure + " not implemented");
9291
}
9392
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.sysds.test.component.misc;
21+
22+
import static org.junit.Assert.assertTrue;
23+
24+
import org.apache.sysds.hops.OptimizerUtils;
25+
import org.junit.Test;
26+
27+
public class OptimizerUtilsTest {
28+
29+
@Test
30+
public void estimateFrameSize() {
31+
Long size = OptimizerUtils.estimateSizeExactFrame(10, 10);
32+
assertTrue(size > 10 * 10);
33+
}
34+
35+
@Test
36+
public void estimateFrameSizeMoreRowsThanInt() {
37+
// Currently we do not support frames larger than INT. Therefore we estimate their size to be extremely large.
38+
// The large size force spark operations
39+
Long size = OptimizerUtils.estimateSizeExactFrame(Integer.MAX_VALUE + 1L, 10);
40+
41+
assertTrue(size == Long.MAX_VALUE);
42+
}
43+
}

0 commit comments

Comments
 (0)