Skip to content

Commit 21a5092

Browse files
committed
[SYSTEMDS-3849] Perftest: Fix MSVM performance regression (spark ctx)
This patch fixes a perftest performance regression of runMSVM_10k_1k_dense_k5, which ran in 36s instead of a few seconds as in earlier releases. The cause was unnecessary Spark context creation during parfor optimization. We now handle these cluster info requests more carefully, which avoids the unnecessary Spark context creation and reduces the total runtime back to 5.9s.
1 parent 4d6c6b4 commit 21a5092

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

src/main/java/org/apache/sysds/runtime/controlprogram/context/SparkExecutionContext.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,8 +340,11 @@ public static SparkConf createSystemDSSparkConf() {
340340
return conf;
341341
}
342342

343+
@SuppressWarnings("resource")
343344
public static boolean isLocalMaster() {
344-
return getSparkContextStatic().isLocal();
345+
return isSparkContextCreated() ?
346+
getSparkContextStatic().isLocal() :
347+
DMLScript.USE_LOCAL_SPARK_CONFIG;
345348
}
346349

347350
/**

src/main/java/org/apache/sysds/utils/stats/InfrastructureAnalyzer.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,10 +183,13 @@ public static int getCkMaxCP() {
183183
* @return maximum remote parallelism constraint
184184
*/
185185
public static int getCkMaxMR() {
186+
//NOTE: we refresh only if there is already a spark context created
187+
// in order to avoid unnecessary spark context creation in local ops
188+
boolean refresh = SparkExecutionContext.isSparkContextCreated();
186189
if( OptimizerUtils.isSparkExecutionMode() )
187190
return SparkExecutionContext.isLocalMaster() ?
188191
InfrastructureAnalyzer.getLocalParallelism() :
189-
SparkExecutionContext.getDefaultParallelism(true);
192+
SparkExecutionContext.getDefaultParallelism(refresh);
190193
else
191194
return getRemoteParallelMapTasks();
192195
}
@@ -198,7 +201,6 @@ public static int getCkMaxMR() {
198201
*/
199202
public static long getCmMax() {
200203
//default value (if not specified)
201-
//TODO spark remote map task budget?
202204
return getLocalMaxMemory();
203205
}
204206

0 commit comments

Comments
 (0)