Skip to content

Commit 8fc1edf

Browse files
committed
Merge pull request spark-jobserver#426 from zeitos/improvement/log_AbstractMethodError
Improving error handling
2 parents 3e8424e + 81f8d27 commit 8fc1edf

File tree

1 file changed: +42 additions, -31 deletions

1 file changed

+42
-31
lines changed

job-server/src/spark.jobserver/JobManagerActor.scala

Lines changed: 42 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -269,40 +269,51 @@ class JobManagerActor(contextConfig: Config) extends InstrumentedActor {
269269
Future {
270270
org.slf4j.MDC.put("jobId", jobId)
271271
logger.info("Starting job future thread")
272-
273-
// Need to re-set the SparkEnv because it's thread-local and the Future runs on a diff thread
274-
SparkEnv.set(sparkEnv)
275-
276-
// Use the Spark driver's class loader as it knows about all our jars already
277-
// NOTE: This may not even be necessary if we set the driver ActorSystem classloader correctly
278-
Thread.currentThread.setContextClassLoader(jarLoader)
279-
val job = constructor()
280-
if (job.isInstanceOf[NamedObjectSupport]) {
281-
val namedObjects = job.asInstanceOf[NamedObjectSupport].namedObjectsPrivate
282-
if (namedObjects.get() == null) {
283-
namedObjects.compareAndSet(null, jobServerNamedObjects)
284-
}
285-
}
286-
287272
try {
288-
statusActor ! JobStatusActor.JobInit(jobInfo)
289-
290-
val jobC = jobContext.asInstanceOf[job.C]
291-
job.validate(jobC, jobConfig) match {
292-
case SparkJobInvalid(reason) => {
293-
val err = new Throwable(reason)
294-
statusActor ! JobValidationFailed(jobId, DateTime.now(), err)
295-
throw err
273+
// Need to re-set the SparkEnv because it's thread-local and the Future runs on a diff thread
274+
SparkEnv.set(sparkEnv)
275+
276+
// Use the Spark driver's class loader as it knows about all our jars already
277+
// NOTE: This may not even be necessary if we set the driver ActorSystem classloader correctly
278+
Thread.currentThread.setContextClassLoader(jarLoader)
279+
val job = constructor()
280+
if (job.isInstanceOf[NamedObjectSupport]) {
281+
val namedObjects = job.asInstanceOf[NamedObjectSupport].namedObjectsPrivate
282+
if (namedObjects.get() == null) {
283+
namedObjects.compareAndSet(null, jobServerNamedObjects)
296284
}
297-
case SparkJobValid => {
298-
statusActor ! JobStarted(jobId: String, contextName, jobInfo.startTime)
299-
val sc = jobContext.sparkContext
300-
sc.setJobGroup(jobId, s"Job group for $jobId and spark context ${sc.applicationId}", true)
301-
job.runJob(jobC, jobConfig)
285+
}
286+
287+
try {
288+
statusActor ! JobStatusActor.JobInit(jobInfo)
289+
290+
val jobC = jobContext.asInstanceOf[job.C]
291+
job.validate(jobC, jobConfig) match {
292+
case SparkJobInvalid(reason) => {
293+
val err = new Throwable(reason)
294+
statusActor ! JobValidationFailed(jobId, DateTime.now(), err)
295+
throw err
296+
}
297+
case SparkJobValid => {
298+
statusActor ! JobStarted(jobId: String, contextName, jobInfo.startTime)
299+
val sc = jobContext.sparkContext
300+
sc.setJobGroup(jobId, s"Job group for $jobId and spark context ${sc.applicationId}", true)
301+
job.runJob(jobC, jobConfig)
302+
}
302303
}
304+
} finally {
305+
org.slf4j.MDC.remove("jobId")
306+
}
307+
} catch {
308+
case e: java.lang.AbstractMethodError => {
309+
logger.error("Oops, there's an AbstractMethodError... maybe you compiled " +
310+
"your code with an older version of SJS? here's the exception:", e)
311+
throw e
303312
}
304-
} finally {
305-
org.slf4j.MDC.remove("jobId")
313+
case e: Throwable => {
314+
logger.error("Got Throwable", e)
315+
throw e
316+
};
306317
}
307318
}(executionContext).andThen {
308319
case Success(result: Any) =>
@@ -322,7 +333,7 @@ class JobManagerActor(contextConfig: Config) extends InstrumentedActor {
322333
val wrappedError = wrapInRuntimeException(error)
323334
// If and only if job validation fails, JobErroredOut message is dropped silently in JobStatusActor.
324335
statusActor ! JobErroredOut(jobId, DateTime.now(), wrappedError)
325-
logger.warn("Exception from job " + jobId + ": ", error)
336+
logger.error("Exception from job " + jobId + ": ", error)
326337
}(executionContext).andThen {
327338
case _ =>
328339
// Make sure to decrement the count of running jobs when a job finishes, in both success and failure

0 commit comments

Comments
 (0)