Skip to content

Commit e08d06b

Browse files
taropluscloud-fan
authored andcommitted
[SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader
## What changes were proposed in this pull request? `ClassLoader` will preferentially load class from `parent`. Only when `parent` is null or the load failed, that it will call the overridden `findClass` function. To avoid the potential issue caused by loading class using inappropriate class loader, we should set the `parent` of `ClassLoader` to null, so that we can fully control which class loader is used. This is take over of apache#17074, the primary author of this PR is taroplus . Should close apache#17074 after this PR get merged. ## How was this patch tested? Add test case in `ExecutorClassLoaderSuite`. Author: Kohki Nishio <[email protected]> Author: Xingbo Jiang <[email protected]> Closes apache#18614 from jiangxb1987/executor_classloader.
1 parent 780586a commit e08d06b

File tree

2 files changed

+57
-6
lines changed

2 files changed

+57
-6
lines changed

repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,23 @@ import org.apache.spark.internal.Logging
3333
import org.apache.spark.util.{ParentClassLoader, Utils}
3434

3535
/**
36-
* A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
37-
* used to load classes defined by the interpreter when the REPL is used.
38-
* Allows the user to specify if user class path should be first.
39-
* This class loader delegates getting/finding resources to parent loader,
40-
* which makes sense until REPL never provide resource dynamically.
36+
* A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, used to load classes
37+
* defined by the interpreter when the REPL is used. Allows the user to specify if user class path
38+
* should be first. This class loader delegates getting/finding resources to parent loader, which
39+
* makes sense until REPL never provide resource dynamically.
40+
*
41+
* Note: [[ClassLoader]] will preferentially load class from parent. Only when parent is null or
42+
* the load failed, that it will call the overridden `findClass` function. To avoid the potential
43+
* issue caused by loading class using inappropriate class loader, we should set the parent of
44+
* ClassLoader to null, so that we can fully control which class loader is used. For detailed
45+
* discussion, see SPARK-18646.
4146
*/
4247
class ExecutorClassLoader(
4348
conf: SparkConf,
4449
env: SparkEnv,
4550
classUri: String,
4651
parent: ClassLoader,
47-
userClassPathFirst: Boolean) extends ClassLoader with Logging {
52+
userClassPathFirst: Boolean) extends ClassLoader(null) with Logging {
4853
val uri = new URI(classUri)
4954
val directory = uri.getPath
5055

repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import java.nio.channels.{FileChannel, ReadableByteChannel}
2323
import java.nio.charset.StandardCharsets
2424
import java.nio.file.{Paths, StandardOpenOption}
2525
import java.util
26+
import java.util.Collections
27+
import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}
2628

2729
import scala.io.Source
2830
import scala.language.implicitConversions
@@ -77,6 +79,50 @@ class ExecutorClassLoaderSuite
7779
}
7880
}
7981

82+
test("child over system classloader") {
83+
// JavaFileObject for scala.Option class
84+
val scalaOptionFile = new SimpleJavaFileObject(
85+
URI.create(s"string:///scala/Option.java"),
86+
JavaFileObject.Kind.SOURCE) {
87+
88+
override def getCharContent(ignoreEncodingErrors: Boolean): CharSequence = {
89+
"package scala; class Option {}"
90+
}
91+
}
92+
// compile fake scala.Option class
93+
ToolProvider
94+
.getSystemJavaCompiler
95+
.getTask(null, null, null, null, null, Collections.singletonList(scalaOptionFile)).call()
96+
97+
// create 'scala' dir in tempDir1
98+
val scalaDir = new File(tempDir1, "scala")
99+
assert(scalaDir.mkdir(), s"Failed to create 'scala' directory in $tempDir1")
100+
101+
// move the generated class into scala dir
102+
val filename = "Option.class"
103+
val result = new File(filename)
104+
assert(result.exists(), "Compiled file not found: " + result.getAbsolutePath)
105+
106+
val out = new File(scalaDir, filename)
107+
Files.move(result, out)
108+
assert(out.exists(), "Destination file not moved: " + out.getAbsolutePath)
109+
110+
// construct class loader tree
111+
val parentLoader = new URLClassLoader(urls2, null)
112+
val classLoader = new ExecutorClassLoader(
113+
new SparkConf(), null, url1, parentLoader, true)
114+
115+
// load 'scala.Option', using ClassforName to do the exact same behavior as
116+
// what JavaDeserializationStream does
117+
118+
// scalastyle:off classforname
119+
val optionClass = Class.forName("scala.Option", false, classLoader)
120+
// scalastyle:on classforname
121+
122+
assert(optionClass.getClassLoader == classLoader,
123+
"scala.Option didn't come from ExecutorClassLoader")
124+
}
125+
80126
test("child first") {
81127
val parentLoader = new URLClassLoader(urls2, null)
82128
val classLoader = new ExecutorClassLoader(new SparkConf(), null, url1, parentLoader, true)

0 commit comments

Comments
 (0)