Skip to content

Commit 4375b8a

Browse files
authored
[SPARK-28098] Fix NPE when specifying parquet files instead of paths (#41)
1 parent cefa493 commit 4375b8a

File tree

1 file changed: 6 additions (+6), 4 deletions (-4)

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -99,18 +99,20 @@ class InMemoryFileIndex(
9999
new mutable.LinkedHashMap[Path, FileStatus]() ++= files.map(f => f.getPath -> f)
100100
cachedLeafDirToChildrenFiles =
101101
if (readPartitionWithSubdirectoryEnabled) {
102-
files.toArray.groupBy(file => getRootPathsLeafDir(file.getPath.getParent))
102+
files.toArray.groupBy(file => getRootPathsLeafDir(file.getPath.getParent, file.getPath))
103103
} else {
104104
files.toArray.groupBy(_.getPath.getParent)
105105
}
106106
cachedPartitionSpec = null
107107
}
108108

109-
private def getRootPathsLeafDir(path: Path): Path = {
110-
if (rootPaths.contains(path)) {
109+
private def getRootPathsLeafDir(path: Path, child: Path): Path = {
110+
if (rootPaths.contains(child)) {
111+
path
112+
} else if (rootPaths.contains(path)) {
111113
path
112114
} else {
113-
getRootPathsLeafDir(path.getParent)
115+
getRootPathsLeafDir(path.getParent, path)
114116
}
115117
}
116118

0 commit comments

Comments
 (0)