Skip to content

Commit bf20abb

Browse files
zuotingbing authored and srowen committed
[SPARK-22642][SQL] the createdTempDir will not be deleted if an exception occurs, should delete it with try-finally.
## What changes were proposed in this pull request?

We found that staging directories are sometimes not dropped in our production environment. The createdTempDir will not be deleted if an exception occurs; we should delete createdTempDir with try-finally. This PR is a follow-up to SPARK-18703.

## How was this patch tested?

Existing tests.

Author: zuotingbing <[email protected]>

Closes #19841 from zuotingbing/SPARK-stagedir.
1 parent 6cc7021 commit bf20abb

File tree

1 file changed

+30
-17
lines changed

1 file changed

+30
-17
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala

Lines changed: 30 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,14 @@
1717

1818
package org.apache.spark.sql.hive.execution
1919

20+
import org.apache.hadoop.conf.Configuration
2021
import org.apache.hadoop.fs.Path
2122
import org.apache.hadoop.hive.ql.ErrorMsg
2223
import org.apache.hadoop.hive.ql.plan.TableDesc
2324

2425
import org.apache.spark.SparkException
2526
import org.apache.spark.sql.{AnalysisException, Dataset, Row, SparkSession}
26-
import org.apache.spark.sql.catalyst.catalog.CatalogTable
27+
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, ExternalCatalog}
2728
import org.apache.spark.sql.catalyst.expressions.Attribute
2829
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
2930
import org.apache.spark.sql.execution.command.CommandUtils
@@ -91,6 +92,34 @@ case class InsertIntoHiveTable(
9192
)
9293
val tableLocation = hiveQlTable.getDataLocation
9394
val tmpLocation = getExternalTmpPath(sparkSession, hadoopConf, tableLocation)
95+
96+
try {
97+
processInsert(sparkSession, externalCatalog, hadoopConf, tableDesc, tmpLocation)
98+
} finally {
99+
// Attempt to delete the staging directory and the inclusive files. If failed, the files are
100+
// expected to be dropped at the normal termination of VM since deleteOnExit is used.
101+
deleteExternalTmpPath(hadoopConf)
102+
}
103+
104+
// un-cache this table.
105+
sparkSession.catalog.uncacheTable(table.identifier.quotedString)
106+
sparkSession.sessionState.catalog.refreshTable(table.identifier)
107+
108+
CommandUtils.updateTableStats(sparkSession, table)
109+
110+
// It would be nice to just return the childRdd unchanged so insert operations could be chained,
111+
// however for now we return an empty list to simplify compatibility checks with hive, which
112+
// does not return anything for insert operations.
113+
// TODO: implement hive compatibility as rules.
114+
Seq.empty[Row]
115+
}
116+
117+
private def processInsert(
118+
sparkSession: SparkSession,
119+
externalCatalog: ExternalCatalog,
120+
hadoopConf: Configuration,
121+
tableDesc: TableDesc,
122+
tmpLocation: Path): Unit = {
94123
val fileSinkConf = new FileSinkDesc(tmpLocation.toString, tableDesc, false)
95124

96125
val numDynamicPartitions = partition.values.count(_.isEmpty)
@@ -231,21 +260,5 @@ case class InsertIntoHiveTable(
231260
overwrite,
232261
isSrcLocal = false)
233262
}
234-
235-
// Attempt to delete the staging directory and the inclusive files. If failed, the files are
236-
// expected to be dropped at the normal termination of VM since deleteOnExit is used.
237-
deleteExternalTmpPath(hadoopConf)
238-
239-
// un-cache this table.
240-
sparkSession.catalog.uncacheTable(table.identifier.quotedString)
241-
sparkSession.sessionState.catalog.refreshTable(table.identifier)
242-
243-
CommandUtils.updateTableStats(sparkSession, table)
244-
245-
// It would be nice to just return the childRdd unchanged so insert operations could be chained,
246-
// however for now we return an empty list to simplify compatibility checks with hive, which
247-
// does not return anything for insert operations.
248-
// TODO: implement hive compatibility as rules.
249-
Seq.empty[Row]
250263
}
251264
}

0 commit comments

Comments
 (0)