@@ -21,20 +21,20 @@ import java.io.{File, FileNotFoundException, IOException}
 import java.nio.file.Files
 import java.nio.file.attribute.PosixFilePermissions
 import java.util.{Date, ServiceLoader}
-import java.util.concurrent.{ExecutorService, TimeUnit}
+import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
+import scala.concurrent.ExecutionException
 import scala.io.Source
 import scala.util.Try
 import scala.xml.Node
 
 import com.fasterxml.jackson.annotation.JsonIgnore
 import com.google.common.io.ByteStreams
 import com.google.common.util.concurrent.MoreExecutors
-import org.apache.hadoop.fs.{FileStatus, Path}
-import org.apache.hadoop.fs.permission.FsAction
+import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
 import org.apache.hadoop.hdfs.DistributedFileSystem
 import org.apache.hadoop.hdfs.protocol.HdfsConstants
 import org.apache.hadoop.security.AccessControlException
@@ -114,7 +114,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     " ; groups with admin permissions" + HISTORY_UI_ADMIN_ACLS_GROUPS.toString)
 
   private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
-  private val fs = new Path(logDir).getFileSystem(hadoopConf)
+  // Visible for testing
+  private[history] val fs: FileSystem = new Path(logDir).getFileSystem(hadoopConf)
 
   // Used by check event thread and clean log thread.
   // Scheduled thread pool size must be one, otherwise it will have concurrent issues about fs
@@ -161,6 +162,25 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     new HistoryServerDiskManager(conf, path, listing, clock)
   }
 
+  private val blacklist = new ConcurrentHashMap[String, Long]
+
+  // Visible for testing
+  private[history] def isBlacklisted(path: Path): Boolean = {
+    blacklist.containsKey(path.getName)
+  }
+
+  private def blacklist(path: Path): Unit = {
+    blacklist.put(path.getName, clock.getTimeMillis())
+  }
+
+  /**
+   * Removes expired entries in the blacklist, according to the provided `expireTimeInSeconds`.
+   */
+  private def clearBlacklist(expireTimeInSeconds: Long): Unit = {
+    val expiredThreshold = clock.getTimeMillis() - expireTimeInSeconds * 1000
+    blacklist.asScala.retain((_, creationTime) => creationTime >= expiredThreshold)
+  }
+
   private val activeUIs = new mutable.HashMap[(String, Option[String]), LoadedAppUI]()
 
   /**
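The expiry logic in clearBlacklist above can be exercised on its own. Below is a minimal, self-contained sketch; the object name and the nowMillis stand-in for clock.getTimeMillis() are hypothetical, not part of this change:

import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConverters._

object BlacklistExpirySketch {
  private val blacklist = new ConcurrentHashMap[String, Long]

  // Mirrors clearBlacklist(): drop entries recorded before the expiry threshold.
  def clear(expireTimeInSeconds: Long, nowMillis: Long): Unit = {
    val expiredThreshold = nowMillis - expireTimeInSeconds * 1000
    blacklist.asScala.retain((_, creationTime) => creationTime >= expiredThreshold)
  }

  def main(args: Array[String]): Unit = {
    blacklist.put("app-1", 0L)          // recorded at t = 0s
    blacklist.put("app-2", 90 * 1000L)  // recorded at t = 90s
    clear(expireTimeInSeconds = 60, nowMillis = 100 * 1000L)
    println(blacklist.keySet()) // [app-2]: app-1 is older than 60s and is dropped
  }
}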
@@ -418,7 +438,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
           // reading a garbage file is safe, but we would log an error which can be scary to
           // the end-user.
           !entry.getPath().getName().startsWith(".") &&
-            SparkHadoopUtil.get.checkAccessPermission(entry, FsAction.READ)
+            !isBlacklisted(entry.getPath)
         }
         .filter { entry =>
           try {
@@ -461,32 +481,37 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       logDebug(s"New/updated attempts found: ${updated.size} ${updated.map(_.getPath)}")
     }
 
-    val tasks = updated.map { entry =>
+    val tasks = updated.flatMap { entry =>
       try {
-        replayExecutor.submit(new Runnable {
+        val task: Future[Unit] = replayExecutor.submit(new Runnable {
           override def run(): Unit = mergeApplicationListing(entry, newLastScanTime, true)
-        })
+        }, Unit)
+        Some(task -> entry.getPath)
       } catch {
         // let the iteration over the updated entries break, since an exception on
         // replayExecutor.submit (..) indicates the ExecutorService is unable
         // to take any more submissions at this time
        case e: Exception =>
          logError(s"Exception while submitting event log for replay", e)
-          null
+          None
      }
-    }.filter(_ != null)
+    }
 
    pendingReplayTasksCount.addAndGet(tasks.size)
 
    // Wait for all tasks to finish. This makes sure that checkForLogs
    // is not scheduled again while some tasks are already running in
    // the replayExecutor.
-    tasks.foreach { task =>
+    tasks.foreach { case (task, path) =>
      try {
        task.get()
      } catch {
        case e: InterruptedException =>
          throw e
+        case e: ExecutionException if e.getCause.isInstanceOf[AccessControlException] =>
+          // We don't have read permissions on the log file
+          logWarning(s"Unable to read log $path", e.getCause)
+          blacklist(path)
        case e: Exception =>
          logError("Exception while merging application listings", e)
      } finally {
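For context on the new catch clause: an exception thrown inside a submitted Runnable never escapes run() directly; Future.get() rethrows it wrapped in an ExecutionException, which is why the original failure has to be recovered through getCause. A minimal sketch of that behavior follows; the pool, the task body, and the SecurityException stand-in are hypothetical:

import java.util.concurrent.{ExecutionException, Executors, Future}

object WrappedFailureSketch {
  def main(args: Array[String]): Unit = {
    val pool = Executors.newSingleThreadExecutor()
    // submit(Runnable, result) returns a typed Future; the diff passes Unit the same way.
    val task: Future[Unit] = pool.submit(new Runnable {
      override def run(): Unit = throw new SecurityException("no read permission")
    }, ())
    try {
      task.get()
    } catch {
      case e: ExecutionException =>
        // The original failure is only reachable through getCause.
        println(s"wrapped cause: ${e.getCause}")
    } finally {
      pool.shutdown()
    }
  }
}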
@@ -779,6 +804,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         listing.delete(classOf[LogInfo], log.logPath)
       }
     }
+    // Clean the blacklist from the expired entries.
+    clearBlacklist(CLEAN_INTERVAL_S)
   }
 
   /**
@@ -938,13 +965,17 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   }
 
   private def deleteLog(log: Path): Unit = {
-    try {
-      fs.delete(log, true)
-    } catch {
-      case _: AccessControlException =>
-        logInfo(s"No permission to delete $log, ignoring.")
-      case ioe: IOException =>
-        logError(s"IOException in cleaning $log", ioe)
+    if (isBlacklisted(log)) {
+      logDebug(s"Skipping deleting $log as we don't have permissions on it.")
+    } else {
+      try {
+        fs.delete(log, true)
+      } catch {
+        case _: AccessControlException =>
+          logInfo(s"No permission to delete $log, ignoring.")
+        case ioe: IOException =>
+          logError(s"IOException in cleaning $log", ioe)
+      }
     }
   }
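The skip-then-delete guard in deleteLog can also be illustrated outside the provider. A minimal sketch, assuming a plain Set stands in for the blacklist and java.security.AccessControlException for Hadoop's; all names here are hypothetical:

import java.io.IOException
import java.security.AccessControlException

object DeleteGuardSketch {
  // Hypothetical stand-in for the provider's blacklist of unreadable logs.
  private val blacklisted = Set("app-denied")

  def deleteLog(name: String)(delete: String => Unit): Unit = {
    if (blacklisted.contains(name)) {
      // Mirrors the new fast path: never attempt deletes we know will fail.
      println(s"Skipping deleting $name as we don't have permissions on it.")
    } else {
      try {
        delete(name)
      } catch {
        case _: AccessControlException =>
          println(s"No permission to delete $name, ignoring.")
        case ioe: IOException =>
          println(s"IOException in cleaning $name: $ioe")
      }
    }
  }

  def main(args: Array[String]): Unit = {
    deleteLog("app-denied")(_ => ())                        // skipped via the blacklist
    deleteLog("app-ok")(_ => throw new IOException("disk")) // handled as before
  }
}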