Commit ae5b2a6

HeartSaVioR authored and Marcelo Vanzin committed
[SPARK-26311][CORE] New feature: apply custom log URL pattern for executor log URLs in SHS
## What changes were proposed in this pull request?

This patch proposes adding a new configuration to the SHS: a custom executor log URL pattern. It lets end users point executor log URLs at something other than what the resource manager provides, such as an external log service, which can keep serving executor logs even after a YARN NodeManager becomes unavailable.

End users can build their own custom executor log URLs out of pre-defined patterns, which vary per resource manager. This patch adds patterns for the YARN resource manager. (Other resource managers do not expose executor log URLs at all, so no patterns can be defined for them.) Please refer to the doc change as well as the UTs added in this patch to see how to set up the feature.

## How was this patch tested?

Added UTs, as well as a manual test with a YARN cluster.

Closes apache#23260 from HeartSaVioR/SPARK-26311.

Authored-by: Jungtaek Lim (HeartSaVioR) <[email protected]>
Signed-off-by: Marcelo Vanzin <[email protected]>
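For illustration, a minimal sketch of what such a configuration could look like. The log-server host and the `CONTAINER_ID`/`USER` attribute names below are hypothetical placeholders, not an exhaustive list of what this patch actually provides for YARN:

```scala
// Hypothetical example only: the log-server URL and the attribute names are made up.
// Every {{ATTRIBUTE}} in the pattern is substituted from the attributes the executor
// reported at registration time; {{FILE_NAME}} is expanded once per entry of the
// LOG_FILES attribute, producing one link per log file.
import org.apache.spark.SparkConf

val shsConf = new SparkConf()
  .set("spark.history.custom.executor.log.url",
    "http://logserver.example.com/{{CONTAINER_ID}}/{{USER}}/{{FILE_NAME}}")
  // Optional: also rewrite log URLs of applications that are still running (defaults to true).
  .set("spark.history.custom.executor.log.url.applyIncompleteApplication", "true")
```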
1 parent 25b97a4 commit ae5b2a6

File tree

39 files changed: +717 −126 lines changed


core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 2 additions & 1 deletion
@@ -2355,7 +2355,8 @@ class SparkContext(config: SparkConf) extends Logging {
     // Note: this code assumes that the task scheduler has been initialized and has contacted
     // the cluster manager to get an application ID (in case the cluster manager provides one).
     listenerBus.post(SparkListenerApplicationStart(appName, Some(applicationId),
-      startTime, sparkUser, applicationAttemptId, schedulerBackend.getDriverLogUrls))
+      startTime, sparkUser, applicationAttemptId, schedulerBackend.getDriverLogUrls,
+      schedulerBackend.getDriverAttributes))
     _driverLogger.foreach(_.startSync(_hadoopConfiguration))
   }

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 3 additions & 4 deletions
@@ -359,10 +359,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       return None
     }

-    val ui = SparkUI.create(None, new AppStatusStore(kvstore), conf, secManager, app.info.name,
-      HistoryServer.getAttemptURI(appId, attempt.info.attemptId),
-      attempt.info.startTime.getTime(),
-      attempt.info.appSparkVersion)
+    val ui = SparkUI.create(None, new HistoryAppStatusStore(conf, kvstore), conf, secManager,
+      app.info.name, HistoryServer.getAttemptURI(appId, attempt.info.attemptId),
+      attempt.info.startTime.getTime(), attempt.info.appSparkVersion)
     loadPlugins().foreach(_.setupUI(ui))

     val loadedUI = LoadedAppUI(ui)
core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala

Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.history
+
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.util.matching.Regex
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.History._
+import org.apache.spark.status.AppStatusStore
+import org.apache.spark.status.api.v1
+import org.apache.spark.util.kvstore.KVStore
+
+private[spark] class HistoryAppStatusStore(
+    conf: SparkConf,
+    store: KVStore)
+  extends AppStatusStore(store, None) with Logging {
+
+  import HistoryAppStatusStore._
+
+  private val logUrlPattern: Option[String] = {
+    val appInfo = super.applicationInfo()
+    val applicationCompleted = appInfo.attempts.nonEmpty && appInfo.attempts.head.completed
+    if (applicationCompleted || conf.get(APPLY_CUSTOM_EXECUTOR_LOG_URL_TO_INCOMPLETE_APP)) {
+      conf.get(CUSTOM_EXECUTOR_LOG_URL)
+    } else {
+      None
+    }
+  }
+
+  private val informedForMissingAttributes = new AtomicBoolean(false)
+
+  override def executorList(activeOnly: Boolean): Seq[v1.ExecutorSummary] = {
+    val execList = super.executorList(activeOnly)
+    logUrlPattern match {
+      case Some(pattern) => execList.map(replaceLogUrls(_, pattern))
+      case None => execList
+    }
+  }
+
+  override def executorSummary(executorId: String): v1.ExecutorSummary = {
+    val execSummary = super.executorSummary(executorId)
+    logUrlPattern match {
+      case Some(pattern) => replaceLogUrls(execSummary, pattern)
+      case None => execSummary
+    }
+  }
+
+  private def replaceLogUrls(exec: v1.ExecutorSummary, urlPattern: String): v1.ExecutorSummary = {
+    val attributes = exec.attributes
+
+    // Relation between pattern {{FILE_NAME}} and attribute {{LOG_FILES}}:
+    // Given that HistoryAppStatusStore doesn't know which types of log files can be provided
+    // from the resource manager, we require the resource manager to provide the available types
+    // of log files, which are encouraged to be the same as the types of log files provided in
+    // the original log URLs. Once we get the list of log files, we need to expose them to end
+    // users as a pattern so that end users can compose custom log URL(s) including log file
+    // name(s).
+    val allPatterns = CUSTOM_URL_PATTERN_REGEX.findAllMatchIn(urlPattern).map(_.group(1)).toSet
+    val allPatternsExceptFileName = allPatterns.filter(_ != "FILE_NAME")
+    val allAttributeKeys = attributes.keySet
+    val allAttributeKeysExceptLogFiles = allAttributeKeys.filter(_ != "LOG_FILES")
+
+    if (allPatternsExceptFileName.diff(allAttributeKeysExceptLogFiles).nonEmpty) {
+      logFailToRenewLogUrls("some of required attributes are missing in app's event log.",
+        allPatternsExceptFileName, allAttributeKeys)
+      return exec
+    } else if (allPatterns.contains("FILE_NAME") && !allAttributeKeys.contains("LOG_FILES")) {
+      logFailToRenewLogUrls("'FILE_NAME' parameter is provided, but file information is " +
+        "missing in app's event log.", allPatternsExceptFileName, allAttributeKeys)
+      return exec
+    }
+
+    val updatedUrl = allPatternsExceptFileName.foldLeft(urlPattern) { case (orig, patt) =>
+      // we already checked the existence of attribute when comparing keys
+      orig.replace(s"{{$patt}}", attributes(patt))
+    }
+
+    val newLogUrlMap = if (allPatterns.contains("FILE_NAME")) {
+      // allAttributeKeys should contain "LOG_FILES"
+      attributes("LOG_FILES").split(",").map { file =>
+        file -> updatedUrl.replace("{{FILE_NAME}}", file)
+      }.toMap
+    } else {
+      Map("log" -> updatedUrl)
+    }
+
+    replaceExecutorLogs(exec, newLogUrlMap)
+  }
+
+  private def logFailToRenewLogUrls(
+      reason: String,
+      allPatterns: Set[String],
+      allAttributes: Set[String]): Unit = {
+    if (informedForMissingAttributes.compareAndSet(false, true)) {
+      logInfo(s"Fail to renew executor log urls: $reason. Required: $allPatterns / " +
+        s"available: $allAttributes. Falling back to show app's original log urls.")
+    }
+  }
+
+  private def replaceExecutorLogs(
+      source: v1.ExecutorSummary,
+      newExecutorLogs: Map[String, String]): v1.ExecutorSummary = {
+    new v1.ExecutorSummary(source.id, source.hostPort, source.isActive, source.rddBlocks,
+      source.memoryUsed, source.diskUsed, source.totalCores, source.maxTasks, source.activeTasks,
+      source.failedTasks, source.completedTasks, source.totalTasks, source.totalDuration,
+      source.totalGCTime, source.totalInputBytes, source.totalShuffleRead,
+      source.totalShuffleWrite, source.isBlacklisted, source.maxMemory, source.addTime,
+      source.removeTime, source.removeReason, newExecutorLogs, source.memoryMetrics,
+      source.blacklistedInStages, source.peakMemoryMetrics, source.attributes)
+  }
+
+}
+
+private[spark] object HistoryAppStatusStore {
+  val CUSTOM_URL_PATTERN_REGEX: Regex = "\\{\\{([A-Za-z0-9_\\-]+)\\}\\}".r
+}
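As a standalone illustration of the substitution rules implemented above, here is a minimal sketch (not part of the patch; the pattern, attribute names, and values are made up):

```scala
// Minimal sketch of the URL-rewriting rules used by HistoryAppStatusStore.
// The pattern, attribute names, and values below are illustrative only.
val pattern = "http://logserver/{{CONTAINER_ID}}/{{USER}}/{{FILE_NAME}}"
val attributes = Map(
  "CONTAINER_ID" -> "container_1",
  "USER" -> "alice",
  "LOG_FILES" -> "stdout,stderr")

val regex = "\\{\\{([A-Za-z0-9_\\-]+)\\}\\}".r
val placeholders = regex.findAllMatchIn(pattern).map(_.group(1)).toSet

// Substitute every placeholder except FILE_NAME directly from the attributes.
val base = (placeholders - "FILE_NAME").foldLeft(pattern) { (url, key) =>
  url.replace(s"{{$key}}", attributes(key))
}

// Expand FILE_NAME once per entry in the comma-separated LOG_FILES attribute.
val logUrls = attributes("LOG_FILES").split(",").map { file =>
  file -> base.replace("{{FILE_NAME}}", file)
}.toMap
// Map(stdout -> http://logserver/container_1/alice/stdout,
//     stderr -> http://logserver/container_1/alice/stderr)
```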

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 8 additions & 1 deletion
@@ -60,7 +60,8 @@ private[spark] class CoarseGrainedExecutorBackend(
     rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref =>
       // This is a very fast action so we can use "ThreadUtils.sameThread"
       driver = Some(ref)
-      ref.ask[Boolean](RegisterExecutor(executorId, self, hostname, cores, extractLogUrls))
+      ref.ask[Boolean](RegisterExecutor(executorId, self, hostname, cores, extractLogUrls,
+        extractAttributes))
     }(ThreadUtils.sameThread).onComplete {
       // This is a very fast action so we can use "ThreadUtils.sameThread"
       case Success(msg) =>
@@ -76,6 +77,12 @@ private[spark] class CoarseGrainedExecutorBackend(
       .map(e => (e._1.substring(prefix.length).toLowerCase(Locale.ROOT), e._2))
   }

+  def extractAttributes: Map[String, String] = {
+    val prefix = "SPARK_EXECUTOR_ATTRIBUTE_"
+    sys.env.filterKeys(_.startsWith(prefix))
+      .map(e => (e._1.substring(prefix.length).toUpperCase(Locale.ROOT), e._2))
+  }
+
   override def receive: PartialFunction[Any, Unit] = {
     case RegisteredExecutor =>
       logInfo("Successfully registered with driver")
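A quick illustration of what `extractAttributes` produces (a sketch only; the environment variable values are made up, and a cluster manager such as YARN would set them when launching the executor process):

```scala
// Sketch only: suppose the executor process was started with these environment variables.
val env = Map(
  "SPARK_EXECUTOR_ATTRIBUTE_CONTAINER_ID" -> "container_1",
  "SPARK_EXECUTOR_ATTRIBUTE_user" -> "alice",
  "JAVA_HOME" -> "/usr/lib/jvm/default") // ignored: no SPARK_EXECUTOR_ATTRIBUTE_ prefix

val prefix = "SPARK_EXECUTOR_ATTRIBUTE_"
val attributes = env.filterKeys(_.startsWith(prefix))
  .map { case (k, v) => k.substring(prefix.length).toUpperCase(java.util.Locale.ROOT) -> v }
  .toMap
// Map(CONTAINER_ID -> container_1, USER -> alice)
// These keys are what a {{CONTAINER_ID}}-style placeholder in the custom URL pattern matches.
```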

core/src/main/scala/org/apache/spark/internal/config/History.scala

Lines changed: 18 additions & 0 deletions
@@ -125,4 +125,22 @@ private[spark] object History {
   val KERBEROS_KEYTAB = ConfigBuilder("spark.history.kerberos.keytab")
     .stringConf
     .createOptional
+
+  val CUSTOM_EXECUTOR_LOG_URL = ConfigBuilder("spark.history.custom.executor.log.url")
+    .doc("Specifies custom spark executor log url for supporting external log service instead of " +
+      "using cluster managers' application log urls in the history server. Spark will support " +
+      "some path variables via patterns which can vary on cluster manager. Please check the " +
+      "documentation for your cluster manager to see which patterns are supported, if any. " +
+      "This configuration has no effect on a live application, it only affects the history server.")
+    .stringConf
+    .createOptional
+
+  val APPLY_CUSTOM_EXECUTOR_LOG_URL_TO_INCOMPLETE_APP =
+    ConfigBuilder("spark.history.custom.executor.log.url.applyIncompleteApplication")
+      .doc("Whether to apply custom executor log url, as specified by " +
+        "`spark.history.custom.executor.log.url`, to incomplete application as well. " +
+        "Even if this is true, this still only affects the behavior of the history server, " +
+        "not running spark applications.")
+      .booleanConf
+      .createWithDefault(true)
 }
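To make the interaction between these two settings explicit, a small sketch (not from the patch) that mirrors the gating logic in HistoryAppStatusStore: the custom pattern is only applied to an application that is still running when the applyIncompleteApplication flag is enabled.

```scala
// Sketch of how the two settings combine; names are illustrative, the logic mirrors
// HistoryAppStatusStore.logUrlPattern.
def effectivePattern(
    customUrl: Option[String],    // value of spark.history.custom.executor.log.url
    applyToIncomplete: Boolean,   // ...custom.executor.log.url.applyIncompleteApplication
    applicationCompleted: Boolean): Option[String] = {
  if (applicationCompleted || applyToIncomplete) customUrl else None
}

effectivePattern(Some("http://logserver/{{FILE_NAME}}"),
  applyToIncomplete = false, applicationCompleted = false) // None: original log URLs are kept
```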

core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala

Lines changed: 7 additions & 0 deletions
@@ -69,6 +69,13 @@ private[spark] trait SchedulerBackend {
    */
   def getDriverLogUrls: Option[Map[String, String]] = None

+  /**
+   * Get the attributes on driver. These attributes are used to replace log URLs when
+   * custom log url pattern is specified.
+   * @return Map containing attributes on driver.
+   */
+  def getDriverAttributes: Option[Map[String, String]] = None
+
   /**
    * Get the max number of tasks that can be concurrent launched currently.
    * Note that please don't cache the value returned by this method, because the number can change
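A hypothetical sketch of a cluster-manager-specific backend overriding the new hook (written as if it lived inside Spark's own source tree, since SchedulerBackend is private[spark]; the backend name and attribute sources are made up, while the real YARN backend in this patch derives its attributes from container and environment information):

```scala
// Hypothetical sketch only: publishes driver attributes that the history server can later
// substitute into a custom executor log URL pattern.
package org.apache.spark.scheduler

trait MyClusterSchedulerBackend extends SchedulerBackend {
  override def getDriverAttributes: Option[Map[String, String]] = Some(Map(
    "CONTAINER_ID" -> sys.env.getOrElse("CONTAINER_ID", "unknown"),
    "USER" -> sys.props.getOrElse("user.name", "unknown")))
}
```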

core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala

Lines changed: 2 additions & 1 deletion
@@ -192,7 +192,8 @@ case class SparkListenerApplicationStart(
     time: Long,
     sparkUser: String,
     appAttemptId: Option[String],
-    driverLogs: Option[Map[String, String]] = None) extends SparkListenerEvent
+    driverLogs: Option[Map[String, String]] = None,
+    driverAttributes: Option[Map[String, String]] = None) extends SparkListenerEvent

 @DeveloperApi
 case class SparkListenerApplicationEnd(time: Long) extends SparkListenerEvent

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala

Lines changed: 2 additions & 1 deletion
@@ -63,7 +63,8 @@ private[spark] object CoarseGrainedClusterMessages {
       executorRef: RpcEndpointRef,
       hostname: String,
       cores: Int,
-      logUrls: Map[String, String])
+      logUrls: Map[String, String],
+      attributes: Map[String, String])
     extends CoarseGrainedClusterMessage

   case class StatusUpdate(executorId: String, taskId: Long, state: TaskState,

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala

Lines changed: 2 additions & 2 deletions
@@ -183,7 +183,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp

   override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {

-    case RegisterExecutor(executorId, executorRef, hostname, cores, logUrls) =>
+    case RegisterExecutor(executorId, executorRef, hostname, cores, logUrls, attributes) =>
       if (executorDataMap.contains(executorId)) {
         executorRef.send(RegisterExecutorFailed("Duplicate executor ID: " + executorId))
         context.reply(true)
@@ -207,7 +207,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
         totalCoreCount.addAndGet(cores)
         totalRegisteredExecutors.addAndGet(1)
         val data = new ExecutorData(executorRef, executorAddress, hostname,
-          cores, cores, logUrls)
+          cores, cores, logUrls, attributes)
         // This must be synchronized because variables mutated
         // in this block are read when requesting executors
         CoarseGrainedSchedulerBackend.this.synchronized {
core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala

Lines changed: 3 additions & 2 deletions
@@ -34,5 +34,6 @@ private[cluster] class ExecutorData(
     override val executorHost: String,
     var freeCores: Int,
     override val totalCores: Int,
-    override val logUrlMap: Map[String, String]
-) extends ExecutorInfo(executorHost, totalCores, logUrlMap)
+    override val logUrlMap: Map[String, String],
+    override val attributes: Map[String, String]
+) extends ExecutorInfo(executorHost, totalCores, logUrlMap, attributes)
