
Commit f9be1a4

bsikandernoorul authored and committed

feat(jobserver): Add JMX metric for job cache (spark-jobserver#944)

1 parent c9269b9

File tree

3 files changed: +17 −2 lines changed

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,7 +6,7 @@ spark-jobserver provides a RESTful interface for submitting and managing [Apache
 This repo contains the complete Spark job server project, including unit tests and deploy scripts.
 It was originally started at [Ooyala](http://www.ooyala.com), but this is now the main development repo.
 
-Other useful links: [Troubleshooting Tips](doc/troubleshooting.md), [Yarn tips](doc/yarn.md), [Mesos tips](doc/mesos.md).
+Other useful links: [Troubleshooting Tips](doc/troubleshooting.md), [Yarn tips](doc/yarn.md), [Mesos tips](doc/mesos.md), [JMX tips](doc/jmx.md).
 
 Also see [Chinese docs / 中文](doc/chinese/job-server.md).
 
```

doc/jmx.md

Lines changed: 12 additions & 0 deletions
```diff
@@ -0,0 +1,12 @@
+#### JMX
+To read JMX metrics, you can use [jconsole](http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html) if a GUI is available. Otherwise you can use a CLI utility named `jmxcli`. Here are the steps:
+
+- `wget https://github.com/downloads/vladimirvivien/jmx-cli/jmxcli-0.1.2-bin.zip`
+- `unzip jmxcli-0.1.2-bin.zip -d <folder>`
+- `cd <folder>`
+- Execute `java -jar cli.jar`
+- Run `ps` to list all the JVMs
+- Connect to a JVM using `connect pid:<pid_id>`
+- Use `list` to see all the available MBeans
+- To get the current value of a metric, use for example:
+  `exec bean:"\"spark.jobserver\":name=\"job-cache-size\",type=\"JobCacheImpl\"" get:Value`
```
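The same read that jmxcli performs with `get:Value` can also be done programmatically against the JVM's platform MBeanServer. A minimal, self-contained Java sketch: the MBean registered here is a stand-in whose `ObjectName` merely mimics the jobserver gauge's naming (the bean class and sizes are illustrative, not part of the jobserver itself):

```java
import java.lang.management.ManagementFactory;
import java.util.Hashtable;
import javax.management.MBeanServer;
import javax.management.ObjectName;

// Standard MBean convention: the management interface must be named
// <ImplementationClass>MBean, and a getter getValue() surfaces as
// an attribute called "Value".
interface JobCacheSizeMBean {
    int getValue();
}

class JobCacheSize implements JobCacheSizeMBean {
    volatile int size = 3;
    public int getValue() { return size; }
}

public class JmxReadDemo {
    // Registers a toy MBean whose ObjectName mimics the jobserver gauge
    // and reads its Value attribute twice, the programmatic equivalent
    // of jmxcli's `get:Value`.
    public static int[] demo() throws Exception {
        MBeanServer server = ManagementFactory.getPlatformMBeanServer();
        Hashtable<String, String> keys = new Hashtable<>();
        keys.put("type", "JobCacheImpl");
        keys.put("name", "job-cache-size");
        ObjectName name = new ObjectName("spark.jobserver", keys);

        if (server.isRegistered(name)) {   // makes the demo re-runnable
            server.unregisterMBean(name);
        }
        JobCacheSize bean = new JobCacheSize();
        server.registerMBean(bean, name);

        int before = (Integer) server.getAttribute(name, "Value");
        bean.size = 7;                     // the attribute tracks live state
        int after = (Integer) server.getAttribute(name, "Value");
        return new int[] { before, after };
    }

    public static void main(String[] args) throws Exception {
        int[] r = demo();
        System.out.println(r[0] + " then " + r[1]); // prints "3 then 7"
    }
}
```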

job-server/src/main/scala/spark/jobserver/JobCache.scala

Lines changed: 4 additions & 1 deletion
```diff
@@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory
 import spark.jobserver.io.{BinaryType, JobDAOActor}
 import spark.jobserver.japi.BaseJavaJob
 import spark.jobserver.util.{ContextURLClassLoader, JarUtils, LRUCache}
+import spark.jobserver.common.akka.metrics.YammerMetrics
 
 import akka.pattern.ask
 
@@ -25,13 +26,15 @@ import scala.concurrent.Await
 class JobCacheImpl(maxEntries: Int,
                    dao: ActorRef,
                    sparkContext: SparkContext,
-                   loader: ContextURLClassLoader) extends JobCache {
+                   loader: ContextURLClassLoader) extends JobCache with YammerMetrics {
   import scala.concurrent.duration._
 
   private val cache = new LRUCache[(String, DateTime, String, BinaryType), BinaryJobInfo](maxEntries)
   private val logger = LoggerFactory.getLogger(getClass)
   implicit val daoAskTimeout: Timeout = Timeout(60 seconds)
 
+  val metricJobCache = gauge("job-cache-size", cache.size)
+
   /**
    * Retrieves the given SparkJob class from the cache if it's there, otherwise use the DAO to retrieve it.
    * @param appName the appName under which the binary was uploaded
```
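The `gauge("job-cache-size", cache.size)` line registers a gauge that reports the cache's current size whenever the metric is sampled. The `YammerMetrics` helper itself is not part of this diff; the hand-rolled sketch below (in Java, with hypothetical names, not the real Yammer metrics-core API) only illustrates the key idea: the value expression is passed lazily, so it is re-evaluated on every read rather than captured once at registration:

```java
import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

// Stand-in for the gauge(name, value) helper used in JobCacheImpl. The
// real helper comes from spark.jobserver.common.akka.metrics.YammerMetrics;
// this sketch only shows the pattern: the value is a Supplier, invoked
// each time the gauge is read, so the metric always reflects the current
// cache size.
public class GaugeDemo {

    public interface Gauge<T> { T value(); }

    public static <T> Gauge<T> gauge(String name, Supplier<T> compute) {
        // A real implementation would also register the gauge in a metrics
        // registry under `name`; omitted here for brevity.
        return compute::get;
    }

    public static void main(String[] args) {
        Map<String, Integer> cache = new HashMap<>();
        Gauge<Integer> g = gauge("job-cache-size", cache::size);
        System.out.println(g.value()); // 0: cache is empty at registration
        cache.put("a", 1);
        cache.put("b", 2);
        System.out.println(g.value()); // 2: re-evaluated at read time
    }
}
```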
