
Commit b2fd783

Author: Leon Eller
Added a TODO and README comment in relation to Stream[_] and context-per-jvm=true
1 parent 08596e0 commit b2fd783

File tree: 2 files changed, +13 -1 lines changed

README.md

Lines changed: 4 additions & 1 deletion
@@ -621,7 +621,10 @@ serialized properly:
   - Anything that implements Product (Option, case classes) -- they will be serialized as lists
   - Maps and Seqs may contain nested values of any of the above
   - If a job result is of scala's Stream[Byte] type it will be serialised directly as a chunk encoded stream.
-    This is useful if your job result payload is large and may cause a timeout serialising as objects.
+    This is useful if your job result payload is large and may cause a timeout serialising as objects. Beware, this
+    will not currently work as desired with the context-per-jvm=true configuration, since it would require serialising
+    a Stream[_] blob between processes. For now, use Stream[_] job results with the context-per-jvm=false configuration,
+    pending potential future enhancements to support this in context-per-jvm=true mode.
 
 If we encounter a data type that is not supported, then the entire result will be serialized to a string.
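For illustration, here is a minimal sketch (not part of this commit) of a job whose result is a Stream[Byte], which the README section above says is serialised directly as a chunk-encoded stream. It assumes the classic spark.jobserver SparkJob API; the job name LargeBinaryResultJob and the generated payload are hypothetical. Per the new note, such a job should be run with context-per-jvm=false for now.

```scala
import com.typesafe.config.Config
import org.apache.spark.SparkContext
import spark.jobserver.{SparkJob, SparkJobValid, SparkJobValidation}

// Hypothetical job: returns its result as Stream[Byte] so the job server can
// serialise it directly as a chunk-encoded stream rather than one large object.
object LargeBinaryResultJob extends SparkJob {
  // Nothing to validate for this illustration.
  override def validate(sc: SparkContext, config: Config): SparkJobValidation = SparkJobValid

  override def runJob(sc: SparkContext, config: Config): Any = {
    // Build a potentially large byte payload on the driver and return it
    // lazily as Stream[Byte] instead of a single large serialised object.
    val payload: Array[Byte] = sc.parallelize(1 to 1024 * 1024)
      .map(i => (i % 256).toByte)
      .collect()
    payload.toStream
  }
}
```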

job-server/src/spark.jobserver/JobManagerActor.scala

Lines changed: 9 additions & 0 deletions
@@ -285,6 +285,15 @@ class JobManagerActor(contextConfig: Config) extends InstrumentedActor {
         }(executionContext).andThen {
           case Success(result: Any) =>
             statusActor ! JobFinished(jobId, DateTime.now())
+            // TODO: If the result is Stream[_] and this is running with the context-per-jvm=true configuration,
+            // serializing a Stream[_] blob across process boundaries is not desirable.
+            // In that scenario an enhancement is required here to chunk stream results back.
+            // Something like ChunkedJobResultStart, ChunkJobResultMessage, and ChunkJobResultEnd messages
+            // might be a better way to send results back, and then on the other side use chunked-encoding
+            // transfer to send the chunks to the client. Alternatively the stream could be persisted here to HDFS
+            // and then streamed out of an InputStream on the other side.
+            // Either way an enhancement would be required here to make Stream[_] responses work
+            // with the context-per-jvm=true configuration.
             resultActor ! JobResult(jobId, result)
           case Failure(error: Throwable) =>
             // If and only if job validation fails, JobErroredOut message is dropped silently in JobStatusActor.
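To make the TODO above concrete, below is a hypothetical sketch (not part of this commit) of the chunked-result protocol it suggests. The message names ChunkedJobResultStart, ChunkJobResultMessage, and ChunkJobResultEnd come from the comment; their fields, the sendChunked helper, and the 64 KB chunk size are assumptions for illustration.

```scala
import akka.actor.ActorRef

// Protocol messages suggested by the TODO for streaming a Stream[_] result
// across the process boundary in context-per-jvm=true mode (field shapes assumed).
case class ChunkedJobResultStart(jobId: String)
case class ChunkJobResultMessage(jobId: String, chunk: Array[Byte])
case class ChunkJobResultEnd(jobId: String)

object ChunkedResultSketch {
  // Rough shape of how a Stream[Byte] result could be forwarded in fixed-size
  // chunks instead of serialising the whole blob between processes at once;
  // the receiving side would reassemble the chunks or relay them with
  // chunked-encoding transfer to the HTTP client.
  def sendChunked(resultActor: ActorRef, jobId: String, result: Stream[Byte],
                  chunkSize: Int = 64 * 1024): Unit = {
    resultActor ! ChunkedJobResultStart(jobId)
    result.grouped(chunkSize).foreach { chunk =>
      resultActor ! ChunkJobResultMessage(jobId, chunk.toArray)
    }
    resultActor ! ChunkJobResultEnd(jobId)
  }
}
```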
