Commit 61f0ca4

[SPARK-24699][SS] Make watermarks work with Trigger.Once by saving updated watermark to commit log
## What changes were proposed in this pull request?

Streaming queries with watermarks do not work with Trigger.Once, for the following reasons:

- The watermark is updated in driver memory after a batch completes, but it is persisted to the checkpoint (in the offset log) only when the next batch is planned.
- With Trigger.Once, the query terminates as soon as one batch has completed, so the updated watermark is never persisted anywhere.

The simple solution is to persist the updated watermark value in the commit log when a batch is marked as completed. The next batch, in the next Trigger.Once run, can then pick it up from the commit log.

## How was this patch tested?

New unit tests.

Co-authored-by: Tathagata Das <tathagata.das1565@gmail.com>
Co-authored-by: c-horn <chorn4033@gmail.com>

Author: Tathagata Das <[email protected]>

Closes apache#21746 from tdas/SPARK-24699.
1 parent 2edf17e commit 61f0ca4
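For context, a minimal sketch of the kind of query this patch fixes. The paths, schema, and intervals below are illustrative, not taken from the patch: a watermarked aggregation run repeatedly with Trigger.Once, where each run must resume from the watermark the previous run reached.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window
import org.apache.spark.sql.streaming.Trigger

val spark = SparkSession.builder().appName("watermark-trigger-once").getOrCreate()
import spark.implicits._

// A Trigger.Once run processes whatever input is available, then stops.
// Before this patch, the watermark advanced during the run was lost when
// the query terminated, so old state was never cleaned up and late data
// was never dropped across successive runs.
val counts = spark.readStream
  .format("json")
  .schema("eventTime TIMESTAMP, value INT")   // hypothetical schema
  .load("/tmp/events")                        // hypothetical input path
  .withWatermark("eventTime", "10 seconds")
  .groupBy(window($"eventTime", "5 seconds"))
  .count()

counts.writeStream
  .format("parquet")
  .option("path", "/tmp/out")                 // hypothetical output path
  .option("checkpointLocation", "/tmp/chk")   // the watermark is now saved here
  .trigger(Trigger.Once())
  .start()
  .awaitTermination()
```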

20 files changed, +177 -42 lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CommitLog.scala

Lines changed: 19 additions & 14 deletions
```diff
@@ -22,6 +22,9 @@ import java.nio.charset.StandardCharsets._
 
 import scala.io.{Source => IOSource}
 
+import org.json4s.NoTypeHints
+import org.json4s.jackson.Serialization
+
 import org.apache.spark.sql.SparkSession
 
 /**
@@ -43,36 +46,28 @@ import org.apache.spark.sql.SparkSession
  * line 2: metadata (optional json string)
  */
 class CommitLog(sparkSession: SparkSession, path: String)
-  extends HDFSMetadataLog[String](sparkSession, path) {
+  extends HDFSMetadataLog[CommitMetadata](sparkSession, path) {
 
   import CommitLog._
 
-  def add(batchId: Long): Unit = {
-    super.add(batchId, EMPTY_JSON)
-  }
-
-  override def add(batchId: Long, metadata: String): Boolean = {
-    throw new UnsupportedOperationException(
-      "CommitLog does not take any metadata, use 'add(batchId)' instead")
-  }
-
-  override protected def deserialize(in: InputStream): String = {
+  override protected def deserialize(in: InputStream): CommitMetadata = {
     // called inside a try-finally where the underlying stream is closed in the caller
     val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
     if (!lines.hasNext) {
       throw new IllegalStateException("Incomplete log file in the offset commit log")
     }
     parseVersion(lines.next.trim, VERSION)
-    EMPTY_JSON
+    val metadataJson = if (lines.hasNext) lines.next else EMPTY_JSON
+    CommitMetadata(metadataJson)
   }
 
-  override protected def serialize(metadata: String, out: OutputStream): Unit = {
+  override protected def serialize(metadata: CommitMetadata, out: OutputStream): Unit = {
     // called inside a try-finally where the underlying stream is closed in the caller
     out.write(s"v${VERSION}".getBytes(UTF_8))
     out.write('\n')
 
     // write metadata
-    out.write(EMPTY_JSON.getBytes(UTF_8))
+    out.write(metadata.json.getBytes(UTF_8))
   }
 }
 
@@ -81,3 +76,13 @@ object CommitLog {
   private val EMPTY_JSON = "{}"
 }
 
+
+case class CommitMetadata(nextBatchWatermarkMs: Long = 0) {
+  def json: String = Serialization.write(this)(CommitMetadata.format)
+}
+
+object CommitMetadata {
+  implicit val format = Serialization.formats(NoTypeHints)
+
+  def apply(json: String): CommitMetadata = Serialization.read[CommitMetadata](json)
+}
```
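A quick sketch of the round-trip the new CommitMetadata class enables. The class body is copied from the diff above; the assertions are illustrative:

```scala
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

case class CommitMetadata(nextBatchWatermarkMs: Long = 0) {
  def json: String = Serialization.write(this)(CommitMetadata.format)
}

object CommitMetadata {
  implicit val format = Serialization.formats(NoTypeHints)
  def apply(json: String): CommitMetadata = Serialization.read[CommitMetadata](json)
}

val md = CommitMetadata(nextBatchWatermarkMs = 5000)
assert(md.json == """{"nextBatchWatermarkMs":5000}""")  // what serialize() writes
assert(CommitMetadata(md.json) == md)                   // deserialize() restores it
assert(CommitMetadata("{}") == CommitMetadata(0))       // old-format logs fall back to 0
```

Because nextBatchWatermarkMs defaults to 0 and deserialize() falls back to EMPTY_JSON when the metadata line is absent, commit logs written before this change remain readable, keeping the format backward compatible.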

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala

Lines changed: 5 additions & 4 deletions
```diff
@@ -268,7 +268,7 @@ class MicroBatchExecution(
          * latest batch id in the offset log, then we can safely move to the next batch
          * i.e., committedBatchId + 1 */
         commitLog.getLatest() match {
-          case Some((latestCommittedBatchId, _)) =>
+          case Some((latestCommittedBatchId, commitMetadata)) =>
             if (latestBatchId == latestCommittedBatchId) {
               /* The last batch was successfully committed, so we can safely process a
                * new next batch but first:
@@ -286,7 +286,8 @@ class MicroBatchExecution(
               currentBatchId = latestCommittedBatchId + 1
               isCurrentBatchConstructed = false
               committedOffsets ++= availableOffsets
-              // Construct a new batch be recomputing availableOffsets
+              watermarkTracker.setWatermark(
+                math.max(watermarkTracker.currentWatermark, commitMetadata.nextBatchWatermarkMs))
             } else if (latestCommittedBatchId < latestBatchId - 1) {
               logWarning(s"Batch completion log latest batch id is " +
                 s"${latestCommittedBatchId}, which is not trailing " +
@@ -536,11 +537,11 @@ class MicroBatchExecution(
     }
 
     withProgressLocked {
-      commitLog.add(currentBatchId)
+      watermarkTracker.updateWatermark(lastExecution.executedPlan)
+      commitLog.add(currentBatchId, CommitMetadata(watermarkTracker.currentWatermark))
       committedOffsets ++= availableOffsets
       awaitProgressLockCondition.signalAll()
     }
-    watermarkTracker.updateWatermark(lastExecution.executedPlan)
     logDebug(s"Completed batch ${currentBatchId}")
   }
```
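On restart, the recovered value is merged with math.max so the watermark can only move forward. A minimal standalone sketch of that invariant, with WatermarkTracker reduced to a single var for illustration:

```scala
// Stand-in for WatermarkTracker, for illustration only.
var currentWatermark: Long = 0L

def setWatermarkFromCommitLog(nextBatchWatermarkMs: Long): Unit = {
  // A watermark must never regress: a default (0) entry, such as one
  // written by an old-format log, cannot pull it backwards.
  currentWatermark = math.max(currentWatermark, nextBatchWatermarkMs)
}

setWatermarkFromCommitLog(5000)  // fresh restart: watermark advances to 5000
setWatermarkFromCommitLog(0)     // legacy "{}" entry: watermark stays at 5000
```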

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -314,7 +314,7 @@ class ContinuousExecution(
     // Record offsets before updating `committedOffsets`
     recordTriggerOffsets(from = committedOffsets, to = availableOffsets)
     if (queryExecutionThread.isAlive) {
-      commitLog.add(epoch)
+      commitLog.add(epoch, CommitMetadata())
       val offset =
         continuousSources(0).deserializeOffset(offsetLog.get(epoch).get.offsets(0).get.json)
       committedOffsets ++= Seq(continuousSources(0) -> offset)
```
Five new test resource files follow. Their paths were not preserved in this capture; judging by their contents, they are two commit log entries, a query metadata file, and two offset log entries of a pre-existing checkpoint used by the new tests.

```diff
@@ -0,0 +1,2 @@
+v1
+{}
```

```diff
@@ -0,0 +1,2 @@
+v1
+{}
```

```diff
@@ -0,0 +1 @@
+{"id":"73f7f943-0a08-4ffb-a504-9fa88ff7612a"}
```

```diff
@@ -0,0 +1,3 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1531991874513,"conf":{"spark.sql.shuffle.partitions":"5","spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider"}}
+0
```

```diff
@@ -0,0 +1,3 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1531991878604,"conf":{"spark.sql.shuffle.partitions":"5","spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider"}}
+1
```
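Note that the captured commit log entries above still carry the empty `{}` metadata of the old format. After this change, a commit written once the watermark has advanced carries it in the metadata line; assuming an illustrative watermark of 5000 ms, serialize() shown earlier would produce a commit log file containing:

```
v1
{"nextBatchWatermarkMs":5000}
```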
