Commit f32a842
[SPARK-22053][SS] Stream-stream inner join in Append Mode
## What changes were proposed in this pull request?

#### Architecture

This PR implements stream-stream inner join using a two-way symmetric hash join. At a high level, we want to do the following.

1. For each stream, we maintain the past rows as state in the State Store.
   - For each joining key, there can be multiple rows that have been received.
   - So we effectively have to maintain a key-to-list-of-values multimap as state for each stream.
2. In each batch, for each input row in each stream:
   - Look up the other stream's state to see if there are matching rows, and output them if they satisfy the join condition.
   - Add the input row to the corresponding stream's state.
   - If the data has a timestamp/window column with a watermark, use it to calculate the threshold for keys that must be buffered for future matches, and drop the rest from the state.

Cleaning up old, unnecessary state rows depends entirely on whether a watermark has been defined and what the join conditions are. We definitely want to support state cleanup for two types of queries that are likely to be common.

- Queries with time-range conditions, e.g. `SELECT * FROM leftTable, rightTable ON leftKey = rightKey AND leftTime > rightTime - INTERVAL 8 MINUTES AND leftTime < rightTime + INTERVAL 1 HOUR`
- Queries with windows as the matching key, e.g. `SELECT * FROM leftTable, rightTable ON leftKey = rightKey AND window(leftTime, "1 hour") = window(rightTime, "1 hour")` (pseudo-SQL)

#### Implementation

The stream-stream join is primarily implemented in three classes:

- `StreamingSymmetricHashJoinExec` implements the symmetric join algorithm described above.
- `SymmetricHashJoinStateManager` manages the streaming state for the join. It is essentially a fault-tolerant key-to-list-of-values multimap built on the StateStore APIs. `StreamingSymmetricHashJoinExec` instantiates two such managers, one for each join side.
- `StreamingSymmetricHashJoinHelper` is a helper class that extracts the state thresholds from the join conditions and the event-time watermark.

Refer to the scaladocs of these classes for more implementation details.

Besides the implementation of the stream-stream inner join SparkPlan, some additional changes are:

- Allowed inner joins in Append mode in UnsupportedOperationChecker.
- Prevented stream-stream joins on an empty batch DataFrame from being collapsed by the optimizer.

## How was this patch tested?

- New tests in StreamingJoinSuite
- Updated tests in UnsupportedOperationsSuite

Author: Tathagata Das <[email protected]>

Closes apache#19271 from tdas/SPARK-22053.
1 parent a8a5cd2 commit f32a842
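
As a concrete illustration of the two query shapes described in the commit message, here is a minimal sketch against the DataFrame API. The `impressions`/`clicks` streams, the rate source, and all column names are illustrative assumptions, not part of this commit:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{expr, window}

val spark = SparkSession.builder.appName("stream-stream-join-sketch").getOrCreate()
import spark.implicits._

// Two hypothetical event-time streams (any source with a timestamp column works).
val impressions = spark.readStream.format("rate").load()
  .selectExpr("value AS impressionAdId", "timestamp AS impressionTime")
val clicks = spark.readStream.format("rate").load()
  .selectExpr("value AS clickAdId", "timestamp AS clickTime")

// Shape 1: time-range condition. The watermarks plus the range predicate let
// the engine derive per-side state watermarks and drop unmatchable rows.
val timeRangeJoin = impressions.withWatermark("impressionTime", "10 minutes")
  .join(
    clicks.withWatermark("clickTime", "10 minutes"),
    expr("""
      clickAdId = impressionAdId AND
      clickTime > impressionTime - interval 8 minutes AND
      clickTime < impressionTime + interval 1 hour"""))

// Shape 2: window as part of the matching key. The window struct inherits the
// watermark metadata (see the Analyzer.scala change below), enabling cleanup.
val windowJoin = impressions.withWatermark("impressionTime", "10 minutes")
  .withColumn("impressionWindow", window($"impressionTime", "1 hour"))
  .join(
    clicks.withWatermark("clickTime", "10 minutes")
      .withColumn("clickWindow", window($"clickTime", "1 hour")),
    expr("clickAdId = impressionAdId AND clickWindow = impressionWindow"))
```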

18 files changed, +1940 -45 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 1 addition & 1 deletion
@@ -2390,7 +2390,7 @@ object TimeWindowing extends Rule[LogicalPlan] {

       if (window.windowDuration == window.slideDuration) {
         val windowStruct = Alias(getWindow(0, 1), WINDOW_COL_NAME)(
-          exprId = windowAttr.exprId)
+          exprId = windowAttr.exprId, explicitMetadata = Some(metadata))

         val replacedPlan = p transformExpressions {
           case t: TimeWindow => windowAttr

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala

Lines changed: 4 additions & 3 deletions
@@ -222,16 +222,17 @@ object UnsupportedOperationChecker {
         joinType match {

           case _: InnerLike =>
-            if (left.isStreaming && right.isStreaming) {
-              throwError("Inner join between two streaming DataFrames/Datasets is not supported")
+            if (left.isStreaming && right.isStreaming &&
+                outputMode != InternalOutputModes.Append) {
+              throwError("Inner join between two streaming DataFrames/Datasets is not supported" +
+                s" in ${outputMode} output mode, only in Append output mode")
             }

           case FullOuter =>
             if (left.isStreaming || right.isStreaming) {
               throwError("Full outer joins with streaming DataFrames/Datasets are not supported")
             }

-
           case LeftOuter | LeftSemi | LeftAnti =>
             if (right.isStreaming) {
               throwError("Left outer/semi/anti joins with a streaming DataFrame/Dataset " +

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala

Lines changed: 8 additions & 1 deletion
@@ -164,7 +164,14 @@ case class Alias(child: Expression, name: String)(
     }
   }

-  override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix"
+  /** Used to signal the column used to calculate an eventTime watermark (e.g. a#1-T{delayMs}) */
+  private def delaySuffix = if (metadata.contains(EventTimeWatermark.delayKey)) {
+    s"-T${metadata.getLong(EventTimeWatermark.delayKey)}ms"
+  } else {
+    ""
+  }
+
+  override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix"

   override protected final def otherCopyArgs: Seq[AnyRef] = {
     exprId :: qualifier :: explicitMetadata :: Nil
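
A hedged illustration of the new suffix (the stream and the 10-second delay are made up): `withWatermark` records the delay in milliseconds in the column metadata under `EventTimeWatermark.delayKey`, and plan strings then tag the watermarked column accordingly.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder.getOrCreate()

// "10 seconds" is stored as delayMs = 10000 in the attribute metadata, so the
// watermarked column renders in plan strings like `timestamp#0-T10000ms`.
val events = spark.readStream.format("rate").load()
  .withWatermark("timestamp", "10 seconds")
events.explain()
```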

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala

Lines changed: 17 additions & 8 deletions
@@ -45,14 +45,19 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper {
     case p: Union if p.children.forall(isEmptyLocalRelation) =>
       empty(p)

-    case p @ Join(_, _, joinType, _) if p.children.exists(isEmptyLocalRelation) => joinType match {
-      case _: InnerLike => empty(p)
-      // Intersect is handled as LeftSemi by `ReplaceIntersectWithSemiJoin` rule.
-      // Except is handled as LeftAnti by `ReplaceExceptWithAntiJoin` rule.
-      case LeftOuter | LeftSemi | LeftAnti if isEmptyLocalRelation(p.left) => empty(p)
-      case RightOuter if isEmptyLocalRelation(p.right) => empty(p)
-      case FullOuter if p.children.forall(isEmptyLocalRelation) => empty(p)
-      case _ => p
+    // Joins on empty LocalRelations generated from streaming sources are not eliminated
+    // as stateful streaming joins need to perform other state management operations other than
+    // just processing the input data.
+    case p @ Join(_, _, joinType, _)
+        if !p.children.exists(_.isStreaming) && p.children.exists(isEmptyLocalRelation) =>
+      joinType match {
+        case _: InnerLike => empty(p)
+        // Intersect is handled as LeftSemi by `ReplaceIntersectWithSemiJoin` rule.
+        // Except is handled as LeftAnti by `ReplaceExceptWithAntiJoin` rule.
+        case LeftOuter | LeftSemi | LeftAnti if isEmptyLocalRelation(p.left) => empty(p)
+        case RightOuter if isEmptyLocalRelation(p.right) => empty(p)
+        case FullOuter if p.children.forall(isEmptyLocalRelation) => empty(p)
+        case _ => p
     }

     case p: UnaryNode if p.children.nonEmpty && p.children.forall(isEmptyLocalRelation) => p match {

@@ -74,6 +79,10 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper {
       //
       // If the grouping expressions are empty, however, then the aggregate will always produce a
       // single output row and thus we cannot propagate the EmptyRelation.
+      //
+      // Aggregation on empty LocalRelation generated from a streaming source is not eliminated
+      // as stateful streaming aggregation need to perform other state management operations other
+      // than just processing the input data.
       case Aggregate(ge, _, _) if ge.nonEmpty && !p.isStreaming => empty(p)
       // Generators like Hive-style UDTF may return their records within `close`.
       case Generate(_: Explode, _, _, _, _, _) => empty(p)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala

Lines changed: 19 additions & 3 deletions
@@ -383,11 +383,27 @@ class UnsupportedOperationsSuite extends SparkFunSuite {
     outputMode = Append
   )

-  // Inner joins: Stream-stream not supported
+  // Inner joins: Multiple stream-stream joins supported only in append mode
   testBinaryOperationInStreamingPlan(
-    "inner join",
+    "single inner join in append mode",
     _.join(_, joinType = Inner),
-    streamStreamSupported = false)
+    outputMode = Append,
+    streamStreamSupported = true)
+
+  testBinaryOperationInStreamingPlan(
+    "multiple inner joins in append mode",
+    (x: LogicalPlan, y: LogicalPlan) => {
+      x.join(y, joinType = Inner).join(streamRelation, joinType = Inner)
+    },
+    outputMode = Append,
+    streamStreamSupported = true)
+
+  testBinaryOperationInStreamingPlan(
+    "inner join in update mode",
+    _.join(_, joinType = Inner),
+    outputMode = Update,
+    streamStreamSupported = false,
+    expectedMsg = "inner join")

   // Full outer joins: only batch-batch is allowed
   testBinaryOperationInStreamingPlan(

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 19 additions & 2 deletions
@@ -18,15 +18,14 @@
 package org.apache.spark.sql.execution

 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.Strategy
+import org.apache.spark.sql.{execution, AnalysisException, Strategy}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.encoders.RowEncoder
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.physical._
-import org.apache.spark.sql.execution
 import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec}
 import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.exchange.ShuffleExchange

@@ -257,6 +256,24 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     }
   }

+  object StreamingJoinStrategy extends Strategy {
+    override def apply(plan: LogicalPlan): Seq[SparkPlan] = {
+      plan match {
+        case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right)
+            if left.isStreaming && right.isStreaming =>
+
+          new StreamingSymmetricHashJoinExec(
+            leftKeys, rightKeys, joinType, condition, planLater(left), planLater(right)) :: Nil
+
+        case Join(left, right, _, _) if left.isStreaming && right.isStreaming =>
+          throw new AnalysisException(
+            "Stream stream joins without equality predicate is not supported", plan = Some(plan))
+
+        case _ => Nil
+      }
+    }
+  }
+
   /**
    * Used to plan the aggregate operator for expressions based on the AggregateFunction2 interface.
    */
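
A hedged sketch of the two paths in `StreamingJoinStrategy` from the user's side (streams and columns are illustrative): a condition containing an equality is planned as `StreamingSymmetricHashJoinExec`, while a condition with no equality leaves no key to hash-partition the state on and is rejected.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.expr

val spark = SparkSession.builder.getOrCreate()
val left = spark.readStream.format("rate").load()
  .selectExpr("value AS leftKey", "timestamp AS leftTime")
val right = spark.readStream.format("rate").load()
  .selectExpr("value AS rightKey", "timestamp AS rightTime")

// ExtractEquiJoinKeys finds leftKey = rightKey, so this is planned as
// StreamingSymmetricHashJoinExec (the range predicate stays a join condition).
val equiJoin = left.join(right,
  expr("leftKey = rightKey AND leftTime < rightTime + interval 1 hour"))

// No equality predicate at all: fails when the query is started, with
// "Stream stream joins without equality predicate is not supported".
val rangeOnlyJoin = left.join(right, expr("leftTime < rightTime"))
```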

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala

Lines changed: 11 additions & 0 deletions
@@ -54,6 +54,7 @@ class IncrementalExecution(
     sparkSession.sessionState.planner.strategies

   override def extraPlanningStrategies: Seq[Strategy] =
+    StreamingJoinStrategy ::
     StatefulAggregationStrategy ::
     FlatMapGroupsWithStateStrategy ::
     StreamingRelationStrategy ::

@@ -116,6 +117,16 @@ class IncrementalExecution(
           stateInfo = Some(nextStatefulOperationStateInfo),
           batchTimestampMs = Some(offsetSeqMetadata.batchTimestampMs),
           eventTimeWatermark = Some(offsetSeqMetadata.batchWatermarkMs))
+
+      case j: StreamingSymmetricHashJoinExec =>
+        j.copy(
+          stateInfo = Some(nextStatefulOperationStateInfo),
+          eventTimeWatermark = Some(offsetSeqMetadata.batchWatermarkMs),
+          stateWatermarkPredicates =
+            StreamingSymmetricHashJoinHelper.getStateWatermarkPredicates(
+              j.left.output, j.right.output, j.leftKeys, j.rightKeys, j.condition,
+              Some(offsetSeqMetadata.batchWatermarkMs))
+        )
     }
   }
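
To make the `getStateWatermarkPredicates` step concrete, here is the cleanup arithmetic for the time-range query from the commit message, as a minimal sketch; the helper derives the actual predicates from the join condition and watermark, this function only shows the numbers involved:

```scala
/**
 * Worked example (illustrative, not the helper's API). For the condition
 *   leftTime > rightTime - INTERVAL 8 MINUTES AND
 *   leftTime < rightTime + INTERVAL 1 HOUR
 * and an event-time watermark W (no future row has event time below W),
 * compute the thresholds at or below which buffered rows can never match
 * any future input and may therefore be dropped from state.
 */
def stateCleanupThresholdsMs(watermarkMs: Long): (Long, Long) = {
  val eightMinutesMs = 8L * 60 * 1000
  val oneHourMs = 60L * 60 * 1000
  // A left row with leftTime = t can match a future right row (rightTime >= W)
  // only if t > rightTime - 8m can hold for some rightTime >= W, i.e. t > W - 8m.
  val dropLeftAtOrBelow = watermarkMs - eightMinutesMs
  // A right row with rightTime = r can match a future left row (leftTime >= W)
  // only if leftTime < r + 1h can hold for some leftTime >= W, i.e. r > W - 1h.
  val dropRightAtOrBelow = watermarkMs - oneHourMs
  (dropLeftAtOrBelow, dropRightAtOrBelow)
}
```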

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala

Lines changed: 2 additions & 0 deletions
@@ -297,6 +297,8 @@ class StreamExecution(
     val sparkSessionToRunBatches = sparkSession.cloneSession()
     // Adaptive execution can change num shuffle partitions, disallow
     sparkSessionToRunBatches.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false")
+    // Disable cost-based join optimization as we do not want stateful operations to be rearranged
+    sparkSessionToRunBatches.conf.set(SQLConf.CBO_ENABLED.key, "false")
     offsetSeqMetadata = OffsetSeqMetadata(
       batchWatermarkMs = 0, batchTimestampMs = 0, sparkSessionToRunBatches.conf)
