@@ -433,7 +433,7 @@ case class StreamingSymmetricHashJoinExec(
433433 }
434434
435435 val initIterFn = { () =>
436- val removedRowIter = joinerManager.leftSideJoiner.removeOldState ()
436+ val removedRowIter = joinerManager.leftSideJoiner.removeAndReturnOldState ()
437437 removedRowIter.filterNot { kv =>
438438 stateFormatVersion match {
439439 case 1 => matchesWithRightSideState(new UnsafeRowPair (kv.key, kv.value))
@@ -459,7 +459,7 @@ case class StreamingSymmetricHashJoinExec(
459459 }
460460
461461 val initIterFn = { () =>
462- val removedRowIter = joinerManager.rightSideJoiner.removeOldState ()
462+ val removedRowIter = joinerManager.rightSideJoiner.removeAndReturnOldState ()
463463 removedRowIter.filterNot { kv =>
464464 stateFormatVersion match {
465465 case 1 => matchesWithLeftSideState(new UnsafeRowPair (kv.key, kv.value))
@@ -484,13 +484,13 @@ case class StreamingSymmetricHashJoinExec(
484484 }
485485
486486 val leftSideInitIterFn = { () =>
487- val removedRowIter = joinerManager.leftSideJoiner.removeOldState ()
487+ val removedRowIter = joinerManager.leftSideJoiner.removeAndReturnOldState ()
488488 removedRowIter.filterNot(isKeyToValuePairMatched)
489489 .map(pair => joinedRow.withLeft(pair.value).withRight(nullRight))
490490 }
491491
492492 val rightSideInitIterFn = { () =>
493- val removedRowIter = joinerManager.rightSideJoiner.removeOldState ()
493+ val removedRowIter = joinerManager.rightSideJoiner.removeAndReturnOldState ()
494494 removedRowIter.filterNot(isKeyToValuePairMatched)
495495 .map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value))
496496 }
@@ -539,22 +539,19 @@ case class StreamingSymmetricHashJoinExec(
539539 // the outer side (e.g., left side for left outer join) while generating the outer "null"
540540 // outputs. Now, we have to remove unnecessary state rows from the other side (e.g., right
541541 // side for the left outer join) if possible. In all cases, nothing needs to be outputted,
542- // hence the removal needs to be done greedily by immediately consuming the returned
543- // iterator.
542+ // hence the removal needs to be done greedily.
544543 //
545544 // For full outer joins, we have already removed unnecessary states from both sides, so
546545 // nothing needs to be outputted here.
547- val cleanupIter = joinType match {
548- case Inner | LeftSemi => joinerManager.removeOldState()
549- case LeftOuter => joinerManager.rightSideJoiner.removeOldState()
550- case RightOuter => joinerManager.leftSideJoiner.removeOldState()
551- case FullOuter => Iterator .empty
552- case _ => throwBadJoinTypeException()
553- }
554- while (cleanupIter.hasNext) {
555- cleanupIter.next()
556- numRemovedStateRows += 1
557- }
546+ numRemovedStateRows += (
547+ joinType match {
548+ case Inner | LeftSemi => joinerManager.removeOldState()
549+ case LeftOuter => joinerManager.rightSideJoiner.removeOldState()
550+ case RightOuter => joinerManager.leftSideJoiner.removeOldState()
551+ case FullOuter => 0L
552+ case _ => throwBadJoinTypeException()
553+ }
554+ )
558555 }
559556
560557 // Commit all state changes and update state store metrics
@@ -643,7 +640,7 @@ case class StreamingSymmetricHashJoinExec(
643640 private [this ] val keyGenerator = UnsafeProjection .create(joinKeys, inputAttributes)
644641
645642 private [this ] val stateKeyWatermarkPredicateFunc = stateWatermarkPredicate match {
646- case Some (JoinStateKeyWatermarkPredicate (expr)) =>
643+ case Some (JoinStateKeyWatermarkPredicate (expr, _ )) =>
647644 // inputSchema can be empty as expr should only have BoundReferences and does not require
648645 // the schema to generate the predicate. See [[StreamingSymmetricHashJoinHelper]].
649646 Predicate .create(expr, Seq .empty).eval _
@@ -652,7 +649,7 @@ case class StreamingSymmetricHashJoinExec(
652649 }
653650
654651 private [this ] val stateValueWatermarkPredicateFunc = stateWatermarkPredicate match {
655- case Some (JoinStateValueWatermarkPredicate (expr)) =>
652+ case Some (JoinStateValueWatermarkPredicate (expr, _ )) =>
656653 Predicate .create(expr, inputAttributes).eval _
657654 case _ =>
658655 Predicate .create(Literal (false ), Seq .empty).eval _ // false = do not remove if no predicate
@@ -792,6 +789,29 @@ case class StreamingSymmetricHashJoinExec(
792789 joinStateManager.get(key)
793790 }
794791
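792+ // Eagerly evicts old state selected by the state watermark predicate without returning the rows; yields the evicted-row count (0 if no predicate is set).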
793+ def removeOldState (): Long = {
794+ stateWatermarkPredicate match {
795+ case Some (JoinStateKeyWatermarkPredicate (_, stateWatermark)) =>
796+ joinStateManager match {
797+ case s : SupportsEvictByCondition =>
798+ s.evictByKeyCondition(stateKeyWatermarkPredicateFunc)
799+
800+ case s : SupportsEvictByTimestamp =>
801+ s.evictByTimestamp(stateWatermark)
802+ }
803+ case Some (JoinStateValueWatermarkPredicate (_, stateWatermark)) =>
804+ joinStateManager match {
805+ case s : SupportsEvictByCondition =>
806+ s.evictByValueCondition(stateValueWatermarkPredicateFunc)
807+
808+ case s : SupportsEvictByTimestamp =>
809+ s.evictByTimestamp(stateWatermark)
810+ }
811+ case _ => 0L
812+ }
813+ }
814+
795815 /**
796816 * Builds an iterator over old state key-value pairs, removing them lazily as they're produced.
797817 *
@@ -802,12 +822,24 @@ case class StreamingSymmetricHashJoinExec(
802822 * We do this to avoid requiring either two passes or full materialization when
803823 * processing the rows for outer join.
804824 */
805- def removeOldState (): Iterator [KeyToValuePair ] = {
825+ def removeAndReturnOldState (): Iterator [KeyToValuePair ] = {
806826 stateWatermarkPredicate match {
807- case Some (JoinStateKeyWatermarkPredicate (expr)) =>
808- joinStateManager.removeByKeyCondition(stateKeyWatermarkPredicateFunc)
809- case Some (JoinStateValueWatermarkPredicate (expr)) =>
810- joinStateManager.removeByValueCondition(stateValueWatermarkPredicateFunc)
827+ case Some (JoinStateKeyWatermarkPredicate (_, stateWatermark)) =>
828+ joinStateManager match {
829+ case s : SupportsEvictByCondition =>
830+ s.evictAndReturnByKeyCondition(stateKeyWatermarkPredicateFunc)
831+
832+ case s : SupportsEvictByTimestamp =>
833+ s.evictAndReturnByTimestamp(stateWatermark)
834+ }
835+ case Some (JoinStateValueWatermarkPredicate (_, stateWatermark)) =>
836+ joinStateManager match {
837+ case s : SupportsEvictByCondition =>
838+ s.evictAndReturnByValueCondition(stateValueWatermarkPredicateFunc)
839+
840+ case s : SupportsEvictByTimestamp =>
841+ s.evictAndReturnByTimestamp(stateWatermark)
842+ }
811843 case _ => Iterator .empty
812844 }
813845 }
@@ -836,8 +868,12 @@ case class StreamingSymmetricHashJoinExec(
836868 private case class OneSideHashJoinerManager (
837869 leftSideJoiner : OneSideHashJoiner , rightSideJoiner : OneSideHashJoiner ) {
838870
839- def removeOldState (): Iterator [KeyToValuePair ] = {
840- leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState()
871+ def removeOldState (): Long = {
872+ leftSideJoiner.removeOldState() + rightSideJoiner.removeOldState()
873+ }
874+
875+ def removeAndReturnOldState (): Iterator [KeyToValuePair ] = {
876+ leftSideJoiner.removeAndReturnOldState() ++ rightSideJoiner.removeAndReturnOldState()
841877 }
842878
843879 def metrics : StateStoreMetrics = {
0 commit comments