@@ -257,7 +257,7 @@ func (pp *PartitionProcessor) Start(setupCtx, ctx context.Context) error {
257257 join := join
258258 pp .runnerGroup .Go (func () error {
259259 defer pp .state .SetState (PPStateStopping )
260- return join .CatchupForever (runnerCtx , false )
260+ return join .CatchupForever (runnerCtx , true )
261261 })
262262 }
263263
@@ -274,7 +274,7 @@ func (pp *PartitionProcessor) Start(setupCtx, ctx context.Context) error {
274274 // (b) run the processor table in catchup mode so it keeps updating its state.
275275 case runModePassive :
276276 if pp .table != nil {
277- err = pp .table .CatchupForever (runnerCtx , false )
277+ err = pp .table .CatchupForever (runnerCtx , true )
278278 }
279279 default :
280280 err = fmt .Errorf ("processor has invalid run mode" )
@@ -298,16 +298,16 @@ func (pp *PartitionProcessor) Stop() error {
298298 pp .state .SetState (PPStateStopping )
299299 defer pp .state .SetState (PPStateStopped )
300300
301- close (pp .input )
302- close (pp .visitInput )
303-
304301 if pp .cancelRunnerGroup != nil {
305302 pp .cancelRunnerGroup ()
306303 }
307304
308305 // wait for the runner to be done
309306 runningErrs := multierror .Append (pp .runnerGroup .Wait ().ErrorOrNil ())
310307
308+ close (pp .input )
309+ close (pp .visitInput )
310+
311311 // close all the tables
312312 stopErrg , _ := multierr .NewErrGroup (context .Background ())
313313 for _ , join := range pp .joins {
@@ -637,15 +637,6 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta
637637
638638 var wg sync.WaitGroup
639639
640- // drains the channel and drops out when closed.
641- // This is done when the processor shuts down during visit
642- // and makes sure the waitgroup is fully counted down.
643- drainUntilClose := func () {
644- for range pp .visitInput {
645- wg .Done ()
646- }
647- }
648-
649640 // drains the input channel until there are no more items.
650641 // does not wait for close, because the channel stays open for the next visit
651642 drainUntilEmpty := func () {
@@ -662,6 +653,17 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta
662653 }
663654 }
664655
656+ // register a channel that will close once the visitor itself is done.
657+ visitDone := make (chan struct {})
658+ defer close (visitDone )
659+
660+ // start a goroutine in the processor's runner-errgroup that prevents the broker from shutting down
661+ // while the visitor is running.
662+ pp .runnerGroup .Go (func () error {
663+ <- visitDone
664+ return nil
665+ })
666+
665667 defer it .Release ()
666668
667669 stopping , doneWaitingForStop := pp .stopping ()
@@ -673,7 +675,7 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta
673675 wg .Add (1 )
674676 select {
675677 case <- stopping :
676- drainUntilClose ()
678+ drainUntilEmpty ()
677679 wg .Done ()
678680 return ErrVisitAborted
679681 case <- ctx .Done ():
@@ -703,7 +705,7 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta
703705 }()
704706 select {
705707 case <- stopping :
706- drainUntilClose ()
708+ drainUntilEmpty ()
707709 return ErrVisitAborted
708710 case <- ctx .Done ():
709711 drainUntilEmpty ()
0 commit comments