-
Notifications
You must be signed in to change notification settings - Fork 181
improve processor stability #473
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -257,7 +257,7 @@ func (pp *PartitionProcessor) Start(setupCtx, ctx context.Context) error { | |
| join := join | ||
| pp.runnerGroup.Go(func() error { | ||
| defer pp.state.SetState(PPStateStopping) | ||
| return join.CatchupForever(runnerCtx, false) | ||
| return join.CatchupForever(runnerCtx, true) | ||
| }) | ||
| } | ||
|
|
||
|
|
@@ -274,7 +274,7 @@ func (pp *PartitionProcessor) Start(setupCtx, ctx context.Context) error { | |
| // (b) run the processor table in catchup mode so it keeps updating it's state. | ||
| case runModePassive: | ||
| if pp.table != nil { | ||
| err = pp.table.CatchupForever(runnerCtx, false) | ||
| err = pp.table.CatchupForever(runnerCtx, true) | ||
| } | ||
| default: | ||
| err = fmt.Errorf("processor has invalid run mode") | ||
|
|
@@ -298,16 +298,16 @@ func (pp *PartitionProcessor) Stop() error { | |
| pp.state.SetState(PPStateStopping) | ||
| defer pp.state.SetState(PPStateStopped) | ||
|
|
||
| close(pp.input) | ||
| close(pp.visitInput) | ||
|
|
||
| if pp.cancelRunnerGroup != nil { | ||
| pp.cancelRunnerGroup() | ||
| } | ||
|
|
||
| // wait for the runner to be done | ||
| runningErrs := multierror.Append(pp.runnerGroup.Wait().ErrorOrNil()) | ||
|
|
||
| close(pp.input) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. channels are now closed after the runner-group is done — visitors attach to the runner-group for this. |
||
| close(pp.visitInput) | ||
|
|
||
| // close all the tables | ||
| stopErrg, _ := multierr.NewErrGroup(context.Background()) | ||
| for _, join := range pp.joins { | ||
|
|
@@ -637,15 +637,6 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta | |
|
|
||
| var wg sync.WaitGroup | ||
|
|
||
| // drains the channel and drops out when closed. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. there was actually no point in distinguishing between draining until close and draining until empty, because this function is writing to the channel. |
||
| // This is done when the processor shuts down during visit | ||
| // and makes sure the waitgroup is fully counted down. | ||
| drainUntilClose := func() { | ||
| for range pp.visitInput { | ||
| wg.Done() | ||
| } | ||
| } | ||
|
|
||
| // drains the input channel until there are no more items. | ||
| // does not wait for close, because the channel stays open for the next visit | ||
| drainUntilEmpty := func() { | ||
|
|
@@ -662,6 +653,17 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta | |
| } | ||
| } | ||
|
|
||
| // register a channel that will close once the visitor itself is done. | ||
| visitDone := make(chan struct{}) | ||
| defer close(visitDone) | ||
|
|
||
| // start a goroutine in the processor's runner-errgroup that prevents the broker from shutting down | ||
| // while the visitor is running. | ||
| pp.runnerGroup.Go(func() error { | ||
| <-visitDone | ||
| return nil | ||
| }) | ||
|
|
||
| defer it.Release() | ||
|
|
||
| stopping, doneWaitingForStop := pp.stopping() | ||
|
|
@@ -673,7 +675,7 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta | |
| wg.Add(1) | ||
| select { | ||
| case <-stopping: | ||
| drainUntilClose() | ||
| drainUntilEmpty() | ||
| wg.Done() | ||
| return ErrVisitAborted | ||
| case <-ctx.Done(): | ||
|
|
@@ -703,7 +705,7 @@ func (pp *PartitionProcessor) VisitValues(ctx context.Context, name string, meta | |
| }() | ||
| select { | ||
| case <-stopping: | ||
| drainUntilClose() | ||
| drainUntilEmpty() | ||
| return ErrVisitAborted | ||
| case <-ctx.Done(): | ||
| drainUntilEmpty() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -421,11 +421,11 @@ func (g *Processor) handleSessionErrors(ctx, sessionCtx context.Context, session | |
| ) | ||
|
|
||
| if errors.As(err, &errProc) { | ||
| g.log.Debugf("error processing message (non-transient), shutting down processor: %v", err) | ||
| g.log.Printf("error processing message (non-transient), shutting down processor: %v", err) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. let's log those important errors at a level higher than debug. |
||
| sessionCtxCancel() | ||
| } | ||
| if errors.As(err, &errSetup) { | ||
| g.log.Debugf("setup error (non-transient), shutting down processor: %v", err) | ||
| g.log.Printf("setup error (non-transient), shutting down processor: %v", err) | ||
| sessionCtxCancel() | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ func TestProcessorShutdown_KafkaDisconnect(t *testing.T) { | |
| brokers := initSystemTest(t) | ||
| var ( | ||
| topic = goka.Stream(fmt.Sprintf("goka_systemtest_proc_shutdown_disconnect-%d", time.Now().Unix())) | ||
| join = goka.Stream(fmt.Sprintf("goka_systemtest_proc_shutdown_disconnect-%d-join", time.Now().Unix())) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. adding some join tables to the tests so we can test the reconnecting-joins change from above. |
||
| group = goka.Group(topic) | ||
| ) | ||
|
|
||
|
|
@@ -29,6 +30,7 @@ func TestProcessorShutdown_KafkaDisconnect(t *testing.T) { | |
| tmgr, err := goka.DefaultTopicManagerBuilder(brokers) | ||
| require.NoError(t, err) | ||
| require.NoError(t, tmgr.EnsureStreamExists(string(topic), 10)) | ||
| require.NoError(t, tmgr.EnsureTableExists(string(join), 10)) | ||
|
|
||
| // emit values | ||
| errg.Go(func() error { | ||
|
|
@@ -69,6 +71,7 @@ func TestProcessorShutdown_KafkaDisconnect(t *testing.T) { | |
| ctx.SetValue(msg) | ||
| } | ||
| }), | ||
| goka.Join(goka.Table(join), new(codec.String)), | ||
| goka.Persist(new(codec.Int64)), | ||
| ), | ||
| goka.WithConsumerGroupBuilder(goka.ConsumerGroupBuilderWithConfig(cfg)), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,7 +89,7 @@ func checkBroker(broker Broker, config *sarama.Config) error { | |
| } | ||
|
|
||
| err := broker.Open(config) | ||
| if err != nil { | ||
| if err != nil && !errors.Is(err, sarama.ErrAlreadyConnected) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. according to the docs, `sarama.ErrAlreadyConnected` means the broker connection is already open, so it should not be treated as a failure here. |
||
| return fmt.Errorf("error opening broker connection: %v", err) | ||
| } | ||
| connected, err := broker.Connected() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this makes the join-table try to reconnect while the processor is running (together with the other table a few lines below)