Skip to content

Commit eded2a5

Browse files
committed
Timed blocking of Kafka producer/consumer Close()
1 parent b7232d1 commit eded2a5

File tree

6 files changed

+139
-29
lines changed

6 files changed

+139
-29
lines changed

kafka.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Interfaces.
1515
type KafkaConsumer interface {
1616
Subscribe(topic string, rebalanceCb kafka.RebalanceCb) error
1717
ReadMessage(timeout time.Duration) (*kafka.Message, error)
18+
Events() chan kafka.Event
1819
Close() error
1920
}
2021

kafka_mock_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,19 @@ import (
1010
type consMockFuncs struct {
1111
Subscribe func(m *consMock, topic string, rebalanceCb kafka.RebalanceCb) error
1212
ReadMessage func(m *consMock, timeout time.Duration) (*kafka.Message, error)
13+
Events func(m *consMock) chan kafka.Event
1314
Close func(m *consMock) error
1415
}
1516

1617
type consMockCounts struct {
1718
Subscribe,
1819
ReadMessage,
20+
Events,
1921
Close concurrent.AtomicCounter
2022
}
2123

2224
type consMock struct {
25+
events chan kafka.Event
2326
rebalanceCallback kafka.RebalanceCb
2427
rebalanceEvents chan kafka.Event
2528
messages chan *kafka.Message
@@ -47,12 +50,20 @@ func (m *consMock) ReadMessage(timeout time.Duration) (*kafka.Message, error) {
4750
return m.f.ReadMessage(m, timeout)
4851
}
4952

53+
func (m *consMock) Events() chan kafka.Event {
54+
defer m.c.Events.Inc()
55+
return m.f.Events(m)
56+
}
57+
5058
func (m *consMock) Close() error {
5159
defer m.c.Close.Inc()
5260
return m.f.Close(m)
5361
}
5462

5563
func (m *consMock) fillDefaults() {
64+
if m.events == nil {
65+
m.events = make(chan kafka.Event)
66+
}
5667
if m.rebalanceEvents == nil {
5768
m.rebalanceEvents = make(chan kafka.Event)
5869
}
@@ -74,11 +85,17 @@ func (m *consMock) fillDefaults() {
7485
}
7586
}
7687
}
88+
if m.f.Events == nil {
89+
m.f.Events = func(m *consMock) chan kafka.Event {
90+
return m.events
91+
}
92+
}
7793
if m.f.Close == nil {
7894
m.f.Close = func(m *consMock) error {
7995
return nil
8096
}
8197
}
98+
m.c.Events = concurrent.NewAtomicCounter()
8299
m.c.Subscribe = concurrent.NewAtomicCounter()
83100
m.c.ReadMessage = concurrent.NewAtomicCounter()
84101
m.c.Close = concurrent.NewAtomicCounter()

neli.go

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,17 @@ type Neli interface {
3535
}
3636

3737
type neli struct {
38-
config Config
39-
scribe scribe.Scribe
40-
consumer KafkaConsumer
41-
producer KafkaProducer
42-
pollDeadline concurrent.Deadline
43-
lastReceived concurrent.AtomicCounter
44-
isAssigned concurrent.AtomicCounter
45-
isLeader concurrent.AtomicCounter
46-
barrier Barrier
47-
state concurrent.AtomicReference
48-
stateMutex sync.Mutex
38+
config Config
39+
scribe scribe.Scribe
40+
consumer KafkaConsumer
41+
producer KafkaProducer
42+
pollDeadline concurrent.Deadline
43+
lastReceived concurrent.AtomicCounter
44+
isAssigned concurrent.AtomicCounter
45+
isLeader concurrent.AtomicCounter
46+
barrier Barrier
47+
state concurrent.AtomicReference
48+
stateMutex sync.Mutex
4949
deliveryHandlerDone chan int
5050
}
5151

@@ -76,14 +76,14 @@ func New(config Config, barrier ...Barrier) (Neli, error) {
7676
return nil, err
7777
}
7878
n := &neli{
79-
config: config,
80-
scribe: config.Scribe,
81-
lastReceived: concurrent.NewAtomicCounter(),
82-
isAssigned: concurrent.NewAtomicCounter(),
83-
isLeader: concurrent.NewAtomicCounter(),
84-
barrier: barrierArg,
85-
pollDeadline: concurrent.NewDeadline(*config.MinPollInterval),
86-
state: concurrent.NewAtomicReference(Live),
79+
config: config,
80+
scribe: config.Scribe,
81+
lastReceived: concurrent.NewAtomicCounter(),
82+
isAssigned: concurrent.NewAtomicCounter(),
83+
isLeader: concurrent.NewAtomicCounter(),
84+
barrier: barrierArg,
85+
pollDeadline: concurrent.NewDeadline(*config.MinPollInterval),
86+
state: concurrent.NewAtomicReference(Live),
8787
deliveryHandlerDone: make(chan int),
8888
}
8989

@@ -137,7 +137,7 @@ func New(config Config, barrier ...Barrier) (Neli, error) {
137137
n.producer = p
138138
go func() {
139139
defer close(n.deliveryHandlerDone)
140-
140+
141141
for event := range p.Events() {
142142
switch e := event.(type) {
143143
case *kafka.Message:
@@ -367,15 +367,16 @@ func (n *neli) Close() error {
367367
defer func() {
368368
<-n.deliveryHandlerDone
369369
}()
370-
defer func() {
371-
go func() {
372-
// A bug in confluent-kafka-go (#463) occasionally causes an indefinite syscall hang in Close(), after it closes
373-
// the Events channel. So we delegate this to a separate goroutine — better an orphaned goroutine than a
374-
// frozen harvester. (The rest of the battery will still unwind normally.)
375-
n.producer.Close()
376-
}()
377-
}()
378-
return n.consumer.Close()
370+
371+
// A bug in confluent-kafka-go (#463) occasionally causes an indefinite syscall hang in Close(), after it closes
372+
// the Events channel. So we delegate this to a separate goroutine — better an orphaned goroutine than a
373+
// frozen harvester. (The rest of the battery will still unwind normally.)
374+
const closeTimeout = 10 * time.Second
375+
_, _ = performTimed(void(n.producer.Close), closeTimeout)
376+
377+
// Similarly to the above, Consumer.Close() may also hang, and we need to cope with this until #463 is resolved.
378+
_, err := performTimed(n.consumer.Close, closeTimeout)
379+
return err
379380
}
380381

381382
// Await the closing of this Neli instance.

neli_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,21 @@ func TestFatalErrorInProduce(t *testing.T) {
506506
assert.Equal(t, "Fatal error: simulated", err.Error())
507507
}
508508

509+
func TestCloseConsumerError(t *testing.T) {
510+
_, cons, _, config, _ := fixtures{}.create()
511+
cons.f.Close = func(m *consMock) error {
512+
return check.ErrSimulated
513+
}
514+
515+
n, err := New(config)
516+
require.Nil(t, err)
517+
518+
err = n.Close()
519+
require.Equal(t, check.ErrSimulated, err)
520+
521+
n.Await()
522+
}
523+
509524
func TestDeliveryReports(t *testing.T) {
510525
m, _, prod, config, _ := fixtures{}.create()
511526

timed.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package goneli
2+
3+
import (
4+
"errors"
5+
"time"
6+
7+
"github.com/obsidiandynamics/libstdgo/concurrent"
8+
)
9+
10+
var errPerformWithNoError = errors.New("")
11+
12+
func void(f func()) func() error {
13+
return func() error {
14+
f()
15+
return nil
16+
}
17+
}
18+
19+
func performTimed(f func() error, timeout time.Duration) (bool, error) {
20+
errorRef := concurrent.NewAtomicReference()
21+
go func() {
22+
err := f()
23+
if err != nil {
24+
errorRef.Set(err)
25+
} else {
26+
errorRef.Set(errPerformWithNoError)
27+
}
28+
}()
29+
30+
res := errorRef.Await(concurrent.RefNot(concurrent.RefNil()), timeout)
31+
if res == nil {
32+
return false, nil
33+
}
34+
if err := res.(error); err != errPerformWithNoError {
35+
return true, err
36+
}
37+
return true, nil
38+
}

timed_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package goneli
2+
3+
import (
4+
"sync"
5+
"testing"
6+
"time"
7+
8+
"github.com/obsidiandynamics/libstdgo/check"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func TestPerformTimed_noError(t *testing.T) {
13+
done, err := performTimed(void(func() {}), 10*time.Second)
14+
require.True(t, done)
15+
require.Nil(t, err)
16+
}
17+
18+
func TestPerformTimed_withError(t *testing.T) {
19+
done, err := performTimed(func() error {
20+
return check.ErrSimulated
21+
}, 10*time.Second)
22+
require.True(t, done)
23+
require.Equal(t, check.ErrSimulated, err)
24+
}
25+
26+
func TestPerformTimed_withTimeout(t *testing.T) {
27+
wg := sync.WaitGroup{}
28+
wg.Add(1)
29+
30+
done, err := performTimed(func() error {
31+
wg.Wait()
32+
return nil
33+
}, 1*time.Millisecond)
34+
require.False(t, done)
35+
require.Nil(t, err)
36+
37+
wg.Done()
38+
}

0 commit comments

Comments
 (0)