Skip to content

Commit 4e71a47

Browse files
shibd and RobertIndie authored
[fix] [client] fix same producer/consumer use more than one connection per broker (#1323)
* [fix] [client] fix same producer/consumer use more than one connection per broker
* Fix lint
* Apply suggestions from code review (Co-authored-by: Zike Yang <[email protected]>)
* Address comment
* Addressed comment
* make lint
--------- Co-authored-by: Zike Yang <[email protected]>
1 parent ffba2a8 commit 4e71a47

File tree

8 files changed

+111
-24
lines changed

8 files changed

+111
-24
lines changed

pulsar/consumer_partition.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ type partitionConsumer struct {
141141
state uAtomic.Int32
142142
options *partitionConsumerOpts
143143

144-
conn atomic.Pointer[internal.Connection]
144+
conn atomic.Pointer[internal.Connection]
145+
cnxKeySuffix int32
145146

146147
topic string
147148
name string
@@ -351,6 +352,7 @@ func newPartitionConsumer(parent Consumer, client *client, options *partitionCon
351352
parentConsumer: parent,
352353
client: client,
353354
options: options,
355+
cnxKeySuffix: client.cnxPool.GenerateRoundRobinIndex(),
354356
topic: options.topic,
355357
name: options.consumerName,
356358
consumerID: client.rpcClient.NewConsumerID(),
@@ -1964,7 +1966,7 @@ func (pc *partitionConsumer) grabConn(assignedBrokerURL string) error {
19641966
cmdSubscribe.ForceTopicCreation = proto.Bool(false)
19651967
}
19661968

1967-
res, err := pc.client.rpcClient.Request(lr.LogicalAddr, lr.PhysicalAddr, requestID,
1969+
res, err := pc.client.rpcClient.RequestWithCnxKeySuffix(lr.LogicalAddr, lr.PhysicalAddr, pc.cnxKeySuffix, requestID,
19681970
pb.BaseCommand_SUBSCRIBE, cmdSubscribe)
19691971

19701972
if err != nil {
@@ -1975,7 +1977,7 @@ func (pc *partitionConsumer) grabConn(assignedBrokerURL string) error {
19751977
ConsumerId: proto.Uint64(pc.consumerID),
19761978
RequestId: proto.Uint64(requestID),
19771979
}
1978-
_, _ = pc.client.rpcClient.Request(lr.LogicalAddr, lr.PhysicalAddr, requestID,
1980+
_, _ = pc.client.rpcClient.RequestWithCnxKeySuffix(lr.LogicalAddr, lr.PhysicalAddr, pc.cnxKeySuffix, requestID,
19791981
pb.BaseCommand_CLOSE_CONSUMER, cmdClose)
19801982
}
19811983
return err

pulsar/consumer_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5038,3 +5038,31 @@ func TestClientVersion(t *testing.T) {
50385038
assert.True(t, strings.HasSuffix(publisher.ClientVersion, "-test-client"))
50395039

50405040
}
5041+
5042+
func TestSelectConnectionForSameConsumer(t *testing.T) {
5043+
client, err := NewClient(ClientOptions{
5044+
URL: serviceURL,
5045+
MaxConnectionsPerBroker: 10,
5046+
})
5047+
assert.NoError(t, err)
5048+
defer client.Close()
5049+
5050+
topicName := newTopicName()
5051+
5052+
_consumer, err := client.Subscribe(ConsumerOptions{
5053+
Topic: topicName,
5054+
SubscriptionName: "sub-1",
5055+
Type: Shared,
5056+
})
5057+
assert.NoError(t, err)
5058+
defer _consumer.Close()
5059+
5060+
partitionConsumerImpl := _consumer.(*consumer).consumers[0]
5061+
conn := partitionConsumerImpl._getConn()
5062+
5063+
for i := 0; i < 5; i++ {
5064+
assert.NoError(t, partitionConsumerImpl.grabConn(""))
5065+
assert.Equal(t, conn.ID(), partitionConsumerImpl._getConn().ID(),
5066+
"The consumer uses a different connection when reconnecting")
5067+
}
5068+
}

pulsar/internal/connection_pool.go

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,14 @@ import (
3232
// ConnectionPool is an interface of connection pool.
3333
type ConnectionPool interface {
3434
// GetConnection get a connection from ConnectionPool.
35-
GetConnection(logicalAddr *url.URL, physicalAddr *url.URL) (Connection, error)
35+
GetConnection(logicalAddr *url.URL, physicalAddr *url.URL, keySuffix int32) (Connection, error)
3636

3737
// GetConnections get all connections in the pool.
3838
GetConnections() map[string]Connection
3939

40+
// GenerateRoundRobinIndex generates a round-robin index.
41+
GenerateRoundRobinIndex() int32
42+
4043
// Close all the connections in the pool
4144
Close()
4245
}
@@ -47,8 +50,8 @@ type connectionPool struct {
4750
connectionTimeout time.Duration
4851
tlsOptions *TLSOptions
4952
auth auth.Provider
50-
maxConnectionsPerHost int32
51-
roundRobinCnt int32
53+
maxConnectionsPerHost uint32
54+
roundRobinCnt uint32
5255
keepAliveInterval time.Duration
5356
closeCh chan struct{}
5457

@@ -73,7 +76,7 @@ func NewConnectionPool(
7376
tlsOptions: tlsOptions,
7477
auth: auth,
7578
connectionTimeout: connectionTimeout,
76-
maxConnectionsPerHost: int32(maxConnectionsPerHost),
79+
maxConnectionsPerHost: uint32(maxConnectionsPerHost),
7780
keepAliveInterval: keepAliveInterval,
7881
log: logger,
7982
metrics: metrics,
@@ -84,9 +87,12 @@ func NewConnectionPool(
8487
return p
8588
}
8689

87-
func (p *connectionPool) GetConnection(logicalAddr *url.URL, physicalAddr *url.URL) (Connection, error) {
88-
p.log.WithField("logicalAddr", logicalAddr).WithField("physicalAddr", physicalAddr).Debug("Getting pooled connection")
89-
key := p.getMapKey(logicalAddr, physicalAddr)
90+
func (p *connectionPool) GetConnection(logicalAddr *url.URL, physicalAddr *url.URL,
91+
keySuffix int32) (Connection, error) {
92+
p.log.WithField("logicalAddr", logicalAddr).
93+
WithField("physicalAddr", physicalAddr).
94+
WithField("keySuffix", keySuffix).Debug("Getting pooled connection")
95+
key := fmt.Sprint(logicalAddr.Host, "-", physicalAddr.Host, "-", keySuffix)
9096

9197
p.Lock()
9298
conn, ok := p.connections[key]
@@ -141,6 +147,10 @@ func (p *connectionPool) GetConnections() map[string]Connection {
141147
return conns
142148
}
143149

150+
func (p *connectionPool) GenerateRoundRobinIndex() int32 {
151+
return int32(atomic.AddUint32(&p.roundRobinCnt, 1) % p.maxConnectionsPerHost)
152+
}
153+
144154
func (p *connectionPool) Close() {
145155
p.Lock()
146156
close(p.closeCh)
@@ -151,15 +161,6 @@ func (p *connectionPool) Close() {
151161
p.Unlock()
152162
}
153163

154-
func (p *connectionPool) getMapKey(logicalAddr *url.URL, physicalAddr *url.URL) string {
155-
cnt := atomic.AddInt32(&p.roundRobinCnt, 1)
156-
if cnt < 0 {
157-
cnt = -cnt
158-
}
159-
idx := cnt % p.maxConnectionsPerHost
160-
return fmt.Sprint(logicalAddr.Host, "-", physicalAddr.Host, "-", idx)
161-
}
162-
163164
func (p *connectionPool) checkAndCleanIdleConnections(maxIdleTime time.Duration) {
164165
if maxIdleTime < 0 {
165166
return

pulsar/internal/lookup_service_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ func (c *mockedLookupRPCClient) Request(logicalAddr *url.URL, physicalAddr *url.
108108
}, nil
109109
}
110110

111+
func (c *mockedLookupRPCClient) RequestWithCnxKeySuffix(_ *url.URL, _ *url.URL,
112+
_ int32, _ uint64, _ pb.BaseCommand_Type, _ proto.Message) (*RPCResult, error) {
113+
assert.Fail(c.t, "Shouldn't be called")
114+
return nil, nil
115+
}
116+
111117
func (c *mockedLookupRPCClient) RequestOnCnx(_ Connection, _ uint64, _ pb.BaseCommand_Type,
112118
_ proto.Message) (*RPCResult, error) {
113119
assert.Fail(c.t, "Shouldn't be called")
@@ -492,6 +498,12 @@ func (m mockedPartitionedTopicMetadataRPCClient) Request(_ *url.URL, _ *url.URL,
492498
return nil, nil
493499
}
494500

501+
func (m *mockedPartitionedTopicMetadataRPCClient) RequestWithCnxKeySuffix(_ *url.URL, _ *url.URL,
502+
_ int32, _ uint64, _ pb.BaseCommand_Type, _ proto.Message) (*RPCResult, error) {
503+
assert.Fail(m.t, "Shouldn't be called")
504+
return nil, nil
505+
}
506+
495507
func (m mockedPartitionedTopicMetadataRPCClient) RequestOnCnxNoWait(_ Connection, _ pb.BaseCommand_Type,
496508
_ proto.Message) error {
497509
assert.Fail(m.t, "Shouldn't be called")

pulsar/internal/rpc_client.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ type RPCClient interface {
6464
RequestToHost(serviceNameResolver *ServiceNameResolver, requestID uint64,
6565
cmdType pb.BaseCommand_Type, message proto.Message) (*RPCResult, error)
6666

67+
RequestWithCnxKeySuffix(logicalAddr *url.URL, physicalAddr *url.URL, cnxKeySuffix int32, requestID uint64,
68+
cmdType pb.BaseCommand_Type, message proto.Message) (*RPCResult, error)
69+
6770
Request(logicalAddr *url.URL, physicalAddr *url.URL, requestID uint64,
6871
cmdType pb.BaseCommand_Type, message proto.Message) (*RPCResult, error)
6972

@@ -154,7 +157,13 @@ func (c *rpcClient) RequestToHost(serviceNameResolver *ServiceNameResolver, requ
154157

155158
func (c *rpcClient) Request(logicalAddr *url.URL, physicalAddr *url.URL, requestID uint64,
156159
cmdType pb.BaseCommand_Type, message proto.Message) (*RPCResult, error) {
157-
cnx, err := c.pool.GetConnection(logicalAddr, physicalAddr)
160+
return c.RequestWithCnxKeySuffix(logicalAddr, physicalAddr, c.pool.GenerateRoundRobinIndex(),
161+
requestID, cmdType, message)
162+
}
163+
164+
func (c *rpcClient) RequestWithCnxKeySuffix(logicalAddr *url.URL, physicalAddr *url.URL, cnxKeySuffix int32,
165+
requestID uint64, cmdType pb.BaseCommand_Type, message proto.Message) (*RPCResult, error) {
166+
cnx, err := c.pool.GetConnection(logicalAddr, physicalAddr, cnxKeySuffix)
158167
if err != nil {
159168
return nil, err
160169
}

pulsar/producer_partition.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ type partitionProducer struct {
9696
topic string
9797
log log.Logger
9898

99-
conn uAtomic.Value
99+
conn uAtomic.Value
100+
cnxKeySuffix int32
100101

101102
options *ProducerOptions
102103
producerName string
@@ -179,6 +180,7 @@ func newPartitionProducer(client *client, topic string, options *ProducerOptions
179180
client: client,
180181
topic: topic,
181182
log: logger,
183+
cnxKeySuffix: client.cnxPool.GenerateRoundRobinIndex(),
182184
options: options,
183185
producerID: client.rpcClient.NewProducerID(),
184186
dataChan: make(chan *sendRequest, maxPendingMessages),
@@ -301,7 +303,7 @@ func (p *partitionProducer) grabCnx(assignedBrokerURL string) error {
301303
cmdProducer.Metadata = toKeyValues(p.options.Properties)
302304
}
303305

304-
cnx, err := p.client.cnxPool.GetConnection(lr.LogicalAddr, lr.PhysicalAddr)
306+
cnx, err := p.client.cnxPool.GetConnection(lr.LogicalAddr, lr.PhysicalAddr, p.cnxKeySuffix)
305307
// registering the producer first in case broker sends commands in the middle
306308
if err != nil {
307309
p.log.Error("Failed to get connection")

pulsar/producer_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2574,3 +2574,34 @@ func TestProducerKeepReconnectingAndThenCallClose(t *testing.T) {
25742574
return true
25752575
}, 30*time.Second, 1*time.Second)
25762576
}
2577+
2578+
func TestSelectConnectionForSameProducer(t *testing.T) {
2579+
topicName := newTopicName()
2580+
2581+
client, err := NewClient(ClientOptions{
2582+
URL: serviceURL,
2583+
MaxConnectionsPerBroker: 10,
2584+
})
2585+
assert.NoError(t, err)
2586+
defer client.Close()
2587+
2588+
reconnectNum := uint(1)
2589+
_producer, err := client.CreateProducer(ProducerOptions{
2590+
Topic: topicName,
2591+
MaxReconnectToBroker: &reconnectNum,
2592+
})
2593+
assert.NoError(t, err)
2594+
defer _producer.Close()
2595+
2596+
partitionProducerImp := _producer.(*producer).producers[0].(*partitionProducer)
2597+
conn := partitionProducerImp._getConn()
2598+
2599+
for i := 0; i < 5; i++ {
2600+
partitionProducerImp.grabCnx("")
2601+
currentConn := partitionProducerImp._getConn()
2602+
assert.Equal(t, conn.ID(), currentConn.ID(),
2603+
"The producer uses a different connection when reconnecting")
2604+
}
2605+
2606+
client.Close()
2607+
}

pulsar/transaction_coordinator_client.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ type transactionHandler struct {
4646
tc *transactionCoordinatorClient
4747
state uAtomic.Int32
4848
conn uAtomic.Value
49+
cnxKeySuffix int32
4950
partition uint64
5051
closeCh chan any
5152
requestCh chan any
@@ -67,6 +68,7 @@ func (t *transactionHandler) getState() txnHandlerState {
6768
func (tc *transactionCoordinatorClient) newTransactionHandler(partition uint64) (*transactionHandler, error) {
6869
handler := &transactionHandler{
6970
tc: tc,
71+
cnxKeySuffix: tc.client.cnxPool.GenerateRoundRobinIndex(),
7072
partition: partition,
7173
closeCh: make(chan any),
7274
requestCh: make(chan any),
@@ -95,8 +97,8 @@ func (t *transactionHandler) grabConn() error {
9597
TcId: proto.Uint64(t.partition),
9698
}
9799

98-
res, err := t.tc.client.rpcClient.Request(lr.LogicalAddr, lr.PhysicalAddr, requestID,
99-
pb.BaseCommand_TC_CLIENT_CONNECT_REQUEST, &cmdTCConnect)
100+
res, err := t.tc.client.rpcClient.RequestWithCnxKeySuffix(lr.LogicalAddr, lr.PhysicalAddr, t.cnxKeySuffix,
101+
requestID, pb.BaseCommand_TC_CLIENT_CONNECT_REQUEST, &cmdTCConnect)
100102

101103
if err != nil {
102104
t.log.WithError(err).Error("Failed to connect transaction_impl coordinator " +

0 commit comments

Comments
 (0)