Skip to content
Draft
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5fe0bfa
fix(pool): wip, pool reauth should not interfere with handoff
ndyakov Oct 14, 2025
d39da69
fix credListeners map
ndyakov Oct 14, 2025
8a629fb
fix race in tests
ndyakov Oct 14, 2025
6c54ab5
Merge branch 'master' into ndyakov/pool-reauth
ndyakov Oct 14, 2025
90bfdb3
better conn usable timeout
ndyakov Oct 14, 2025
07283ec
add design decision comment
ndyakov Oct 15, 2025
1bbf2e6
few small improvements
ndyakov Oct 15, 2025
1428068
update marked as queued
ndyakov Oct 15, 2025
e7dc339
add Used to clarify the state of the conn
ndyakov Oct 15, 2025
77c0c73
rename test
ndyakov Oct 15, 2025
011ef96
fix(test): fix flaky test
ndyakov Oct 15, 2025
e03396e
lock inside the listeners collection
ndyakov Oct 15, 2025
391b6c5
address pr comments
ndyakov Oct 15, 2025
6ad9a67
Update internal/auth/cred_listeners.go
ndyakov Oct 15, 2025
0c4f8fb
Update internal/pool/buffer_size_test.go
ndyakov Oct 15, 2025
acb55d8
wip refactor entraid
ndyakov Oct 16, 2025
d74671b
fix maintnotif pool hook
ndyakov Oct 17, 2025
0e10cd7
fix mocks
ndyakov Oct 17, 2025
afba8c2
fix nil listener
ndyakov Oct 17, 2025
4bc6d33
sync and async reauth based on conn lifecycle
ndyakov Oct 17, 2025
3020e3a
be able to reject connection OnGet
ndyakov Oct 17, 2025
f886775
pass hooks so the tests can observe reauth
ndyakov Oct 17, 2025
72cf74a
give some time for the background to execute commands
ndyakov Oct 17, 2025
c715185
fix tests
ndyakov Oct 17, 2025
f14095b
only async reauth
ndyakov Oct 17, 2025
e94cc9f
Merge branch 'master' into ndyakov/pool-reauth
ndyakov Oct 17, 2025
19f4080
Update internal/pool/pool.go
ndyakov Oct 17, 2025
4049d5e
Update internal/auth/streaming/pool_hook.go
ndyakov Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion auth/reauth_credentials_listener.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ func NewReAuthCredentialsListener(reAuth func(credentials Credentials) error, on
}

// Ensure ReAuthCredentialsListener implements the CredentialsListener interface.
var _ CredentialsListener = (*ReAuthCredentialsListener)(nil)
var _ CredentialsListener = (*ReAuthCredentialsListener)(nil)
10 changes: 6 additions & 4 deletions error.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,12 @@ func isRedisError(err error) bool {

func isBadConn(err error, allowTimeout bool, addr string) bool {
switch err {
case nil:
return false
case context.Canceled, context.DeadlineExceeded:
return true
case nil:
return false
case context.Canceled, context.DeadlineExceeded:
return true
case pool.ErrConnUnusableTimeout:
return true
}

if isRedisError(err) {
Expand Down
124 changes: 124 additions & 0 deletions internal/auth/conn_reauth_credentials_listener.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package auth

import (
"runtime"
"time"

auth2 "github.com/redis/go-redis/v9/auth"
"github.com/redis/go-redis/v9/internal/pool"
)

// ConnReAuthCredentialsListener implements the auth CredentialsListener
// interface for a single pooled connection.
//
// When new credentials arrive (OnNext) it waits, for at most
// checkUsableTimeout, until the connection can be claimed and then invokes
// reAuth with the connection and the new credentials. Errors are forwarded,
// together with the connection, to onErr.
//
// A nil reAuth turns OnNext into a no-op and a nil onErr turns OnError into a
// no-op.
type ConnReAuthCredentialsListener struct {
// reAuth is called with the connection and the updated credentials.
reAuth func(conn *pool.Conn, credentials auth2.Credentials) error
// onErr is called with the connection and the error that occurred.
onErr func(conn *pool.Conn, err error)
// conn is the connection to re-authenticate.
conn *pool.Conn
// checkUsableTimeout bounds how long OnNext waits for the connection to
// become claimable after a credentials update.
// The constructor sets it to 1 second; SetCheckUsableTimeout overrides it.
checkUsableTimeout time.Duration
}

// OnNext is called when the credentials are updated.
//
// It returns immediately when the connection is closed or when no reAuth
// callback is configured. Otherwise it claims the connection in two steps
// (Usable: true -> false, then Used: false -> true), polling until both
// succeed or until checkUsableTimeout elapses. Both waits share the same
// timeout channel, so the timeout bounds the total wait, not each step.
//
// On timeout OnError is called with pool.ErrConnUnusableTimeout and reAuth is
// not attempted. On success reAuth is called with the new credentials; if it
// returns an error, that error is passed to OnError.
func (c *ConnReAuthCredentialsListener) OnNext(credentials auth2.Credentials) {
if c.conn.IsClosed() {
return
}

if c.reAuth == nil {
return
}

var err error

// single deadline shared by both wait loops below; its duration comes from
// checkUsableTimeout (see SetCheckUsableTimeout)
timeout := time.After(c.checkUsableTimeout)
// wait for the connection to be usable and atomically mark it as not usable
// this is important because the connection pool may be in the process of reconnecting the connection
// and we don't want to interfere with that process
// but we also don't want to block for too long, so incorporate a timeout
for err == nil && !c.conn.Usable.CompareAndSwap(true, false) {
select {
case <-timeout:
err = pool.ErrConnUnusableTimeout
default:
// small sleep to avoid busy looping
time.Sleep(100 * time.Microsecond)
// yield the thread to allow other goroutines to run
runtime.Gosched()
}
}
// we flipped the usable flag ourselves, so restore it when OnNext returns
// (this also covers the case where the second wait below times out)
if err == nil {
defer c.conn.SetUsable(true)
}

// This check just verifies that the connection is not in use.
// If the connection is in use, we don't want to interfere with that.
// As soon as the connection is not in use, we mark it as in use.
// The loop is skipped entirely when the first wait already timed out.
for err == nil && !c.conn.Used.CompareAndSwap(false, true) {
select {
case <-timeout:
err = pool.ErrConnUnusableTimeout
default:
// small sleep to avoid busy looping
time.Sleep(100 * time.Microsecond)
Copy link

Copilot AI Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sleep duration 100 * time.Microsecond is duplicated and hard-coded. Extract to a named constant like checkUsablePollInterval = 100 * time.Microsecond for better maintainability and to make the polling strategy more explicit.

Copilot uses AI. Check for mistakes.

// yield the thread to allow other goroutines to run
runtime.Gosched()
}
}

// we timed out waiting for the connection to become usable or unused
// do not try to re-authenticate, instead call the onErr function
// (what happens to the connection afterwards is up to that callback)
if err != nil {
c.OnError(err)
return
}

// we marked the connection as used above, so release it when OnNext returns;
// deferred calls run in reverse order, so Used is cleared before Usable is restored
defer c.conn.Used.Store(false)
// the connection is now exclusively claimed: re-authenticate with the new credentials
if err = c.reAuth(c.conn, credentials); err != nil {
c.OnError(err)
}
}

// OnError reports err to the configured onErr callback, passing along the
// connection this listener is bound to. When no callback was configured the
// error is dropped silently.
// OnNext calls it both when waiting for the connection times out and when
// reAuth fails; it is also part of the credentials listener interface.
func (c *ConnReAuthCredentialsListener) OnError(err error) {
	if handler := c.onErr; handler != nil {
		handler(c.conn, err)
	}
}

// SetCheckUsableTimeout sets the maximum time OnNext waits for the connection
// to become claimable before giving up with pool.ErrConnUnusableTimeout.
// NOTE(review): the field is written here and read in OnNext without any
// synchronization — presumably this must be called before the listener starts
// receiving updates; confirm against callers.
func (c *ConnReAuthCredentialsListener) SetCheckUsableTimeout(timeout time.Duration) {
c.checkUsableTimeout = timeout
}

// NewConnReAuthCredentialsListener creates a new ConnReAuthCredentialsListener
// bound to conn. reAuth is invoked with conn and the new credentials on every
// credentials update, and onErr is invoked with conn and the error when
// something goes wrong; either callback may be nil.
// The usable-check timeout defaults to 1 second.
// The returned listener implements the auth.CredentialsListener interface.
func NewConnReAuthCredentialsListener(conn *pool.Conn, reAuth func(conn *pool.Conn, credentials auth2.Credentials) error, onErr func(conn *pool.Conn, err error)) *ConnReAuthCredentialsListener {
return &ConnReAuthCredentialsListener{
conn: conn,
reAuth: reAuth,
onErr: onErr,
checkUsableTimeout: 1 * time.Second,
Copy link

Copilot AI Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default timeout of 1 * time.Second is hard-coded. Extract to a named constant like defaultCheckUsableTimeout = 1 * time.Second to make it easier to maintain and document the default behavior.

Copilot uses AI. Check for mistakes.

}
}

// Ensure ConnReAuthCredentialsListener implements the CredentialsListener interface.
var _ auth2.CredentialsListener = (*ConnReAuthCredentialsListener)(nil)
41 changes: 41 additions & 0 deletions internal/auth/cred_listeners.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package auth

import (
"sync"

auth2 "github.com/redis/go-redis/v9/auth"
"github.com/redis/go-redis/v9/internal/pool"
)

// CredentialsListeners is a registry that associates pooled connections with
// their credentials listener. Its methods take the lock around every map
// access.
type CredentialsListeners struct {
// listeners maps each connection to the listener registered for it.
listeners map[*pool.Conn]auth2.CredentialsListener
// lock guards listeners.
lock sync.RWMutex
}

// NewCredentialsListeners returns an empty registry whose listener map is
// already allocated.
func NewCredentialsListeners() *CredentialsListeners {
	registry := new(CredentialsListeners)
	registry.listeners = make(map[*pool.Conn]auth2.CredentialsListener)
	return registry
}

// Add registers listener for poolCn, replacing any listener previously stored
// for that connection. The map is allocated on first use, so a zero-value
// CredentialsListeners works as well.
func (c *CredentialsListeners) Add(poolCn *pool.Conn, listener auth2.CredentialsListener) {
	c.lock.Lock()
	defer c.lock.Unlock()

	if c.listeners != nil {
		c.listeners[poolCn] = listener
		return
	}
	// first entry: create the map with the new listener already in it
	c.listeners = map[*pool.Conn]auth2.CredentialsListener{poolCn: listener}
}

// Get looks up the listener registered for poolCn. The boolean result reports
// whether an entry exists for that connection.
func (c *CredentialsListeners) Get(poolCn *pool.Conn) (auth2.CredentialsListener, bool) {
	c.lock.RLock()
	found, present := c.listeners[poolCn]
	c.lock.RUnlock()

	return found, present
}

// Remove drops the listener registered for poolCn. Removing a connection that
// has no entry is a no-op.
func (c *CredentialsListeners) Remove(poolCn *pool.Conn) {
	c.lock.Lock()
	delete(c.listeners, poolCn)
	c.lock.Unlock()
}
38 changes: 25 additions & 13 deletions internal/pool/buffer_size_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package pool_test
import (
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The import of net package was removed, but net.Conn is still referenced in the comment at line 142 as 'net.Conn'. While this is just a comment, it could cause confusion. Consider either keeping the import or updating the comment to remove the package reference.

Copilot uses AI. Check for mistakes.

"bufio"
"context"
"net"
"unsafe"

. "github.com/bsm/ginkgo/v2"
Expand Down Expand Up @@ -124,20 +123,30 @@ var _ = Describe("Buffer Size Configuration", func() {
})

// Helper functions to extract buffer sizes using unsafe pointers
// The struct layout must match pool.Conn exactly to avoid checkptr violations.
// checkptr is Go's pointer safety checker, which ensures that unsafe pointer
// conversions are valid. If the struct layouts do not match exactly, this can
// cause runtime panics or incorrect memory access due to invalid pointer dereferencing.
func getWriterBufSizeUnsafe(cn *pool.Conn) int {
// Import required for atomic types
type atomicBool struct{ _ uint32 }
type atomicInt64 struct{ _ int64 }

cnPtr := (*struct {
usedAt int64
netConn net.Conn
rd *proto.Reader
bw *bufio.Writer
wr *proto.Writer
// ... other fields
id uint64 // First field in pool.Conn
usedAt int64 // Second field (atomic)
netConnAtomic interface{} // atomic.Value (interface{} has same size)
rd *proto.Reader
bw *bufio.Writer
wr *proto.Writer
// We only need fields up to bw, so we can stop here
})(unsafe.Pointer(cn))

if cnPtr.bw == nil {
return -1
}

// bufio.Writer internal structure
bwPtr := (*struct {
err error
buf []byte
Expand All @@ -150,18 +159,20 @@ func getWriterBufSizeUnsafe(cn *pool.Conn) int {

func getReaderBufSizeUnsafe(cn *pool.Conn) int {
cnPtr := (*struct {
usedAt int64
netConn net.Conn
rd *proto.Reader
bw *bufio.Writer
wr *proto.Writer
// ... other fields
id uint64 // First field in pool.Conn
usedAt int64 // Second field (atomic)
netConnAtomic interface{} // atomic.Value (interface{} has same size)
rd *proto.Reader
bw *bufio.Writer
wr *proto.Writer
// We only need fields up to rd, so we can stop here
})(unsafe.Pointer(cn))

if cnPtr.rd == nil {
return -1
}

// proto.Reader internal structure
rdPtr := (*struct {
rd *bufio.Reader
})(unsafe.Pointer(cnPtr.rd))
Expand All @@ -170,6 +181,7 @@ func getReaderBufSizeUnsafe(cn *pool.Conn) int {
return -1
}

// bufio.Reader internal structure
bufReaderPtr := (*struct {
buf []byte
rd interface{}
Expand Down
Loading
Loading