Skip to content
14 changes: 12 additions & 2 deletions clientconn.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ func (cc *ClientConn) addTraceEvent(msg string) {
Severity: channelz.CtInfo,
}
}
channelz.AddTraceEvent(logger, cc.channelz, 0, ted)
channelz.AddTraceEvent(logger, cc.channelz, 1, ted)
}

type idler ClientConn
Expand All @@ -356,11 +356,21 @@ func (cc *ClientConn) exitIdleMode() (err error) {
}
cc.mu.Unlock()

// Set state to CONNECTING before building the name resolver
// so the channel does not remain in IDLE.
cc.csMgr.updateState(connectivity.Connecting)

// This needs to be called without cc.mu because this builds a new resolver
// which might update state or report error inline, which would then need to
// acquire cc.mu.
if err := cc.resolverWrapper.start(); err != nil {
return err
// If resolver creation fails, transition to TransientFailure. For a
// channel created with `NewClient`, the error will be returned on the
// first RPC. For a channel created with `Dial`, the error will be
// returned by `Dial`.
logger.Warningf("Failed to start resolver: %v", err)
cc.csMgr.updateState(connectivity.TransientFailure)
return status.Error(codes.Unavailable, err.Error())
}

cc.addTraceEvent("exiting idle mode")
Expand Down
34 changes: 34 additions & 0 deletions dial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package grpc

import (
"context"
"fmt"
"net"
"strings"
"testing"
Expand Down Expand Up @@ -312,3 +313,36 @@ func (s) TestResolverAddressesWithTypedNilAttribute(t *testing.T) {
type stringerVal struct{ s string }

func (s stringerVal) String() string { return s.s }

const errResolverBuilderScheme = "test-resolver-build-failure"

// errResolverBuilder is a resolver builder that returns an error from its Build
// method.
type errResolverBuilder struct {
err error
}

func (b *errResolverBuilder) Build(resolver.Target, resolver.ClientConn, resolver.BuildOptions) (resolver.Resolver, error) {
return nil, b.err
}

func (b *errResolverBuilder) Scheme() string {
return errResolverBuilderScheme
}

// Tests that Dial returns an error if the resolver builder returns an error
// from its Build method.
func (s) TestDial_ResolverBuilder_Error(t *testing.T) {
resolverErr := fmt.Errorf("resolver builder error")
dopts := []DialOption{
WithTransportCredentials(insecure.NewCredentials()),
WithResolvers(&errResolverBuilder{err: resolverErr}),
}
_, err := Dial(errResolverBuilderScheme+":///test.server", dopts...)
if err == nil {
t.Fatalf("Dial() succeeded when it should have failed")
}
if !strings.Contains(err.Error(), resolverErr.Error()) {
t.Fatalf("Dial() failed with error %v, want %v", err, resolverErr)
}
}
12 changes: 9 additions & 3 deletions internal/idle/idle.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,16 +234,22 @@ func (m *Manager) ExitIdleMode() error {
return nil
}

if err := m.enforcer.ExitIdleMode(); err != nil {
return fmt.Errorf("failed to exit idle mode: %w", err)
}
// This can fail if resolver creation fails. In that case, we want to
// return the error to the caller so that the RPC can fail. But we still
// need to undo the idle entry process, and ensure that the idle timer is
// started again.
err := m.enforcer.ExitIdleMode()

// Undo the idle entry process. This also respects any new RPC attempts.
atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
m.actuallyIdle = false

// Start a new timer to fire after the configured idle timeout.
m.resetIdleTimerLocked(m.timeout)

if err != nil {
return fmt.Errorf("failed to exit idle mode: %v", err)
}
return nil
}

Expand Down
43 changes: 42 additions & 1 deletion internal/idle/idle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package idle
import (
"context"
"fmt"
"strings"
"sync"
"sync/atomic"
"testing"
Expand All @@ -44,13 +45,14 @@ func Test(t *testing.T) {
}

type testEnforcer struct {
exitIdleErr error
exitIdleCh chan struct{}
enterIdleCh chan struct{}
}

func (ti *testEnforcer) ExitIdleMode() error {
ti.exitIdleCh <- struct{}{}
return nil
return ti.exitIdleErr

}

Expand Down Expand Up @@ -381,3 +383,42 @@ func (s) TestManager_IdleTimeoutRacesWithOnCallBegin(t *testing.T) {
})
}
}

// TestManager_ExitIdleError tests the case where ExitIdleMode on the enforcer
// returns an error. It verifies that the idle timer is started and the channel
// eventually attempts to enter idle mode.
func (s) TestManager_ExitIdleError(t *testing.T) {
callbackCh := overrideNewTimer(t)
exitIdleErr := fmt.Errorf("exit idle error")
enforcer := newTestEnforcer()
enforcer.exitIdleErr = exitIdleErr

mgr := NewManager(enforcer, defaultTestIdleTimeout)
defer mgr.Close()

// Call ExitIdleMode and expect it to fail.
if err := mgr.ExitIdleMode(); err == nil || !strings.Contains(err.Error(), "exit idle error") {
t.Fatalf("mgr.ExitIdleMode() returned: %v, want error: %v", err, exitIdleErr)
}

// Verify that ExitIdleMode was called on the enforcer.
select {
case <-enforcer.exitIdleCh:
case <-time.After(defaultTestShortTimeout):
t.Fatal("Timeout waiting for ExitIdleMode to be called on the enforcer")
}

// The timer should have been started. Wait for it to fire.
select {
case <-callbackCh:
case <-time.After(2 * defaultTestIdleTimeout):
t.Fatal("Timeout waiting for idle timer callback to fire")
}

// After the timer fires, the manager should attempt to enter idle mode.
select {
case <-enforcer.enterIdleCh:
case <-time.After(defaultTestShortTimeout):
t.Fatal("Timeout waiting for EnterIdleMode to be called on the enforcer")
}
}
40 changes: 40 additions & 0 deletions resolver_balancer_ext_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,19 @@ import (

"google.golang.org/grpc"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/balancer/stub"
"google.golang.org/grpc/internal/channelz"
"google.golang.org/grpc/internal/testutils"
"google.golang.org/grpc/resolver"
"google.golang.org/grpc/resolver/manual"
"google.golang.org/grpc/status"

testgrpc "google.golang.org/grpc/interop/grpc_testing"
testpb "google.golang.org/grpc/interop/grpc_testing"
)

// TestResolverBalancerInteraction tests:
Expand Down Expand Up @@ -127,6 +133,40 @@ func (s) TestResolverBuildFailure(t *testing.T) {
}
}

// Tests the case where the resolver reports an error to the channel before
// reporting an update. Verifies that the channel eventually moves to
// TransientFailure and a subsequent RPCs returns the error reported by the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/ a / /

// resolver to the user.
func (s) TestResolverReportError(t *testing.T) {
const resolverErr = "test resolver error"
r := manual.NewBuilderWithScheme("whatever")
r.BuildCallback = func(_ resolver.Target, cc resolver.ClientConn, _ resolver.BuildOptions) {
cc.ReportError(errors.New(resolverErr))
}

cc, err := grpc.NewClient(r.Scheme()+":///", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
if err != nil {
t.Fatalf("Error creating client: %v", err)
}
defer cc.Close()
cc.Connect()

ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)

client := testgrpc.NewTestServiceClient(cc)
for range 5 {
_, err = client.EmptyCall(ctx, &testpb.Empty{})
if code := status.Code(err); code != codes.Unavailable {
t.Fatalf("EmptyCall() = %v, want %v", err, codes.Unavailable)
}
if err == nil || !strings.Contains(err.Error(), resolverErr) {
t.Fatalf("EmptyCall() = %q, want %q", err, resolverErr)
}
}
}

// TestEnterIdleDuringResolverUpdateState tests a scenario that used to deadlock
// while calling UpdateState at the same time as the resolver being closed while
// the channel enters idle mode.
Expand Down
1 change: 1 addition & 0 deletions resolver_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func (ccr *ccResolverWrapper) start() error {
Authority: ccr.cc.authority,
MetricsRecorder: ccr.cc.metricsRecorderList,
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert this diff & file.

var err error
// The delegating resolver is used unless:
// - A custom dialer is provided via WithContextDialer dialoption or
Expand Down
2 changes: 1 addition & 1 deletion stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ func newClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, meth
// created for both streaming and unary RPCs, and hence is a good place to
// track active RPC count.
if err := cc.idlenessMgr.OnCallBegin(); err != nil {
return nil, err
return nil, status.Error(codes.Unavailable, err.Error())
}
// Add a calloption, to decrement the active call count, that gets executed
// when the RPC completes.
Expand Down
Loading