Skip to content

Commit b3d80b2

Browse files
Properly handle grpc dial errors in the throttler metric aggregation (vitessio#18073)
Signed-off-by: Arthur Schreiber <[email protected]> Signed-off-by: Mohamed Hamza <[email protected]> Co-authored-by: Mohamed Hamza <[email protected]>
1 parent d36d183 commit b3d80b2

File tree

3 files changed

+122
-1
lines changed

3 files changed

+122
-1
lines changed

go/vt/vttablet/tabletserver/throttle/base/metric_result.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ package base
1919
import (
2020
"errors"
2121
"net"
22+
23+
"google.golang.org/grpc/codes"
24+
"google.golang.org/grpc/status"
2225
)
2326

2427
// MetricResult is what we expect our probes to return. This can be a numeric result, or
@@ -58,6 +61,11 @@ func IsDialTCPError(err error) bool {
5861
if err == nil {
5962
return false
6063
}
64+
65+
if s, ok := status.FromError(err); ok {
66+
return s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded
67+
}
68+
6169
switch err := err.(type) {
6270
case *net.OpError:
6371
return err.Op == "dial" && err.Net == "tcp"

go/vt/vttablet/tabletserver/throttle/throttler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,7 @@ func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, probe
904904
req := &tabletmanagerdatapb.CheckThrottlerRequest{} // We leave AppName empty; it will default to VitessName anyway, and we can save some proto space
905905
resp, gRPCErr := tmClient.CheckThrottler(ctx, probe.Tablet, req)
906906
if gRPCErr != nil {
907-
return metricsWithError(fmt.Errorf("gRPC error accessing tablet %v. Err=%v", probe.Alias, gRPCErr))
907+
return metricsWithError(fmt.Errorf("gRPC error accessing tablet %v. Err=%w", probe.Alias, gRPCErr))
908908
}
909909
throttleMetric.Value = resp.Value
910910
if resp.ResponseCode == tabletmanagerdatapb.CheckThrottlerResponseCode_INTERNAL_ERROR {

go/vt/vttablet/tabletserver/throttle/throttler_test.go

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,15 @@ import (
3030
"github.com/stretchr/testify/require"
3131
"golang.org/x/exp/maps"
3232

33+
"google.golang.org/grpc"
34+
"google.golang.org/grpc/credentials/insecure"
35+
3336
"vitess.io/vitess/go/protoutil"
37+
38+
"vitess.io/vitess/go/vt/grpcclient"
3439
"vitess.io/vitess/go/vt/topo"
3540
"vitess.io/vitess/go/vt/vtenv"
41+
"vitess.io/vitess/go/vt/vttablet/grpctmclient"
3642
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
3743
"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
3844
"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base"
@@ -837,6 +843,113 @@ func TestApplyThrottlerConfigAppCheckedMetrics(t *testing.T) {
837843
})
838844
}
839845

846+
func TestIsDialTCPError(t *testing.T) {
847+
// Verify that IsDialTCPError actually recognizes grpc dial errors
848+
cc, err := grpcclient.DialContext(t.Context(), ":0", true, grpc.WithTransportCredentials(insecure.NewCredentials()))
849+
require.NoError(t, err)
850+
defer cc.Close()
851+
852+
err = cc.Invoke(context.Background(), "/Fail", nil, nil)
853+
854+
require.True(t, base.IsDialTCPError(err))
855+
require.True(t, base.IsDialTCPError(fmt.Errorf("wrapped: %w", err)))
856+
857+
nonDialErr := fmt.Errorf("rpc error: code = NotFound desc = method not found")
858+
require.False(t, base.IsDialTCPError(nonDialErr))
859+
}
860+
861+
func TestProbeWithUnavailableHost(t *testing.T) {
862+
throttler := Throttler{
863+
throttledApps: cache.New(cache.NoExpiration, 0),
864+
heartbeatWriter: &FakeHeartbeatWriter{},
865+
}
866+
867+
alias := &topodatapb.TabletAlias{
868+
Cell: "cell1",
869+
Uid: 100,
870+
}
871+
872+
// The hostname used here is not routable, so the connection will fail.
873+
tablet := &topo.TabletInfo{
874+
Tablet: &topodatapb.Tablet{
875+
Alias: alias,
876+
Hostname: "192.0.2.0",
877+
MysqlHostname: "192.0.2.0",
878+
MysqlPort: 3306,
879+
PortMap: map[string]int32{"grpc": 5000},
880+
Type: topodatapb.TabletType_PRIMARY,
881+
},
882+
}
883+
884+
probe := &base.Probe{
885+
Alias: "cell1-100",
886+
Tablet: tablet.Tablet,
887+
CacheMillis: 100,
888+
}
889+
890+
tmClient := grpctmclient.NewClient()
891+
892+
probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
893+
894+
metrics := probeFunc(t.Context(), tmClient)
895+
require.True(t, base.IsDialTCPError(metrics["custom"].Err))
896+
897+
tabletResultsMap := base.TabletResultMap{
898+
"cell1-100": base.MetricResultMap{
899+
"custom": metrics["custom"],
900+
},
901+
}
902+
903+
worstMetric := base.AggregateTabletMetricResults("custom", tabletResultsMap, 0, true, 0.0)
904+
require.Equal(t, base.NoHostsMetricResult, worstMetric)
905+
}
906+
907+
func TestProbeWithEmptyHostAndPort(t *testing.T) {
908+
throttler := Throttler{
909+
throttledApps: cache.New(cache.NoExpiration, 0),
910+
heartbeatWriter: &FakeHeartbeatWriter{},
911+
}
912+
913+
alias := &topodatapb.TabletAlias{
914+
Cell: "cell1",
915+
Uid: 100,
916+
}
917+
918+
// The hostname used here is not routable, so the connection will fail.
919+
tablet := &topo.TabletInfo{
920+
Tablet: &topodatapb.Tablet{
921+
Alias: alias,
922+
Hostname: "",
923+
MysqlHostname: "192.0.2.0",
924+
MysqlPort: 3306,
925+
PortMap: map[string]int32{"grpc": 0},
926+
Type: topodatapb.TabletType_PRIMARY,
927+
},
928+
}
929+
930+
probe := &base.Probe{
931+
Alias: "cell1-100",
932+
Tablet: tablet.Tablet,
933+
CacheMillis: 100,
934+
}
935+
936+
tmClient := grpctmclient.NewClient()
937+
938+
probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
939+
940+
metrics := probeFunc(t.Context(), tmClient)
941+
require.True(t, base.IsDialTCPError(metrics["custom"].Err))
942+
943+
tabletResultsMap := base.TabletResultMap{
944+
"cell1-100": base.MetricResultMap{
945+
"custom": metrics["custom"],
946+
},
947+
}
948+
949+
worstMetric := base.AggregateTabletMetricResults("custom", tabletResultsMap, 0, true, 0.0)
950+
require.Equal(t, base.NoHostsMetricResult, worstMetric)
951+
}
952+
840953
func TestIsAppThrottled(t *testing.T) {
841954
plusOneHour := time.Now().Add(time.Hour)
842955
throttler := Throttler{

0 commit comments

Comments
 (0)