Skip to content

Commit 6c597f6

Browse files
authored
go/vt/discovery: configurable logger (vitessio#17846)
Signed-off-by: Max Englander <[email protected]>
1 parent b3d80b2 commit 6c597f6

File tree

4 files changed

+141
-33
lines changed

4 files changed

+141
-33
lines changed

go/vt/discovery/healthcheck.go

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ import (
5454
"vitess.io/vitess/go/netutil"
5555
"vitess.io/vitess/go/stats"
5656
"vitess.io/vitess/go/vt/log"
57+
"vitess.io/vitess/go/vt/logutil"
5758
"vitess.io/vitess/go/vt/proto/query"
5859
"vitess.io/vitess/go/vt/proto/topodata"
5960
"vitess.io/vitess/go/vt/proto/vtrpc"
@@ -190,8 +191,10 @@ func FilteringKeyspaces() bool {
190191
return len(KeyspacesToWatch) > 0
191192
}
192193

193-
type KeyspaceShardTabletType string
194-
type tabletAliasString string
194+
type (
195+
KeyspaceShardTabletType string
196+
tabletAliasString string
197+
)
195198

196199
// HealthCheck declares what the TabletGateway needs from the HealthCheck
197200
type HealthCheck interface {
@@ -301,6 +304,9 @@ type HealthCheckImpl struct {
301304
subscribers map[chan *TabletHealth]string
302305
// loadTabletsTrigger is used to immediately load information about tablets of a specific shard.
303306
loadTabletsTrigger chan topo.KeyspaceShard
307+
// options contains optional settings used to modify HealthCheckImpl
308+
// behavior.
309+
options Options
304310
}
305311

306312
// NewVTGateHealthCheckFilters returns healthcheck filters for vtgate.
@@ -352,9 +358,9 @@ func NewVTGateHealthCheckFilters() (filters TabletFilters, err error) {
352358
// filters.
353359
//
354360
// Is one or more filters to apply when determining what tablets we want to stream healthchecks from.
355-
func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Duration, topoServer *topo.Server, localCell, cellsToWatch string, filters TabletFilter) *HealthCheckImpl {
356-
log.Infof("loading tablets for cells: %v", cellsToWatch)
357-
361+
func NewHealthCheck(
362+
ctx context.Context, retryDelay, healthCheckTimeout time.Duration, topoServer *topo.Server, localCell, cellsToWatch string, filters TabletFilter, opts ...Option,
363+
) *HealthCheckImpl {
358364
hc := &HealthCheckImpl{
359365
ts: topoServer,
360366
cell: localCell,
@@ -366,19 +372,23 @@ func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Dur
366372
subscribers: make(map[chan *TabletHealth]string),
367373
cellAliases: make(map[string]string),
368374
loadTabletsTrigger: make(chan topo.KeyspaceShard, 1024),
375+
options: withOptions(opts...),
369376
}
377+
378+
hc.logger().Infof("loading tablets for cells: %v", cellsToWatch)
379+
370380
var topoWatchers []*TopologyWatcher
371381
cells := strings.Split(cellsToWatch, ",")
372382
if cellsToWatch == "" {
373383
cells = append(cells, localCell)
374384
}
375385

376386
for _, c := range cells {
377-
log.Infof("Setting up healthcheck for cell: %v", c)
387+
hc.logger().Infof("Setting up healthcheck for cell: %v", c)
378388
if c == "" {
379389
continue
380390
}
381-
topoWatchers = append(topoWatchers, NewTopologyWatcher(ctx, topoServer, hc, filters, c, refreshInterval, refreshKnownTablets))
391+
topoWatchers = append(topoWatchers, NewTopologyWatcher(ctx, topoServer, hc, filters, c, refreshInterval, refreshKnownTablets, opts...))
382392
}
383393

384394
hc.topoWatchers = topoWatchers
@@ -403,7 +413,7 @@ func (hc *HealthCheckImpl) AddTablet(tablet *topodata.Tablet) {
403413
return
404414
}
405415

406-
log.Infof("Adding tablet to healthcheck: %v", tablet)
416+
hc.logger().Infof("Adding tablet to healthcheck: %v", tablet)
407417
hc.mu.Lock()
408418
defer hc.mu.Unlock()
409419
if hc.healthByAlias == nil {
@@ -421,14 +431,15 @@ func (hc *HealthCheckImpl) AddTablet(tablet *topodata.Tablet) {
421431
cancelFunc: cancelFunc,
422432
Tablet: tablet,
423433
Target: target,
434+
logger: hc.logger(),
424435
}
425436

426437
// add to our datastore
427438
key := KeyFromTarget(target)
428439
tabletAlias := topoproto.TabletAliasString(tablet.Alias)
429440
if _, ok := hc.healthByAlias[tabletAliasString(tabletAlias)]; ok {
430441
// We should not add a tablet that we already have
431-
log.Errorf("Program bug: tried to add existing tablet: %v to healthcheck", tabletAlias)
442+
hc.logger().Errorf("Program bug: tried to add existing tablet: %v to healthcheck", tabletAlias)
432443
return
433444
}
434445
hc.healthByAlias[tabletAliasString(tabletAlias)] = thc
@@ -456,7 +467,7 @@ func (hc *HealthCheckImpl) ReplaceTablet(old, new *topodata.Tablet) {
456467
}
457468

458469
func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) {
459-
log.Infof("Removing tablet from healthcheck: %v", tablet)
470+
hc.logger().Infof("Removing tablet from healthcheck: %v", tablet)
460471
hc.mu.Lock()
461472
defer hc.mu.Unlock()
462473

@@ -499,7 +510,7 @@ func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) {
499510
// delete from authoritative map
500511
th, ok := hc.healthByAlias[tabletAlias]
501512
if !ok {
502-
log.Infof("We have no health data for tablet: %v, it might have been deleted already", tablet)
513+
hc.logger().Infof("We have no health data for tablet: %v, it might have been deleted already", tablet)
503514
return
504515
}
505516
// Calling this will end the context associated with th.checkConn,
@@ -518,7 +529,7 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ
518529
// so that we're not racing to update it and in effect re-adding a copy of the
519530
// tablet record that was deleted
520531
if _, ok := hc.healthByAlias[tabletAlias]; !ok {
521-
log.Infof("Tablet %v has been deleted, skipping health update", th.Tablet)
532+
hc.logger().Infof("Tablet %v has been deleted, skipping health update", th.Tablet)
522533
return
523534
}
524535

@@ -543,7 +554,7 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ
543554
// causing an interruption where no primary is assigned to the shard.
544555
if prevTarget.TabletType == topodata.TabletType_PRIMARY {
545556
if primaries := hc.healthData[oldTargetKey]; len(primaries) == 0 {
546-
log.Infof("We will have no health data for the next new primary tablet after demoting the tablet: %v, so start loading tablets now", topotools.TabletIdent(th.Tablet))
557+
hc.logger().Infof("We will have no health data for the next new primary tablet after demoting the tablet: %v, so start loading tablets now", topotools.TabletIdent(th.Tablet))
547558
// We want to trigger a call to load tablets for this keyspace-shard,
548559
// but we want this to be non-blocking to prevent the code from deadlocking as described in https://github.com/vitessio/vitess/issues/16994.
549560
// If the buffer is exhausted, then we'll just receive the update when all the tablets are loaded on the ticker.
@@ -572,7 +583,7 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ
572583
// We already have one up server, see if we
573584
// need to replace it.
574585
if th.PrimaryTermStartTime < hc.healthy[targetKey][0].PrimaryTermStartTime {
575-
log.Warningf("not marking healthy primary %s as Up for %s because its PrimaryTermStartTime is smaller than the highest known timestamp from previous PRIMARYs %s: %d < %d ",
586+
hc.logger().Warningf("not marking healthy primary %s as Up for %s because its PrimaryTermStartTime is smaller than the highest known timestamp from previous PRIMARYs %s: %d < %d ",
576587
topoproto.TabletAliasString(th.Tablet.Alias),
577588
topoproto.KeyspaceShardString(th.Target.Keyspace, th.Target.Shard),
578589
topoproto.TabletAliasString(hc.healthy[targetKey][0].Tablet.Alias),
@@ -609,7 +620,7 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ
609620

610621
isNewPrimary := isPrimary && prevTarget.TabletType != topodata.TabletType_PRIMARY
611622
if isNewPrimary {
612-
log.Errorf("Adding 1 to PrimaryPromoted counter for target: %v, tablet: %v, tabletType: %v", prevTarget, topoproto.TabletAliasString(th.Tablet.Alias), th.Target.TabletType)
623+
hc.logger().Errorf("Adding 1 to PrimaryPromoted counter for target: %v, tablet: %v, tabletType: %v", prevTarget, topoproto.TabletAliasString(th.Tablet.Alias), th.Target.TabletType)
613624
hcPrimaryPromotedCounters.Add([]string{th.Target.Keyspace, th.Target.Shard}, 1)
614625
}
615626

@@ -666,7 +677,7 @@ func (hc *HealthCheckImpl) broadcast(th *TabletHealth) {
666677
default:
667678
// If the channel is full, we drop the message.
668679
hcChannelFullCounter.Add(1)
669-
log.Warningf("HealthCheck broadcast channel is full for %v, dropping message for %s", subscriber, topotools.TabletIdent(th.Tablet))
680+
hc.logger().Warningf("HealthCheck broadcast channel is full for %v, dropping message for %s", subscriber, topotools.TabletIdent(th.Tablet))
670681
// Print the stack trace only once.
671682
printStack()
672683
}
@@ -849,7 +860,7 @@ func (hc *HealthCheckImpl) waitForTablets(ctx context.Context, targets []*query.
849860
timer.Stop()
850861
for _, target := range targets {
851862
if target != nil {
852-
log.Infof("couldn't find tablets for target: %v", target)
863+
hc.logger().Infof("couldn't find tablets for target: %v", target)
853864
}
854865
}
855866
return ctx.Err()
@@ -977,7 +988,7 @@ func (hc *HealthCheckImpl) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
977988
if err != nil {
978989
// Error logged
979990
if _, err := w.Write([]byte(err.Error())); err != nil {
980-
log.Errorf("write to buffer error failed: %v", err)
991+
hc.logger().Errorf("write to buffer error failed: %v", err)
981992
}
982993

983994
return
@@ -988,7 +999,7 @@ func (hc *HealthCheckImpl) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
988999

9891000
// Error logged
9901001
if _, err := w.Write(buf.Bytes()); err != nil {
991-
log.Errorf("write to buffer bytes failed: %v", err)
1002+
hc.logger().Errorf("write to buffer bytes failed: %v", err)
9921003
}
9931004
}
9941005

@@ -1029,6 +1040,11 @@ func (hc *HealthCheckImpl) stateChecksum() int64 {
10291040
return int64(crc32.ChecksumIEEE(buf.Bytes()))
10301041
}
10311042

1043+
// logger returns the logutil.Logger used by the healthcheck.
1044+
func (hc *HealthCheckImpl) logger() logutil.Logger {
1045+
return hc.options.logger
1046+
}
1047+
10321048
// TabletToMapKey creates a key to the map from tablet's host and ports.
10331049
// It should only be used in discovery and related module.
10341050
func TabletToMapKey(tablet *topodata.Tablet) string {

go/vt/discovery/options.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
Copyright 2025 The Vitess Authors.
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
Unless required by applicable law or agreed to in writing, software
8+
distributed under the License is distributed on an "AS IS" BASIS,
9+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
See the License for the specific language governing permissions and
11+
limitations under the License.
12+
*/
13+
14+
package discovery
15+
16+
import (
17+
"vitess.io/vitess/go/vt/logutil"
18+
)
19+
20+
// Options configure a discovery components. Options are set by the Option
21+
// values passed to the component constructors.
22+
type Options struct {
23+
logger logutil.Logger
24+
}
25+
26+
// Option configures how we perform certain operations.
27+
type Option interface {
28+
apply(*Options)
29+
}
30+
31+
// funcOption wraps a function that modifies options into an implementation of
32+
// the Option interface.
33+
type funcOption struct {
34+
f func(*Options)
35+
}
36+
37+
func defaultOptions() Options {
38+
return Options{
39+
logger: logutil.NewConsoleLogger(),
40+
}
41+
}
42+
43+
func withOptions(dos ...Option) Options {
44+
os := defaultOptions()
45+
for _, do := range dos {
46+
do.apply(&os)
47+
}
48+
return os
49+
}
50+
51+
func (fhco *funcOption) apply(dos *Options) {
52+
fhco.f(dos)
53+
}
54+
55+
func newFuncOption(f func(*Options)) *funcOption {
56+
return &funcOption{
57+
f: f,
58+
}
59+
}
60+
61+
// WithLogger accepts a custom logger to use in a discovery component. If this
62+
// option is not provided then the default system logger will be used.
63+
func WithLogger(l logutil.Logger) Option {
64+
return newFuncOption(func(o *Options) {
65+
o.logger = l
66+
})
67+
}

go/vt/discovery/tablet_health_check.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525
"time"
2626

2727
"vitess.io/vitess/go/vt/grpcclient"
28-
"vitess.io/vitess/go/vt/log"
28+
"vitess.io/vitess/go/vt/logutil"
2929
"vitess.io/vitess/go/vt/proto/vtrpc"
3030
"vitess.io/vitess/go/vt/topo/topoproto"
3131
"vitess.io/vitess/go/vt/topotools"
@@ -71,6 +71,8 @@ type tabletHealthCheck struct {
7171
// possibly delete both these
7272
loggedServingState bool
7373
lastResponseTimestamp time.Time // timestamp of the last healthcheck response
74+
// logger is used to log messages.
75+
logger logutil.Logger
7476
}
7577

7678
// String is defined because we want to print a []*tabletHealthCheck array nicely.
@@ -107,7 +109,7 @@ func (thc *tabletHealthCheck) setServingState(serving bool, reason string) {
107109
if !thc.loggedServingState || (serving != thc.Serving) {
108110
// Emit the log from a separate goroutine to avoid holding
109111
// the th lock while logging is happening
110-
log.Infof("HealthCheckUpdate(Serving State): tablet: %v serving %v => %v for %v/%v (%v) reason: %s",
112+
thc.logger.Infof("HealthCheckUpdate(Serving State): tablet: %v serving %v => %v for %v/%v (%v) reason: %s",
111113
topotools.TabletIdent(thc.Tablet),
112114
thc.Serving,
113115
serving,
@@ -294,7 +296,7 @@ func (thc *tabletHealthCheck) checkConn(hc *HealthCheckImpl) {
294296
// the healthcheck cache again via the topology watcher.
295297
// WARNING: Under no other circumstances should we be deleting the tablet here.
296298
if strings.Contains(err.Error(), "health stats mismatch") {
297-
log.Warningf("deleting tablet %v from healthcheck due to health stats mismatch", thc.Tablet)
299+
thc.logger.Warningf("deleting tablet %v from healthcheck due to health stats mismatch", thc.Tablet)
298300
hc.deleteTablet(thc.Tablet)
299301
return
300302
}
@@ -331,7 +333,7 @@ func (thc *tabletHealthCheck) checkConn(hc *HealthCheckImpl) {
331333
}
332334

333335
func (thc *tabletHealthCheck) closeConnection(ctx context.Context, err error) {
334-
log.Warningf("tablet %v healthcheck stream error: %v", thc.Tablet, err)
336+
thc.logger.Warningf("tablet %v healthcheck stream error: %v", thc.Tablet, err)
335337
thc.setServingState(false, err.Error())
336338
thc.LastError = err
337339
_ = thc.Conn.Close(ctx)

0 commit comments

Comments
 (0)