Skip to content

Commit 849d1b7

Browse files
authored
feat: skip memberlist zone-aware routing if there is a zone with members but no alive bridges (#800)
**What this PR does**: When zone-aware routing is enabled, if there is a zone with members but no alive bridges we end up in a network partitioning state. I think we can make the routing logic more resilient by checking for this specific condition, and internally disable the zone-aware routing if we detect a zone without alive bridges. The new logic is definitely slower than the previous one, but in absolute terms I don't think it's prohibitive. `SelectNodes()` gets called once every gossip interval (200ms) and takes 0.17ms with 10K nodes and 2 zones (10K nodes is a pretty large scale): ``` goos: darwin goarch: arm64 pkg: github.com/grafana/dskit/kv/memberlist cpu: Apple M3 Pro BenchmarkZoneAwareNodeSelectionDelegate_SelectNodes-11 7033 170166 ns/op 81922 B/op 1 allocs/op BenchmarkZoneAwareNodeSelectionDelegate_SelectNodes-11 6940 170834 ns/op 81922 B/op 1 allocs/op BenchmarkZoneAwareNodeSelectionDelegate_SelectNodes-11 6925 171649 ns/op 81922 B/op 1 allocs/op BenchmarkZoneAwareNodeSelectionDelegate_SelectNodes-11 6906 213222 ns/op 81922 B/op 1 allocs/op BenchmarkZoneAwareNodeSelectionDelegate_SelectNodes-11 6957 172906 ns/op 81922 B/op 1 allocs/op PASS ok github.com/grafana/dskit/kv/memberlist 9.339s ``` This PR is based on grafana/memberlist#13. **Which issue(s) this PR fixes**: N/A **Checklist** - [x] Tests updated --------- Signed-off-by: Marco Pracucci <marco@pracucci.com>
1 parent 191763b commit 849d1b7

File tree

5 files changed

+377
-100
lines changed

5 files changed

+377
-100
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,4 @@ require (
136136

137137
// Replace memberlist with our fork which includes some fixes that haven't been
138138
// merged upstream yet.
139-
replace github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20251125151730-2125cd96c917
139+
replace github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20251126142931-6f9f62ab6f86

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
139139
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
140140
github.com/grafana/gomemcache v0.0.0-20251008140118-65a671e12fdf h1:4fN03UTf6i7wZbLfxL+L/jAIiAxoThFi4rb9//SoGlY=
141141
github.com/grafana/gomemcache v0.0.0-20251008140118-65a671e12fdf/go.mod h1:j/s0jkda4UXTemDs7Pgw/vMT06alWc42CHisvYac0qw=
142-
github.com/grafana/memberlist v0.3.1-0.20251125151730-2125cd96c917 h1:9NzUQJ7ODlhiyYTc428KisLveJyA6Nd7rgqf/u6M/8U=
143-
github.com/grafana/memberlist v0.3.1-0.20251125151730-2125cd96c917/go.mod h1:h60o12SZn/ua/j0B6iKAZezA4eDaGsIuPO70eOaJ6WE=
142+
github.com/grafana/memberlist v0.3.1-0.20251126142931-6f9f62ab6f86 h1:aTwfQuroOmOr//QEn9J1MtC4R4CPR9/IbUd8hZrbWKo=
143+
github.com/grafana/memberlist v0.3.1-0.20251126142931-6f9f62ab6f86/go.mod h1:h60o12SZn/ua/j0B6iKAZezA4eDaGsIuPO70eOaJ6WE=
144144
github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8=
145145
github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls=
146146
github.com/grafana/pyroscope-go/godeltaprof v0.1.9 h1:c1Us8i6eSmkW+Ez05d3co8kasnuOY813tbMN8i/a3Og=

kv/memberlist/memberlist_client.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ func (m *KV) configureZoneAwareRouting(mlCfg *memberlist.Config) error {
528528
m.nodeMeta = localMeta
529529

530530
// Set up the node selection delegate.
531-
mlCfg.NodeSelection = newZoneAwareNodeSelectionDelegate(role, m.cfg.ZoneAwareRouting.Zone, m.logger)
531+
mlCfg.NodeSelection = newZoneAwareNodeSelectionDelegate(role, m.cfg.ZoneAwareRouting.Zone, m.logger, m.registerer)
532532

533533
// The bridge always prefer another bridge as first node. If the bridge only push/pull to 1 node per interval, then
534534
// it will only communicate to bridges, potentially leading to network partitioning if the gossiping is not

kv/memberlist/node_zone_aware_routing.go

Lines changed: 92 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@ package memberlist
33
import (
44
"flag"
55
"fmt"
6+
"math/rand"
7+
"slices"
68

79
"github.com/go-kit/log"
810
"github.com/go-kit/log/level"
911
"github.com/hashicorp/memberlist"
12+
"github.com/prometheus/client_golang/prometheus"
13+
"github.com/prometheus/client_golang/prometheus/promauto"
1014
)
1115

1216
// NodeRole represents the role of a node in the memberlist cluster.
@@ -85,29 +89,107 @@ type zoneAwareNodeSelectionDelegate struct {
8589
localRole NodeRole
8690
localZone string
8791
logger log.Logger
92+
93+
// Metrics
94+
selectNodesCalls prometheus.Counter
95+
selectNodesCallsSkipped prometheus.Counter
8896
}
8997

9098
// newZoneAwareNodeSelectionDelegate creates a new zone-aware node selection delegate.
91-
func newZoneAwareNodeSelectionDelegate(localRole NodeRole, localZone string, logger log.Logger) *zoneAwareNodeSelectionDelegate {
99+
func newZoneAwareNodeSelectionDelegate(localRole NodeRole, localZone string, logger log.Logger, registerer prometheus.Registerer) *zoneAwareNodeSelectionDelegate {
92100
return &zoneAwareNodeSelectionDelegate{
93101
localRole: localRole,
94102
localZone: localZone,
95103
logger: logger,
104+
selectNodesCalls: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
105+
Name: "memberlist_client_zone_aware_routing_select_nodes_total",
106+
Help: "Total number of times memberlist attempted to select node candidates for gossiping (tracked only when when zone-aware routing is enabled).",
107+
}),
108+
selectNodesCallsSkipped: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
109+
Name: "memberlist_client_zone_aware_routing_select_nodes_skipped_total",
110+
Help: "Total number of times memberlist zone-aware routing was skipped because the local zone is unknown or a zone has no alive bridges.",
111+
}),
96112
}
97113
}
98114

99-
// SelectNode implements memberlist.NodeSelectionDelegate.
100-
// It determines whether a remote node should be selected for gossip operations and whether it should be preferred.
101-
func (d *zoneAwareNodeSelectionDelegate) SelectNode(node memberlist.Node) (selected, preferred bool) {
102-
// Decode remote node metadata.
103-
remoteMeta := EncodedNodeMetadata(node.Meta)
104-
remoteZone := remoteMeta.Zone()
105-
remoteRole := remoteMeta.Role()
115+
// SelectNodes implements memberlist.NodeSelectionDelegate.
116+
// It determines which remote nodes should be selected for gossip operations and which one should be preferred.
117+
func (d *zoneAwareNodeSelectionDelegate) SelectNodes(nodes []*memberlist.NodeState) (selected []*memberlist.NodeState, preferred *memberlist.NodeState) {
118+
d.selectNodesCalls.Inc()
119+
120+
if d.localRole != NodeRoleMember && d.localRole != NodeRoleBridge {
121+
level.Warn(d.logger).Log("msg", "memberlist zone-aware routing is running with an unknown role", "role", d.localRole)
122+
}
123+
124+
// Skip zone-aware routing if local zone is not set.
125+
if d.localZone == "" {
126+
d.selectNodesCallsSkipped.Inc()
127+
return nodes, nil
128+
}
129+
130+
// Pre-allocate backing arrays on the stack for up to 5 zones (common case).
131+
zonesWithMembers := make([]string, 0, 5)
132+
zonesWithAliveBridges := make([]string, 0, 5)
133+
134+
// Build selected slice and track zones in a single pass.
135+
selected = make([]*memberlist.NodeState, 0, len(nodes))
136+
preferredCount := 0 // Count of preferred candidates seen (for reservoir sampling).
137+
138+
for _, node := range nodes {
139+
remoteMeta := EncodedNodeMetadata(node.Meta)
140+
remoteZone := remoteMeta.Zone()
141+
remoteRole := remoteMeta.Role()
142+
143+
// Track zones to check if any zone has members but no alive bridges.
144+
if remoteZone != "" {
145+
if remoteRole == NodeRoleBridge {
146+
// Only count alive bridges.
147+
if node.State == memberlist.StateAlive {
148+
if !slices.Contains(zonesWithAliveBridges, remoteZone) {
149+
zonesWithAliveBridges = append(zonesWithAliveBridges, remoteZone)
150+
}
151+
}
152+
} else {
153+
if !slices.Contains(zonesWithMembers, remoteZone) {
154+
zonesWithMembers = append(zonesWithMembers, remoteZone)
155+
}
156+
}
157+
}
158+
159+
// Apply zone-aware selection.
160+
isSelected, isPreferred := d.selectNode(remoteZone, remoteRole)
161+
if isSelected {
162+
selected = append(selected, node)
163+
if isPreferred {
164+
preferredCount++
165+
// Reservoir sampling: select this node with a probability of 1/preferredCount.
166+
if rand.Intn(preferredCount) == 0 {
167+
preferred = node
168+
}
169+
}
170+
}
171+
}
172+
173+
// Skip zone-aware routing if any zone has members but no alive bridges.
174+
// This prevents network partitioning when bridges are missing or dead.
175+
for _, zone := range zonesWithMembers {
176+
if !slices.Contains(zonesWithAliveBridges, zone) {
177+
d.selectNodesCallsSkipped.Inc()
178+
level.Warn(d.logger).Log("msg", "memberlist zone-aware routing is skipped because a zone has no alive bridge", "zone", zone)
179+
return nodes, nil
180+
}
181+
}
106182

107-
// If either the local zone or the remote zone are unknown, select the node but don't prefer it.
183+
return selected, preferred
184+
}
185+
186+
// selectNode determines whether a remote node should be selected for gossip operations
187+
// and whether it should be considered a preferred candidate.
188+
func (d *zoneAwareNodeSelectionDelegate) selectNode(remoteZone string, remoteRole NodeRole) (selected, preferredCandidate bool) {
189+
// If the remote zone is unknown, select the node but don't prefer it.
108190
// This prevents network partitioning: if every other memberlist node filters it out, then that
109191
// remote node would not receive updates and would get isolated.
110-
if d.localZone == "" || remoteZone == "" {
192+
if remoteZone == "" {
111193
return true, false
112194
}
113195

@@ -132,8 +214,6 @@ func (d *zoneAwareNodeSelectionDelegate) SelectNode(node memberlist.Node) (selec
132214
return false, false
133215

134216
default:
135-
level.Warn(d.logger).Log("msg", "memberlist zone-aware routing is running with an unknown role", "role", d.localRole)
136-
137217
// Unknown role: select but don't prefer (should never happen).
138218
return true, false
139219
}

0 commit comments

Comments
 (0)