@@ -112,6 +112,14 @@ const (
112112 disableFighters
113113)
114114
115+ // packetKind classifies a packet sent through DERP as disco or other; its string value doubles as the "kind" metric label.
116+ type packetKind string
117+
118+ const (
119+ packetKindDisco packetKind = "disco" // used by recordDrop when disco.LooksLikeDiscoWrapper reports true for the packet bytes
120+ packetKindOther packetKind = "other" // used for every packet that does not look like a disco wrapper
121+ )
122+
115123type align64 [0 ]atomic.Int64 // zero-length array, placed as a blank (`_`) field in Server purely for the side effect of its 64-bit alignment
116124
117125// Server is a DERP server.
@@ -131,44 +139,37 @@ type Server struct {
131139 debug bool
132140
133141 // Counters:
134- packetsSent , bytesSent expvar.Int
135- packetsRecv , bytesRecv expvar.Int
136- packetsRecvByKind metrics.LabelMap
137- packetsRecvDisco * expvar.Int
138- packetsRecvOther * expvar.Int
139- _ align64
140- packetsDropped expvar.Int
141- packetsDroppedReason metrics.LabelMap
142- packetsDroppedReasonCounters []* expvar.Int // indexed by dropReason
143- packetsDroppedType metrics.LabelMap
144- packetsDroppedTypeDisco * expvar.Int
145- packetsDroppedTypeOther * expvar.Int
146- _ align64
147- packetsForwardedOut expvar.Int
148- packetsForwardedIn expvar.Int
149- peerGoneDisconnectedFrames expvar.Int // number of peer disconnected frames sent
150- peerGoneNotHereFrames expvar.Int // number of peer not here frames sent
151- gotPing expvar.Int // number of ping frames from client
152- sentPong expvar.Int // number of pong frames enqueued to client
153- accepts expvar.Int
154- curClients expvar.Int
155- curClientsNotIdeal expvar.Int
156- curHomeClients expvar.Int // ones with preferred
157- dupClientKeys expvar.Int // current number of public keys we have 2+ connections for
158- dupClientConns expvar.Int // current number of connections sharing a public key
159- dupClientConnTotal expvar.Int // total number of accepted connections when a dup key existed
160- unknownFrames expvar.Int
161- homeMovesIn expvar.Int // established clients announce home server moves in
162- homeMovesOut expvar.Int // established clients announce home server moves out
163- multiForwarderCreated expvar.Int
164- multiForwarderDeleted expvar.Int
165- removePktForwardOther expvar.Int
166- sclientWriteTimeouts expvar.Int
167- avgQueueDuration * uint64 // In milliseconds; accessed atomically
168- tcpRtt metrics.LabelMap // histogram
169- meshUpdateBatchSize * metrics.Histogram
170- meshUpdateLoopCount * metrics.Histogram
171- bufferedWriteFrames * metrics.Histogram // how many sendLoop frames (or groups of related frames) get written per flush
142+ packetsSent , bytesSent expvar.Int
143+ packetsRecv , bytesRecv expvar.Int
144+ packetsRecvByKind metrics.LabelMap
145+ packetsRecvDisco * expvar.Int
146+ packetsRecvOther * expvar.Int
147+ _ align64
148+ packetsForwardedOut expvar.Int
149+ packetsForwardedIn expvar.Int
150+ peerGoneDisconnectedFrames expvar.Int // number of peer disconnected frames sent
151+ peerGoneNotHereFrames expvar.Int // number of peer not here frames sent
152+ gotPing expvar.Int // number of ping frames from client
153+ sentPong expvar.Int // number of pong frames enqueued to client
154+ accepts expvar.Int
155+ curClients expvar.Int
156+ curClientsNotIdeal expvar.Int
157+ curHomeClients expvar.Int // ones with preferred
158+ dupClientKeys expvar.Int // current number of public keys we have 2+ connections for
159+ dupClientConns expvar.Int // current number of connections sharing a public key
160+ dupClientConnTotal expvar.Int // total number of accepted connections when a dup key existed
161+ unknownFrames expvar.Int
162+ homeMovesIn expvar.Int // established clients announce home server moves in
163+ homeMovesOut expvar.Int // established clients announce home server moves out
164+ multiForwarderCreated expvar.Int
165+ multiForwarderDeleted expvar.Int
166+ removePktForwardOther expvar.Int
167+ sclientWriteTimeouts expvar.Int
168+ avgQueueDuration * uint64 // In milliseconds; accessed atomically
169+ tcpRtt metrics.LabelMap // histogram
170+ meshUpdateBatchSize * metrics.Histogram
171+ meshUpdateLoopCount * metrics.Histogram
172+ bufferedWriteFrames * metrics.Histogram // how many sendLoop frames (or groups of related frames) get written per flush
172173
173174 // verifyClientsLocalTailscaled only accepts client connections to the DERP
174175 // server if the clientKey is a known peer in the network, as specified by a
@@ -351,68 +352,93 @@ type Conn interface {
351352 SetWriteDeadline (time.Time ) error
352353}
353354
355+ var packetsDropped = metrics .NewMultiLabelMap [dropReasonKindLabels ]( // package-level metric keyed by dropReasonKindLabels (Reason + Kind); not a Server field, so every Server in the process updates the same map
356+ "derp_packets_dropped" , // presumably the exported metric name — TODO confirm against metrics.NewMultiLabelMap
357+ "counter" , // presumably the metric type — TODO confirm
358+ "DERP packets dropped by reason and by kind" ) // presumably the help text — TODO confirm
359+
354360// NewServer returns a new DERP server. It doesn't listen on its own.
355361// Connections are given to it via Server.Accept.
356362func NewServer (privateKey key.NodePrivate , logf logger.Logf ) * Server {
357363 var ms runtime.MemStats
358364 runtime .ReadMemStats (& ms ) // snapshot of runtime memory stats; ms.Sys is stored below as memSys0
359365
360366 s := & Server {
361- debug : envknob .Bool ("DERP_DEBUG_LOGS" ),
362- privateKey : privateKey ,
363- publicKey : privateKey .Public (),
364- logf : logf ,
365- limitedLogf : logger .RateLimitedFn (logf , 30 * time .Second , 5 , 100 ),
366- packetsRecvByKind : metrics.LabelMap {Label : "kind" },
367- packetsDroppedReason : metrics.LabelMap {Label : "reason" },
368- packetsDroppedType : metrics.LabelMap {Label : "type" },
369- clients : map [key.NodePublic ]* clientSet {},
370- clientsMesh : map [key.NodePublic ]PacketForwarder {},
371- netConns : map [Conn ]chan struct {}{},
372- memSys0 : ms .Sys ,
373- watchers : set.Set [* sclient ]{},
374- peerGoneWatchers : map [key.NodePublic ]set.HandleSet [func (key.NodePublic )]{},
375- avgQueueDuration : new (uint64 ),
376- tcpRtt : metrics.LabelMap {Label : "le" },
377- meshUpdateBatchSize : metrics .NewHistogram ([]float64 {0 , 1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 }),
378- meshUpdateLoopCount : metrics .NewHistogram ([]float64 {0 , 1 , 2 , 5 , 10 , 20 , 50 , 100 }),
379- bufferedWriteFrames : metrics .NewHistogram ([]float64 {0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 15 , 20 , 25 , 50 , 100 }),
380- keyOfAddr : map [netip.AddrPort ]key.NodePublic {},
381- clock : tstime.StdClock {},
367+ debug : envknob .Bool ("DERP_DEBUG_LOGS" ),
368+ privateKey : privateKey ,
369+ publicKey : privateKey .Public (),
370+ logf : logf ,
371+ limitedLogf : logger .RateLimitedFn (logf , 30 * time .Second , 5 , 100 ),
372+ packetsRecvByKind : metrics.LabelMap {Label : "kind" },
373+ clients : map [key.NodePublic ]* clientSet {},
374+ clientsMesh : map [key.NodePublic ]PacketForwarder {},
375+ netConns : map [Conn ]chan struct {}{},
376+ memSys0 : ms .Sys ,
377+ watchers : set.Set [* sclient ]{},
378+ peerGoneWatchers : map [key.NodePublic ]set.HandleSet [func (key.NodePublic )]{},
379+ avgQueueDuration : new (uint64 ),
380+ tcpRtt : metrics.LabelMap {Label : "le" },
381+ meshUpdateBatchSize : metrics .NewHistogram ([]float64 {0 , 1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 }),
382+ meshUpdateLoopCount : metrics .NewHistogram ([]float64 {0 , 1 , 2 , 5 , 10 , 20 , 50 , 100 }),
383+ bufferedWriteFrames : metrics .NewHistogram ([]float64 {0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 15 , 20 , 25 , 50 , 100 }),
384+ keyOfAddr : map [netip.AddrPort ]key.NodePublic {},
385+ clock : tstime.StdClock {},
382386 }
383387 s .initMetacert ()
384- s .packetsRecvDisco = s .packetsRecvByKind .Get ("disco" )
385- s .packetsRecvOther = s .packetsRecvByKind .Get ("other" )
388+ s .packetsRecvDisco = s .packetsRecvByKind .Get (string ( packetKindDisco ) ) // keep a direct pointer to the "disco" entry of the per-kind receive counter
389+ s .packetsRecvOther = s .packetsRecvByKind .Get (string ( packetKindOther ) ) // same for the "other" entry
386390
387- s .packetsDroppedReasonCounters = s .genPacketsDroppedReasonCounters ()
388-
389- s .packetsDroppedTypeDisco = s .packetsDroppedType .Get ("disco" )
390- s .packetsDroppedTypeOther = s .packetsDroppedType .Get ("other" )
391+ genPacketsDroppedCounters () // NOTE(review): operates on the package-level packetsDropped metric, not on s, and runs on every NewServer call rather than once per process
391392
392393 s .perClientSendQueueDepth = getPerClientSendQueueDepth ()
393394 return s
394395}
395396
396- func (s * Server ) genPacketsDroppedReasonCounters () []* expvar.Int {
397- getMetric := s .packetsDroppedReason .Get
398- ret := []* expvar.Int {
399- dropReasonUnknownDest : getMetric ("unknown_dest" ),
400- dropReasonUnknownDestOnFwd : getMetric ("unknown_dest_on_fwd" ),
401- dropReasonGoneDisconnected : getMetric ("gone_disconnected" ),
402- dropReasonQueueHead : getMetric ("queue_head" ),
403- dropReasonQueueTail : getMetric ("queue_tail" ),
404- dropReasonWriteError : getMetric ("write_error" ),
405- dropReasonDupClient : getMetric ("dup_client" ),
397+ func genPacketsDroppedCounters () { // adds 0 to the disco and other entries of packetsDropped for every listed drop reason, then panics if either entry reads back as nil
398+ initMetrics := func (reason dropReason ) { // adds 0 under both kinds for the given reason
399+ packetsDropped .Add (dropReasonKindLabels {
400+ Kind : string (packetKindDisco ),
401+ Reason : string (reason ),
402+ }, 0 )
403+ packetsDropped .Add (dropReasonKindLabels {
404+ Kind : string (packetKindOther ),
405+ Reason : string (reason ),
406+ }, 0 )
407+ }
408+ getMetrics := func (reason dropReason ) []expvar.Var { // returns the {disco, other} entries for the given reason, in that order
409+ return []expvar.Var {
410+ packetsDropped .Get (dropReasonKindLabels {
411+ Kind : string (packetKindDisco ),
412+ Reason : string (reason ),
413+ }),
414+ packetsDropped .Get (dropReasonKindLabels {
415+ Kind : string (packetKindOther ),
416+ Reason : string (reason ),
417+ }),
418+ }
406419 }
407- if len (ret ) != int (numDropReasons ) {
408- panic ("dropReason metrics out of sync" )
420+
421+ dropReasons := []dropReason { // NOTE(review): hand-maintained list; with numDropReasons removed, nothing ties it to the dropReason const block, so a new reason must be added in both places
422+ dropReasonUnknownDest ,
423+ dropReasonUnknownDestOnFwd ,
424+ dropReasonGoneDisconnected ,
425+ dropReasonQueueHead ,
426+ dropReasonQueueTail ,
427+ dropReasonWriteError ,
428+ dropReasonDupClient ,
409429 }
410- for i := range numDropReasons {
411- if ret [i ] == nil {
430+
431+ for _ , dr := range dropReasons {
432+ initMetrics (dr )
433+ m := getMetrics (dr )
434+ if len (m ) != 2 { // NOTE(review): getMetrics always returns a two-element slice literal, so this branch cannot trigger
435+ panic ("dropReason metrics out of sync" )
436+ }
437+
438+ if m [0 ] == nil || m [1 ] == nil { // NOTE(review): compares expvar.Var interface values with nil; whether a missing entry shows up as nil here depends on what packetsDropped.Get returns — confirm
412439 panic ("dropReason metrics out of sync" )
413440 }
414441 }
415- return ret
416442}
417443
418444// SetMesh sets the pre-shared key that regional DERP servers used to mesh
@@ -1152,31 +1178,36 @@ func (c *sclient) debugLogf(format string, v ...any) {
11521178 }
11531179}
11541180
1155- // dropReason is why we dropped a DERP frame.
1156- type dropReason int
1181+ type dropReasonKindLabels struct { // label set used as the type parameter (key) of the package-level packetsDropped metric
1182+ Reason string // metric label corresponding to a given dropReason
1183+ Kind string // either `disco` or `other`
1184+ }
11571185
1158- //go:generate go run tailscale.com/cmd/addlicense -file dropreason_string.go go run golang.org/x/tools/cmd/stringer -type=dropReason -trimprefix=dropReason
1186+ // dropReason is why we dropped a DERP frame.
1187+ type dropReason string // the string value is used directly as the Reason label, via string(reason)
11591188
11601189const (
1161- dropReasonUnknownDest dropReason = iota // unknown destination pubkey
1162- dropReasonUnknownDestOnFwd // unknown destination pubkey on a derp-forwarded packet
1163- dropReasonGoneDisconnected // destination tailscaled disconnected before we could send
1164- dropReasonQueueHead // destination queue is full, dropped packet at queue head
1165- dropReasonQueueTail // destination queue is full, dropped packet at queue tail
1166- dropReasonWriteError // OS write() failed
1167- dropReasonDupClient // the public key is connected 2+ times (active/active, fighting)
1168- numDropReasons // unused; keep last
1190+ dropReasonUnknownDest dropReason = "unknown_dest" // unknown destination pubkey
1191+ dropReasonUnknownDestOnFwd dropReason = "unknown_dest_on_fwd" // unknown destination pubkey on a derp-forwarded packet
1192+ dropReasonGoneDisconnected dropReason = "gone_disconnected" // destination tailscaled disconnected before we could send
1193+ dropReasonQueueHead dropReason = "queue_head" // destination queue is full, dropped packet at queue head
1194+ dropReasonQueueTail dropReason = "queue_tail" // destination queue is full, dropped packet at queue tail
1195+ dropReasonWriteError dropReason = "write_error" // OS write() failed
1196+ dropReasonDupClient dropReason = "dup_client" // the public key is connected 2+ times (active/active, fighting)
11691197)
11701198
11711199func (s * Server ) recordDrop (packetBytes []byte , srcKey , dstKey key.NodePublic , reason dropReason ) {
1172- s .packetsDropped .Add (1 )
1173- s .packetsDroppedReasonCounters [reason ].Add (1 )
1200+ labels := dropReasonKindLabels {
1201+ Reason : string (reason ),
1202+ }
11741203 looksDisco := disco .LooksLikeDiscoWrapper (packetBytes )
11751204 if looksDisco {
1176- s . packetsDroppedTypeDisco . Add ( 1 )
1205+ labels . Kind = string ( packetKindDisco )
11771206 } else {
1178- s . packetsDroppedTypeOther . Add ( 1 )
1207+ labels . Kind = string ( packetKindOther )
11791208 }
1209+ packetsDropped .Add (labels , 1 )
1210+
11801211 if verboseDropKeys [dstKey ] {
11811212 // Preformat the log string prior to calling limitedLogf. The
11821213 // limiter acts based on the format string, and we want to
@@ -2095,9 +2126,6 @@ func (s *Server) ExpVar() expvar.Var {
20952126 m .Set ("accepts" , & s .accepts )
20962127 m .Set ("bytes_received" , & s .bytesRecv )
20972128 m .Set ("bytes_sent" , & s .bytesSent )
2098- m .Set ("packets_dropped" , & s .packetsDropped )
2099- m .Set ("counter_packets_dropped_reason" , & s .packetsDroppedReason )
2100- m .Set ("counter_packets_dropped_type" , & s .packetsDroppedType )
21012129 m .Set ("counter_packets_received_kind" , & s .packetsRecvByKind )
21022130 m .Set ("packets_sent" , & s .packetsSent )
21032131 m .Set ("packets_received" , & s .packetsRecv )
0 commit comments