Skip to content

Commit 9bfdfba

Browse files
authored
Backport reestablish relays from cert-v2 to release-1.9 (#1277)
1 parent ab81b62 commit 9bfdfba

File tree

5 files changed

+288
-126
lines changed

5 files changed

+288
-126
lines changed

e2e/handshakes_test.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@ package e2e
66
import (
77
"fmt"
88
"net/netip"
9+
"slices"
910
"testing"
1011
"time"
1112

13+
"github.com/google/gopacket"
14+
"github.com/google/gopacket/layers"
1215
"github.com/sirupsen/logrus"
1316
"github.com/slackhq/nebula"
1417
"github.com/slackhq/nebula/e2e/router"
@@ -369,6 +372,137 @@ func TestRelays(t *testing.T) {
369372
//TODO: assert we actually used the relay even though it should be impossible for a tunnel to have occurred without it
370373
}
371374

375+
func TestReestablishRelays(t *testing.T) {
376+
ca, _, caKey, _ := NewTestCaCert(time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
377+
myControl, myVpnIpNet, _, _ := newSimpleServer(ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}})
378+
relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(ca, caKey, "relay ", "10.128.0.128/24", m{"relay": m{"am_relay": true}})
379+
theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(ca, caKey, "them ", "10.128.0.2/24", m{"relay": m{"use_relays": true}})
380+
381+
// Teach my how to get to the relay and that their can be reached via the relay
382+
myControl.InjectLightHouseAddr(relayVpnIpNet.Addr(), relayUdpAddr)
383+
myControl.InjectRelays(theirVpnIpNet.Addr(), []netip.Addr{relayVpnIpNet.Addr()})
384+
relayControl.InjectLightHouseAddr(theirVpnIpNet.Addr(), theirUdpAddr)
385+
386+
// Build a router so we don't have to reason who gets which packet
387+
r := router.NewR(t, myControl, relayControl, theirControl)
388+
defer r.RenderFlow()
389+
390+
// Start the servers
391+
myControl.Start()
392+
relayControl.Start()
393+
theirControl.Start()
394+
395+
t.Log("Trigger a handshake from me to them via the relay")
396+
myControl.InjectTunUDPPacket(theirVpnIpNet.Addr(), 80, 80, []byte("Hi from me"))
397+
398+
p := r.RouteForAllUntilTxTun(theirControl)
399+
r.Log("Assert the tunnel works")
400+
assertUdpPacket(t, []byte("Hi from me"), p, myVpnIpNet.Addr(), theirVpnIpNet.Addr(), 80, 80)
401+
402+
t.Log("Ensure packet traversal from them to me via the relay")
403+
theirControl.InjectTunUDPPacket(myVpnIpNet.Addr(), 80, 80, []byte("Hi from them"))
404+
405+
p = r.RouteForAllUntilTxTun(myControl)
406+
r.Log("Assert the tunnel works")
407+
assertUdpPacket(t, []byte("Hi from them"), p, theirVpnIpNet.Addr(), myVpnIpNet.Addr(), 80, 80)
408+
409+
// If we break the relay's connection to 'them', 'me' needs to detect and recover the connection
410+
r.Log("Close the tunnel")
411+
relayControl.CloseTunnel(theirVpnIpNet.Addr(), true)
412+
413+
start := len(myControl.GetHostmap().Indexes)
414+
curIndexes := len(myControl.GetHostmap().Indexes)
415+
for curIndexes >= start {
416+
curIndexes = len(myControl.GetHostmap().Indexes)
417+
r.Logf("Wait for the dead index to go away:start=%v indexes, currnet=%v indexes", start, curIndexes)
418+
myControl.InjectTunUDPPacket(theirVpnIpNet.Addr(), 80, 80, []byte("Hi from me should fail"))
419+
420+
r.RouteForAllExitFunc(func(p *udp.Packet, c *nebula.Control) router.ExitType {
421+
return router.RouteAndExit
422+
})
423+
time.Sleep(2 * time.Second)
424+
}
425+
r.Log("Dead index went away. Woot!")
426+
r.RenderHostmaps("Me removed hostinfo", myControl, relayControl, theirControl)
427+
// Next packet should re-establish a relayed connection and work just great.
428+
429+
t.Logf("Assert the tunnel...")
430+
for {
431+
t.Log("RouteForAllUntilTxTun")
432+
myControl.InjectLightHouseAddr(relayVpnIpNet.Addr(), relayUdpAddr)
433+
myControl.InjectRelays(theirVpnIpNet.Addr(), []netip.Addr{relayVpnIpNet.Addr()})
434+
relayControl.InjectLightHouseAddr(theirVpnIpNet.Addr(), theirUdpAddr)
435+
myControl.InjectTunUDPPacket(theirVpnIpNet.Addr(), 80, 80, []byte("Hi from me"))
436+
437+
p = r.RouteForAllUntilTxTun(theirControl)
438+
r.Log("Assert the tunnel works")
439+
packet := gopacket.NewPacket(p, layers.LayerTypeIPv4, gopacket.Lazy)
440+
v4 := packet.Layer(layers.LayerTypeIPv4).(*layers.IPv4)
441+
if slices.Compare(v4.SrcIP, myVpnIpNet.Addr().AsSlice()) != 0 {
442+
t.Logf("SrcIP is unexpected...this is not the packet I'm looking for. Keep looking")
443+
continue
444+
}
445+
if slices.Compare(v4.DstIP, theirVpnIpNet.Addr().AsSlice()) != 0 {
446+
t.Logf("DstIP is unexpected...this is not the packet I'm looking for. Keep looking")
447+
continue
448+
}
449+
450+
udp := packet.Layer(layers.LayerTypeUDP).(*layers.UDP)
451+
if udp == nil {
452+
t.Log("Not a UDP packet. This is not the packet I'm looking for. Keep looking")
453+
continue
454+
}
455+
data := packet.ApplicationLayer()
456+
if data == nil {
457+
t.Log("No data found in packet. This is not the packet I'm looking for. Keep looking.")
458+
continue
459+
}
460+
if string(data.Payload()) != "Hi from me" {
461+
t.Logf("Unexpected payload: '%v', keep looking", string(data.Payload()))
462+
continue
463+
}
464+
t.Log("I found my lost packet. I am so happy.")
465+
break
466+
}
467+
t.Log("Assert the tunnel works the other way, too")
468+
for {
469+
t.Log("RouteForAllUntilTxTun")
470+
theirControl.InjectTunUDPPacket(myVpnIpNet.Addr(), 80, 80, []byte("Hi from them"))
471+
472+
p = r.RouteForAllUntilTxTun(myControl)
473+
r.Log("Assert the tunnel works")
474+
packet := gopacket.NewPacket(p, layers.LayerTypeIPv4, gopacket.Lazy)
475+
v4 := packet.Layer(layers.LayerTypeIPv4).(*layers.IPv4)
476+
if slices.Compare(v4.DstIP, myVpnIpNet.Addr().AsSlice()) != 0 {
477+
t.Logf("Dst is unexpected...this is not the packet I'm looking for. Keep looking")
478+
continue
479+
}
480+
if slices.Compare(v4.SrcIP, theirVpnIpNet.Addr().AsSlice()) != 0 {
481+
t.Logf("SrcIP is unexpected...this is not the packet I'm looking for. Keep looking")
482+
continue
483+
}
484+
485+
udp := packet.Layer(layers.LayerTypeUDP).(*layers.UDP)
486+
if udp == nil {
487+
t.Log("Not a UDP packet. This is not the packet I'm looking for. Keep looking")
488+
continue
489+
}
490+
data := packet.ApplicationLayer()
491+
if data == nil {
492+
t.Log("No data found in packet. This is not the packet I'm looking for. Keep looking.")
493+
continue
494+
}
495+
if string(data.Payload()) != "Hi from them" {
496+
t.Logf("Unexpected payload: '%v', keep looking", string(data.Payload()))
497+
continue
498+
}
499+
t.Log("I found my lost packet. I am so happy.")
500+
break
501+
}
502+
r.RenderHostmaps("Final hostmaps", myControl, relayControl, theirControl)
503+
504+
}
505+
372506
func TestStage1RaceRelays(t *testing.T) {
373507
//NOTE: this is a race between me and relay resulting in a full tunnel from me to them via relay
374508
ca, _, caKey, _ := NewTestCaCert(time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})

handshake_ix.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,9 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
322322
return
323323
}
324324
hostinfo.relayState.InsertRelayTo(via.relayHI.vpnIp)
325+
// I successfully received a handshake. Just in case I marked this tunnel as 'Disestablished', ensure
326+
// it's correctly marked as working.
327+
via.relayHI.relayState.UpdateRelayForByIdxState(via.remoteIdx, Established)
325328
f.SendVia(via.relayHI, via.relay, msg, make([]byte, 12), make([]byte, mtu), false)
326329
f.l.WithField("vpnIp", vpnIp).WithField("relay", via.relayHI.vpnIp).
327330
WithField("certName", certName).

handshake_manager.go

Lines changed: 48 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -278,48 +278,8 @@ func (hm *HandshakeManager) handleOutbound(vpnIp netip.Addr, lighthouseTriggered
278278
continue
279279
}
280280
// Check the relay HostInfo to see if we already established a relay through it
281-
if existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp); ok {
282-
switch existingRelay.State {
283-
case Established:
284-
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Send handshake via relay")
285-
hm.f.SendVia(relayHostInfo, existingRelay, hostinfo.HandshakePacket[0], make([]byte, 12), make([]byte, mtu), false)
286-
case Requested:
287-
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Re-send CreateRelay request")
288-
289-
//TODO: IPV6-WORK
290-
myVpnIpB := hm.f.myVpnNet.Addr().As4()
291-
theirVpnIpB := vpnIp.As4()
292-
293-
// Re-send the CreateRelay request, in case the previous one was lost.
294-
m := NebulaControl{
295-
Type: NebulaControl_CreateRelayRequest,
296-
InitiatorRelayIndex: existingRelay.LocalIndex,
297-
RelayFromIp: binary.BigEndian.Uint32(myVpnIpB[:]),
298-
RelayToIp: binary.BigEndian.Uint32(theirVpnIpB[:]),
299-
}
300-
msg, err := m.Marshal()
301-
if err != nil {
302-
hostinfo.logger(hm.l).
303-
WithError(err).
304-
Error("Failed to marshal Control message to create relay")
305-
} else {
306-
// This must send over the hostinfo, not over hm.Hosts[ip]
307-
hm.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
308-
hm.l.WithFields(logrus.Fields{
309-
"relayFrom": hm.f.myVpnNet.Addr(),
310-
"relayTo": vpnIp,
311-
"initiatorRelayIndex": existingRelay.LocalIndex,
312-
"relay": relay}).
313-
Info("send CreateRelayRequest")
314-
}
315-
default:
316-
hostinfo.logger(hm.l).
317-
WithField("vpnIp", vpnIp).
318-
WithField("state", existingRelay.State).
319-
WithField("relay", relayHostInfo.vpnIp).
320-
Errorf("Relay unexpected state")
321-
}
322-
} else {
281+
existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp)
282+
if !ok {
323283
// No relays exist or requested yet.
324284
if relayHostInfo.remote.IsValid() {
325285
idx, err := AddRelay(hm.l, relayHostInfo, hm.mainHostMap, vpnIp, nil, TerminalType, Requested)
@@ -352,6 +312,52 @@ func (hm *HandshakeManager) handleOutbound(vpnIp netip.Addr, lighthouseTriggered
352312
Info("send CreateRelayRequest")
353313
}
354314
}
315+
continue
316+
}
317+
switch existingRelay.State {
318+
case Established:
319+
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Send handshake via relay")
320+
hm.f.SendVia(relayHostInfo, existingRelay, hostinfo.HandshakePacket[0], make([]byte, 12), make([]byte, mtu), false)
321+
case Disestablished:
322+
// Mark this relay as 'requested'
323+
relayHostInfo.relayState.UpdateRelayForByIpState(vpnIp, Requested)
324+
fallthrough
325+
case Requested:
326+
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Re-send CreateRelay request")
327+
// Re-send the CreateRelay request, in case the previous one was lost.
328+
relayFrom := hm.f.myVpnNet.Addr().As4()
329+
relayTo := vpnIp.As4()
330+
m := NebulaControl{
331+
Type: NebulaControl_CreateRelayRequest,
332+
InitiatorRelayIndex: existingRelay.LocalIndex,
333+
RelayFromIp: binary.BigEndian.Uint32(relayFrom[:]),
334+
RelayToIp: binary.BigEndian.Uint32(relayTo[:]),
335+
}
336+
337+
msg, err := m.Marshal()
338+
if err != nil {
339+
hostinfo.logger(hm.l).
340+
WithError(err).
341+
Error("Failed to marshal Control message to create relay")
342+
} else {
343+
// This must send over the hostinfo, not over hm.Hosts[ip]
344+
hm.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
345+
hm.l.WithFields(logrus.Fields{
346+
"relayFrom": hm.f.myVpnNet,
347+
"relayTo": vpnIp,
348+
"initiatorRelayIndex": existingRelay.LocalIndex,
349+
"relay": relay}).
350+
Info("send CreateRelayRequest")
351+
}
352+
case PeerRequested:
353+
// PeerRequested only occurs in Forwarding relays, not Terminal relays, and this is a Terminal relay case.
354+
fallthrough
355+
default:
356+
hostinfo.logger(hm.l).
357+
WithField("vpnIp", vpnIp).
358+
WithField("state", existingRelay.State).
359+
WithField("relay", relay).
360+
Errorf("Relay unexpected state")
355361
}
356362
}
357363
}

hostmap.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ const (
3535
Requested = iota
3636
PeerRequested
3737
Established
38+
Disestablished
3839
)
3940

4041
const (
@@ -79,6 +80,28 @@ func (rs *RelayState) DeleteRelay(ip netip.Addr) {
7980
delete(rs.relays, ip)
8081
}
8182

83+
func (rs *RelayState) UpdateRelayForByIpState(vpnIp netip.Addr, state int) {
84+
rs.Lock()
85+
defer rs.Unlock()
86+
if r, ok := rs.relayForByIp[vpnIp]; ok {
87+
newRelay := *r
88+
newRelay.State = state
89+
rs.relayForByIp[newRelay.PeerIp] = &newRelay
90+
rs.relayForByIdx[newRelay.LocalIndex] = &newRelay
91+
}
92+
}
93+
94+
func (rs *RelayState) UpdateRelayForByIdxState(idx uint32, state int) {
95+
rs.Lock()
96+
defer rs.Unlock()
97+
if r, ok := rs.relayForByIdx[idx]; ok {
98+
newRelay := *r
99+
newRelay.State = state
100+
rs.relayForByIp[newRelay.PeerIp] = &newRelay
101+
rs.relayForByIdx[newRelay.LocalIndex] = &newRelay
102+
}
103+
}
104+
82105
func (rs *RelayState) CopyAllRelayFor() []*Relay {
83106
rs.RLock()
84107
defer rs.RUnlock()
@@ -361,6 +384,7 @@ func (hm *HostMap) unlockedMakePrimary(hostinfo *HostInfo) {
361384

362385
func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
363386
primary, ok := hm.Hosts[hostinfo.vpnIp]
387+
isLastHostinfo := hostinfo.next == nil && hostinfo.prev == nil
364388
if ok && primary == hostinfo {
365389
// The vpnIp pointer points to the same hostinfo as the local index id, we can remove it
366390
delete(hm.Hosts, hostinfo.vpnIp)
@@ -410,6 +434,12 @@ func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
410434
Debug("Hostmap hostInfo deleted")
411435
}
412436

437+
if isLastHostinfo {
438+
// I have lost connectivity to my peers. My relay tunnel is likely broken. Mark the next
439+
// hops as 'Disestablished' so that new relay tunnels are created in the future.
440+
hm.unlockedDisestablishVpnAddrRelayFor(hostinfo)
441+
}
442+
// Clean up any local relay indexes for which I am acting as a relay hop
413443
for _, localRelayIdx := range hostinfo.relayState.CopyRelayForIdxs() {
414444
delete(hm.Relays, localRelayIdx)
415445
}
@@ -470,6 +500,27 @@ func (hm *HostMap) QueryVpnIpRelayFor(targetIp, relayHostIp netip.Addr) (*HostIn
470500
return nil, nil, errors.New("unable to find host with relay")
471501
}
472502

503+
func (hm *HostMap) unlockedDisestablishVpnAddrRelayFor(hi *HostInfo) {
504+
for _, relayHostIp := range hi.relayState.CopyRelayIps() {
505+
if h, ok := hm.Hosts[relayHostIp]; ok {
506+
for h != nil {
507+
h.relayState.UpdateRelayForByIpState(hi.vpnIp, Disestablished)
508+
h = h.next
509+
}
510+
}
511+
}
512+
for _, rs := range hi.relayState.CopyAllRelayFor() {
513+
if rs.Type == ForwardingType {
514+
if h, ok := hm.Hosts[rs.PeerIp]; ok {
515+
for h != nil {
516+
h.relayState.UpdateRelayForByIpState(hi.vpnIp, Disestablished)
517+
h = h.next
518+
}
519+
}
520+
}
521+
}
522+
}
523+
473524
func (hm *HostMap) queryVpnIp(vpnIp netip.Addr, promoteIfce *Interface) *HostInfo {
474525
hm.RLock()
475526
if h, ok := hm.Hosts[vpnIp]; ok {

0 commit comments

Comments
 (0)