Skip to content

Commit e1dd922

Browse files
committed
ipn/ipnlocal, tka: compact TKA state after every sync
Previously, a TKA compaction would only run when a node started, which meant a long-running node could use unbounded storage as it accumulated ever-increasing amounts of TKA state. This patch changes TKA so that it runs a compaction after every sync.

Updates tailscale/corp#33537

Change-Id: I91df887ea0c5a5b00cb6caced85aeffa2a4b24ee
Signed-off-by: Alex Chan <[email protected]>
1 parent 38ccdbe commit e1dd922

File tree

10 files changed

+276
-31
lines changed

10 files changed

+276
-31
lines changed

ipn/ipnlocal/network-lock.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,13 @@ func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap, prefs ipn.PrefsVie
360360
if err := b.tkaSyncLocked(ourNodeKey); err != nil {
361361
return fmt.Errorf("tka sync: %w", err)
362362
}
363+
// Try to compact the TKA state, to avoid unbounded storage on nodes.
364+
//
365+
// We run this on every sync so that clients compact consistently. In many
366+
// cases this will be a no-op.
367+
if err := b.tka.authority.Compact(b.tka.storage, tkaCompactionDefaults); err != nil {
368+
return fmt.Errorf("tka compact: %w", err)
369+
}
363370
}
364371

365372
return nil
@@ -508,7 +515,7 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per
508515
if root == "" {
509516
b.health.SetUnhealthy(noNetworkLockStateDirWarnable, nil)
510517
b.logf("network-lock using in-memory storage; no state directory")
511-
storage = &tka.Mem{}
518+
storage = tka.ChonkMem()
512519
} else {
513520
chonkDir := b.chonkPathLocked()
514521
chonk, err := tka.ChonkDir(chonkDir)
@@ -686,7 +693,7 @@ func (b *LocalBackend) NetworkLockInit(keys []tka.Key, disablementValues [][]byt
686693
// We use an in-memory tailchonk because we don't want to commit to
687694
// the filesystem until we've finished the initialization sequence,
688695
// just in case something goes wrong.
689-
_, genesisAUM, err := tka.Create(&tka.Mem{}, tka.State{
696+
_, genesisAUM, err := tka.Create(tka.ChonkMem(), tka.State{
690697
Keys: keys,
691698
// TODO(tom): s/tka.State.DisablementSecrets/tka.State.DisablementValues
692699
// This will center on consistent nomenclature:

ipn/ipnlocal/network-lock_test.go

Lines changed: 221 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"path/filepath"
1818
"reflect"
1919
"testing"
20+
"time"
2021

2122
go4mem "go4.org/mem"
2223

@@ -31,6 +32,7 @@ import (
3132
"tailscale.com/tailcfg"
3233
"tailscale.com/tka"
3334
"tailscale.com/tsd"
35+
"tailscale.com/tstest"
3436
"tailscale.com/types/key"
3537
"tailscale.com/types/netmap"
3638
"tailscale.com/types/persist"
@@ -89,7 +91,7 @@ func TestTKAEnablementFlow(t *testing.T) {
8991
// our mock server can communicate.
9092
nlPriv := key.NewNLPrivate()
9193
key := tka.Key{Kind: tka.Key25519, Public: nlPriv.Public().Verifier(), Votes: 2}
92-
a1, genesisAUM, err := tka.Create(&tka.Mem{}, tka.State{
94+
a1, genesisAUM, err := tka.Create(tka.ChonkMem(), tka.State{
9395
Keys: []tka.Key{key},
9496
DisablementSecrets: [][]byte{bytes.Repeat([]byte{0xa5}, 32)},
9597
}, nlPriv)
@@ -399,7 +401,7 @@ func TestTKASync(t *testing.T) {
399401

400402
// Setup the tka authority on the control plane.
401403
key := tka.Key{Kind: tka.Key25519, Public: nlPriv.Public().Verifier(), Votes: 2}
402-
controlStorage := &tka.Mem{}
404+
controlStorage := tka.ChonkMem()
403405
controlAuthority, bootstrap, err := tka.Create(controlStorage, tka.State{
404406
Keys: []tka.Key{key, someKey},
405407
DisablementSecrets: [][]byte{tka.DisablementKDF(disablementSecret)},
@@ -548,10 +550,226 @@ func TestTKASync(t *testing.T) {
548550
}
549551
}
550552

553+
// Whenever we run a TKA sync and get new state from control, we compact the
554+
// local state.
555+
func TestTKASyncTriggersCompact(t *testing.T) {
556+
someKeyPriv := key.NewNLPrivate()
557+
someKey := tka.Key{Kind: tka.Key25519, Public: someKeyPriv.Public().Verifier(), Votes: 1}
558+
559+
disablementSecret := bytes.Repeat([]byte{0xa5}, 32)
560+
561+
nodePriv := key.NewNode()
562+
nlPriv := key.NewNLPrivate()
563+
pm := must.Get(newProfileManager(new(mem.Store), t.Logf, health.NewTracker(eventbustest.NewBus(t))))
564+
must.Do(pm.SetPrefs((&ipn.Prefs{
565+
Persist: &persist.Persist{
566+
PrivateNodeKey: nodePriv,
567+
NetworkLockKey: nlPriv,
568+
},
569+
}).View(), ipn.NetworkProfile{}))
570+
571+
// Create a clock, and roll it back by 30 days.
572+
//
573+
// Our compaction algorithm preserves AUMs received in the last 14 days, so
574+
// we need to backdate the commit times to make the AUMs eligible for compaction.
575+
clock := tstest.NewClock(tstest.ClockOpts{})
576+
clock.Advance(-30 * 24 * time.Hour)
577+
578+
// Set up the TKA authority on the control plane.
579+
key := tka.Key{Kind: tka.Key25519, Public: nlPriv.Public().Verifier(), Votes: 2}
580+
controlStorage := tka.ChonkMem()
581+
controlStorage.SetClock(clock)
582+
controlAuthority, bootstrap, err := tka.Create(controlStorage, tka.State{
583+
Keys: []tka.Key{key, someKey},
584+
DisablementSecrets: [][]byte{tka.DisablementKDF(disablementSecret)},
585+
}, nlPriv)
586+
if err != nil {
587+
t.Fatalf("tka.Create() failed: %v", err)
588+
}
589+
590+
// Fill the control plane TKA authority with a lot of AUMs, enough so that:
591+
//
592+
// 1. the chain of AUMs includes some checkpoints
593+
// 2. the chain is long enough it would be trimmed if we ran the compaction
594+
// algorithm with the defaults
595+
for range 100 {
596+
upd := controlAuthority.NewUpdater(nlPriv)
597+
if err := upd.RemoveKey(someKey.MustID()); err != nil {
598+
t.Fatalf("RemoveKey: %v", err)
599+
}
600+
if err := upd.AddKey(someKey); err != nil {
601+
t.Fatalf("AddKey: %v", err)
602+
}
603+
aums, err := upd.Finalize(controlStorage)
604+
if err != nil {
605+
t.Fatalf("Finalize: %v", err)
606+
}
607+
if err := controlAuthority.Inform(controlStorage, aums); err != nil {
608+
t.Fatalf("controlAuthority.Inform() failed: %v", err)
609+
}
610+
}
611+
612+
// Set up the TKA authority on the node.
613+
nodeStorage := tka.ChonkMem()
614+
nodeStorage.SetClock(clock)
615+
nodeAuthority, err := tka.Bootstrap(nodeStorage, bootstrap)
616+
if err != nil {
617+
t.Fatalf("tka.Bootstrap() failed: %v", err)
618+
}
619+
620+
// Make a mock control server.
621+
ts, client := fakeNoiseServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
622+
defer r.Body.Close()
623+
switch r.URL.Path {
624+
case "/machine/tka/sync/offer":
625+
body := new(tailcfg.TKASyncOfferRequest)
626+
if err := json.NewDecoder(r.Body).Decode(body); err != nil {
627+
t.Fatal(err)
628+
}
629+
t.Logf("got sync offer:\n%+v", body)
630+
nodeOffer, err := toSyncOffer(body.Head, body.Ancestors)
631+
if err != nil {
632+
t.Fatal(err)
633+
}
634+
controlOffer, err := controlAuthority.SyncOffer(controlStorage)
635+
if err != nil {
636+
t.Fatal(err)
637+
}
638+
sendAUMs, err := controlAuthority.MissingAUMs(controlStorage, nodeOffer)
639+
if err != nil {
640+
t.Fatal(err)
641+
}
642+
643+
head, ancestors, err := fromSyncOffer(controlOffer)
644+
if err != nil {
645+
t.Fatal(err)
646+
}
647+
resp := tailcfg.TKASyncOfferResponse{
648+
Head: head,
649+
Ancestors: ancestors,
650+
MissingAUMs: make([]tkatype.MarshaledAUM, len(sendAUMs)),
651+
}
652+
for i, a := range sendAUMs {
653+
resp.MissingAUMs[i] = a.Serialize()
654+
}
655+
656+
t.Logf("responding to sync offer with:\n%+v", resp)
657+
w.WriteHeader(200)
658+
if err := json.NewEncoder(w).Encode(resp); err != nil {
659+
t.Fatal(err)
660+
}
661+
662+
case "/machine/tka/sync/send":
663+
body := new(tailcfg.TKASyncSendRequest)
664+
if err := json.NewDecoder(r.Body).Decode(body); err != nil {
665+
t.Fatal(err)
666+
}
667+
t.Logf("got sync send:\n%+v", body)
668+
669+
var remoteHead tka.AUMHash
670+
if err := remoteHead.UnmarshalText([]byte(body.Head)); err != nil {
671+
t.Fatalf("head unmarshal: %v", err)
672+
}
673+
toApply := make([]tka.AUM, len(body.MissingAUMs))
674+
for i, a := range body.MissingAUMs {
675+
if err := toApply[i].Unserialize(a); err != nil {
676+
t.Fatalf("decoding missingAUM[%d]: %v", i, err)
677+
}
678+
}
679+
680+
if len(toApply) > 0 {
681+
if err := controlAuthority.Inform(controlStorage, toApply); err != nil {
682+
t.Fatalf("control.Inform(%+v) failed: %v", toApply, err)
683+
}
684+
}
685+
head, err := controlAuthority.Head().MarshalText()
686+
if err != nil {
687+
t.Fatal(err)
688+
}
689+
690+
w.WriteHeader(200)
691+
if err := json.NewEncoder(w).Encode(tailcfg.TKASyncSendResponse{
692+
Head: string(head),
693+
}); err != nil {
694+
t.Fatal(err)
695+
}
696+
697+
default:
698+
t.Errorf("unhandled endpoint path: %v", r.URL.Path)
699+
w.WriteHeader(404)
700+
}
701+
}))
702+
defer ts.Close()
703+
704+
// Setup the client.
705+
cc, _ := fakeControlClient(t, client)
706+
b := LocalBackend{
707+
cc: cc,
708+
ccAuto: cc,
709+
logf: t.Logf,
710+
pm: pm,
711+
store: pm.Store(),
712+
tka: &tkaState{
713+
authority: nodeAuthority,
714+
storage: nodeStorage,
715+
},
716+
}
717+
718+
// Trigger a sync.
719+
err = b.tkaSyncIfNeeded(&netmap.NetworkMap{
720+
TKAEnabled: true,
721+
TKAHead: controlAuthority.Head(),
722+
}, pm.CurrentPrefs())
723+
if err != nil {
724+
t.Errorf("tkaSyncIfNeeded() failed: %v", err)
725+
}
726+
727+
// Add a new AUM in control.
728+
upd := controlAuthority.NewUpdater(nlPriv)
729+
if err := upd.RemoveKey(someKey.MustID()); err != nil {
730+
t.Fatalf("RemoveKey: %v", err)
731+
}
732+
aums, err := upd.Finalize(controlStorage)
733+
if err != nil {
734+
t.Fatalf("Finalize: %v", err)
735+
}
736+
if err := controlAuthority.Inform(controlStorage, aums); err != nil {
737+
t.Fatalf("controlAuthority.Inform() failed: %v", err)
738+
}
739+
740+
// Run a second sync, which should trigger a compaction.
741+
err = b.tkaSyncIfNeeded(&netmap.NetworkMap{
742+
TKAEnabled: true,
743+
TKAHead: controlAuthority.Head(),
744+
}, pm.CurrentPrefs())
745+
if err != nil {
746+
t.Errorf("tkaSyncIfNeeded() failed: %v", err)
747+
}
748+
749+
// Check that the node and control plane are in sync.
750+
if nodeHead, controlHead := b.tka.authority.Head(), controlAuthority.Head(); nodeHead != controlHead {
751+
t.Errorf("node head = %v, want %v", nodeHead, controlHead)
752+
}
753+
754+
// Check that the node has compacted away some of its AUMs, i.e. that it has purged some AUMs which
755+
// are still kept in the control plane.
756+
nodeAUMs, err := b.tka.storage.AllAUMs()
757+
if err != nil {
758+
t.Errorf("AllAUMs() for node failed: %v", err)
759+
}
760+
controlAUMS, err := controlStorage.AllAUMs()
761+
if err != nil {
762+
t.Errorf("AllAUMs() for control failed: %v", err)
763+
}
764+
if len(nodeAUMs) == len(controlAUMS) {
765+
t.Errorf("node has not compacted; it has the same number of AUMs as control (node = control = %d)", len(nodeAUMs))
766+
}
767+
}
768+
551769
func TestTKAFilterNetmap(t *testing.T) {
552770
nlPriv := key.NewNLPrivate()
553771
nlKey := tka.Key{Kind: tka.Key25519, Public: nlPriv.Public().Verifier(), Votes: 2}
554-
storage := &tka.Mem{}
772+
storage := tka.ChonkMem()
555773
authority, _, err := tka.Create(storage, tka.State{
556774
Keys: []tka.Key{nlKey},
557775
DisablementSecrets: [][]byte{bytes.Repeat([]byte{0xa5}, 32)},

tka/builder_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func TestAuthorityBuilderAddKey(t *testing.T) {
2828
pub, priv := testingKey25519(t, 1)
2929
key := Key{Kind: Key25519, Public: pub, Votes: 2}
3030

31-
storage := &Mem{}
31+
storage := ChonkMem()
3232
a, _, err := Create(storage, State{
3333
Keys: []Key{key},
3434
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
@@ -62,7 +62,7 @@ func TestAuthorityBuilderMaxKey(t *testing.T) {
6262
pub, priv := testingKey25519(t, 1)
6363
key := Key{Kind: Key25519, Public: pub, Votes: 2}
6464

65-
storage := &Mem{}
65+
storage := ChonkMem()
6666
a, _, err := Create(storage, State{
6767
Keys: []Key{key},
6868
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
@@ -109,7 +109,7 @@ func TestAuthorityBuilderRemoveKey(t *testing.T) {
109109
pub2, _ := testingKey25519(t, 2)
110110
key2 := Key{Kind: Key25519, Public: pub2, Votes: 1}
111111

112-
storage := &Mem{}
112+
storage := ChonkMem()
113113
a, _, err := Create(storage, State{
114114
Keys: []Key{key, key2},
115115
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
@@ -155,7 +155,7 @@ func TestAuthorityBuilderSetKeyVote(t *testing.T) {
155155
pub, priv := testingKey25519(t, 1)
156156
key := Key{Kind: Key25519, Public: pub, Votes: 2}
157157

158-
storage := &Mem{}
158+
storage := ChonkMem()
159159
a, _, err := Create(storage, State{
160160
Keys: []Key{key},
161161
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
@@ -191,7 +191,7 @@ func TestAuthorityBuilderSetKeyMeta(t *testing.T) {
191191
pub, priv := testingKey25519(t, 1)
192192
key := Key{Kind: Key25519, Public: pub, Votes: 2, Meta: map[string]string{"a": "b"}}
193193

194-
storage := &Mem{}
194+
storage := ChonkMem()
195195
a, _, err := Create(storage, State{
196196
Keys: []Key{key},
197197
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
@@ -227,7 +227,7 @@ func TestAuthorityBuilderMultiple(t *testing.T) {
227227
pub, priv := testingKey25519(t, 1)
228228
key := Key{Kind: Key25519, Public: pub, Votes: 2}
229229

230-
storage := &Mem{}
230+
storage := ChonkMem()
231231
a, _, err := Create(storage, State{
232232
Keys: []Key{key},
233233
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
@@ -275,7 +275,7 @@ func TestAuthorityBuilderCheckpointsAfterXUpdates(t *testing.T) {
275275
pub, priv := testingKey25519(t, 1)
276276
key := Key{Kind: Key25519, Public: pub, Votes: 2}
277277

278-
storage := &Mem{}
278+
storage := ChonkMem()
279279
a, _, err := Create(storage, State{
280280
Keys: []Key{key},
281281
DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},

tka/chaintest_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,25 +285,25 @@ func (c *testChain) makeAUM(v *testchainNode) AUM {
285285

286286
// Chonk returns a tailchonk containing all AUMs.
287287
func (c *testChain) Chonk() Chonk {
288-
var out Mem
288+
out := ChonkMem()
289289
for _, update := range c.AUMs {
290290
if err := out.CommitVerifiedAUMs([]AUM{update}); err != nil {
291291
panic(err)
292292
}
293293
}
294-
return &out
294+
return out
295295
}
296296

297297
// ChonkWith returns a tailchonk containing the named AUMs.
298298
func (c *testChain) ChonkWith(names ...string) Chonk {
299-
var out Mem
299+
out := ChonkMem()
300300
for _, name := range names {
301301
update := c.AUMs[name]
302302
if err := out.CommitVerifiedAUMs([]AUM{update}); err != nil {
303303
panic(err)
304304
}
305305
}
306-
return &out
306+
return out
307307
}
308308

309309
type testchainOpt struct {

tka/key_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func TestNLPrivate(t *testing.T) {
7272
// Test that key.NLPrivate implements Signer by making a new
7373
// authority.
7474
k := Key{Kind: Key25519, Public: pub.Verifier(), Votes: 1}
75-
_, aum, err := Create(&Mem{}, State{
75+
_, aum, err := Create(ChonkMem(), State{
7676
Keys: []Key{k},
7777
DisablementSecrets: [][]byte{bytes.Repeat([]byte{1}, 32)},
7878
}, p)

0 commit comments

Comments
 (0)