Skip to content

Commit 0aa6ff5

Browse files
[release-22.0] Add code to clear orphaned files for a deleted keyspace (#18098) (#18115)
Signed-off-by: Manan Gupta <[email protected]> Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
1 parent c97627f commit 0aa6ff5

File tree

3 files changed

+229
-0
lines changed

3 files changed

+229
-0
lines changed

go/vt/topo/keyspace.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,41 @@ func (ts *Server) DeleteKeyspace(ctx context.Context, keyspace string) error {
393393
return nil
394394
}
395395

396+
// DeleteOrphanedKeyspaceFiles clears the residual files for a given keyspace in a cell.
397+
func (ts *Server) DeleteOrphanedKeyspaceFiles(ctx context.Context, cell string, keyspace string) error {
398+
conn, err := ts.ConnForCell(ctx, cell)
399+
if err != nil {
400+
return err
401+
}
402+
403+
dirsToClear := []string{path.Join(KeyspacesPath, keyspace)}
404+
for len(dirsToClear) > 0 {
405+
dir := dirsToClear[len(dirsToClear)-1]
406+
dirsToClear = dirsToClear[0 : len(dirsToClear)-1]
407+
408+
children, err := conn.ListDir(ctx, dir, true)
409+
if err != nil {
410+
if IsErrType(err, NoNode) {
411+
continue
412+
}
413+
return err
414+
}
415+
416+
for _, child := range children {
417+
childPath := path.Join(dir, child.Name)
418+
if child.Type == TypeDirectory {
419+
dirsToClear = append(dirsToClear, childPath)
420+
continue
421+
}
422+
err = conn.Delete(ctx, childPath, nil)
423+
if err != nil {
424+
return err
425+
}
426+
}
427+
}
428+
return nil
429+
}
430+
396431
// GetKeyspaces returns the list of keyspaces in the topology.
397432
func (ts *Server) GetKeyspaces(ctx context.Context) ([]string, error) {
398433
if ctx.Err() != nil {

go/vt/topo/keyspace_test.go

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
Copyright 2025 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package topo_test
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/stretchr/testify/require"
24+
25+
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
26+
"vitess.io/vitess/go/vt/topo"
27+
"vitess.io/vitess/go/vt/topo/memorytopo"
28+
)
29+
30+
func TestDeleteOrphanedKeyspaceFiles(t *testing.T) {
31+
cell := "zone-1"
32+
cell2 := "zone-2"
33+
keyspace := "ks"
34+
keyspace2 := "ks2"
35+
ctx := context.Background()
36+
tests := []struct {
37+
name string
38+
setup func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory)
39+
verify func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory)
40+
}{
41+
{
42+
name: "No orphaned files to delete",
43+
setup: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
44+
// We don't create anything that needs to be cleared.
45+
},
46+
verify: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
47+
// We check that we only get one ListDir call.
48+
require.EqualValues(t, 1, mtf.GetCallStats().Counts()["ListDir"])
49+
},
50+
}, {
51+
name: "Single orphaned file to delete",
52+
setup: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
53+
err := ts.UpdateShardReplicationFields(ctx, cell, keyspace, "0", func(sr *topodatapb.ShardReplication) error {
54+
sr.Nodes = append(sr.Nodes, &topodatapb.ShardReplication_Node{TabletAlias: &topodatapb.TabletAlias{Cell: cell, Uid: 0}})
55+
return nil
56+
})
57+
require.NoError(t, err)
58+
// Verify that getting srvKeyspaceNames now gives us this keyspace.
59+
keyspaces, err := ts.GetSrvKeyspaceNames(ctx, cell)
60+
require.NoError(t, err)
61+
require.EqualValues(t, 1, len(keyspaces))
62+
require.EqualValues(t, keyspace, keyspaces[0])
63+
// Also verify that we can't get the SrvKeyspace.
64+
_, err = ts.GetSrvKeyspace(ctx, cell, keyspace)
65+
require.Error(t, err)
66+
require.True(t, topo.IsErrType(err, topo.NoNode))
67+
mtf.GetCallStats().ResetAll()
68+
},
69+
verify: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
70+
// We check that we only get 3 ListDir calls -
71+
// - keyspaces/ks
72+
// - keyspaces/ks/shards
73+
// - keyspaces/ks/shards/0
74+
require.EqualValues(t, 3, mtf.GetCallStats().Counts()["ListDir"])
75+
// We check that we delete the shard replication file
76+
// - keyspaces/ks/shards/0/ShardReplication
77+
require.EqualValues(t, 1, mtf.GetCallStats().Counts()["Delete"])
78+
// Verify that getting srvKeyspaceNames is now empty.
79+
keyspaces, err := ts.GetSrvKeyspaceNames(ctx, cell)
80+
require.NoError(t, err)
81+
require.Len(t, keyspaces, 0)
82+
},
83+
}, {
84+
name: "Ensure we don't delete extra files",
85+
setup: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
86+
err := ts.UpdateShardReplicationFields(ctx, cell, keyspace, "0", func(sr *topodatapb.ShardReplication) error {
87+
sr.Nodes = append(sr.Nodes, &topodatapb.ShardReplication_Node{TabletAlias: &topodatapb.TabletAlias{Cell: cell, Uid: 0}})
88+
return nil
89+
})
90+
require.NoError(t, err)
91+
err = ts.CreateKeyspace(ctx, keyspace2, &topodatapb.Keyspace{KeyspaceType: topodatapb.KeyspaceType_NORMAL})
92+
require.NoError(t, err)
93+
err = ts.UpdateShardReplicationFields(ctx, cell, keyspace2, "0", func(sr *topodatapb.ShardReplication) error {
94+
sr.Nodes = append(sr.Nodes, &topodatapb.ShardReplication_Node{TabletAlias: &topodatapb.TabletAlias{Cell: cell, Uid: 0}})
95+
return nil
96+
})
97+
require.NoError(t, err)
98+
},
99+
verify: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
100+
// Verify that we don't delete the files for the other keyspace.
101+
_, err := ts.GetKeyspace(ctx, keyspace2)
102+
require.NoError(t, err)
103+
_, err = ts.GetShardReplication(ctx, cell, keyspace2, "0")
104+
require.NoError(t, err)
105+
},
106+
}, {
107+
name: "Multiple orphaned files to delete",
108+
setup: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
109+
err := ts.UpdateShardReplicationFields(ctx, cell, keyspace, "0", func(sr *topodatapb.ShardReplication) error {
110+
sr.Nodes = append(sr.Nodes, &topodatapb.ShardReplication_Node{TabletAlias: &topodatapb.TabletAlias{Cell: cell, Uid: 0}})
111+
return nil
112+
})
113+
require.NoError(t, err)
114+
err = ts.UpdateSrvKeyspace(ctx, cell, keyspace, &topodatapb.SrvKeyspace{
115+
Partitions: []*topodatapb.SrvKeyspace_KeyspacePartition{
116+
{
117+
ServedType: topodatapb.TabletType_PRIMARY,
118+
},
119+
},
120+
})
121+
require.NoError(t, err)
122+
err = ts.UpdateSrvKeyspace(ctx, cell2, keyspace, &topodatapb.SrvKeyspace{
123+
Partitions: []*topodatapb.SrvKeyspace_KeyspacePartition{
124+
{
125+
ServedType: topodatapb.TabletType_PRIMARY,
126+
},
127+
},
128+
})
129+
require.NoError(t, err)
130+
// Verify that getting srvKeyspaceNames now gives us this keyspace.
131+
keyspaces, err := ts.GetSrvKeyspaceNames(ctx, cell)
132+
require.NoError(t, err)
133+
require.EqualValues(t, 1, len(keyspaces))
134+
require.EqualValues(t, keyspace, keyspaces[0])
135+
// Also verify that we can't get the SrvKeyspace.
136+
_, err = ts.GetSrvKeyspace(ctx, cell, keyspace)
137+
require.NoError(t, err)
138+
mtf.GetCallStats().ResetAll()
139+
},
140+
verify: func(t *testing.T, ts *topo.Server, mtf *memorytopo.Factory) {
141+
// We check that we only get 3 ListDir calls -
142+
// - keyspaces/ks
143+
// - keyspaces/ks/shards
144+
// - keyspaces/ks/shards/0
145+
require.EqualValues(t, 3, mtf.GetCallStats().Counts()["ListDir"])
146+
// We check that we delete the shard replication file
147+
// - keyspaces/ks/shards/0/ShardReplication
148+
// - keyspaces/ks/SrvKeyspace
149+
require.EqualValues(t, 2, mtf.GetCallStats().Counts()["Delete"])
150+
// Verify that getting srvKeyspaceNames is now empty.
151+
keyspaces, err := ts.GetSrvKeyspaceNames(ctx, cell)
152+
require.NoError(t, err)
153+
require.Len(t, keyspaces, 0)
154+
// Since we only clear orphaned files for one cell, we should still see
155+
// they srvKeyspace in the other cell.
156+
keyspaces, err = ts.GetSrvKeyspaceNames(ctx, cell2)
157+
require.NoError(t, err)
158+
require.Len(t, keyspaces, 1)
159+
},
160+
},
161+
}
162+
for _, tt := range tests {
163+
t.Run(tt.name, func(t *testing.T) {
164+
ts, mtf := memorytopo.NewServerAndFactory(ctx, cell, cell2)
165+
tt.setup(t, ts, mtf)
166+
err := ts.DeleteOrphanedKeyspaceFiles(ctx, cell, keyspace)
167+
require.NoError(t, err)
168+
tt.verify(t, ts, mtf)
169+
})
170+
}
171+
}

go/vt/topotools/rebuild_keyspace.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@ import (
3131

3232
// RebuildKeyspace rebuilds the serving graph data while locking out other changes.
3333
func RebuildKeyspace(ctx context.Context, log logutil.Logger, ts *topo.Server, keyspace string, cells []string, allowPartial bool) (err error) {
34+
_, err = ts.GetKeyspace(ctx, keyspace)
35+
if err != nil {
36+
// If we get an error trying to get the keyspace and it's not a
37+
// no node, error, we should fail.
38+
if !topo.IsErrType(err, topo.NoNode) {
39+
return err
40+
}
41+
// If the keyspace doesn't exist, we should delete the serving keyspace records.
42+
// This is to ensure that the serving graph is consistent with the topo.
43+
if err = deleteOrphanedFiles(ctx, ts, keyspace, cells); err != nil {
44+
return err
45+
}
46+
}
3447
ctx, unlock, lockErr := ts.LockKeyspace(ctx, keyspace, "RebuildKeyspace")
3548
if lockErr != nil {
3649
return lockErr
@@ -40,6 +53,16 @@ func RebuildKeyspace(ctx context.Context, log logutil.Logger, ts *topo.Server, k
4053
return RebuildKeyspaceLocked(ctx, log, ts, keyspace, cells, allowPartial)
4154
}
4255

56+
// deleteOrphanedFiles clears the residual records for a keyspace that has already been deleted.
57+
func deleteOrphanedFiles(ctx context.Context, ts *topo.Server, keyspace string, cells []string) error {
58+
for _, cell := range cells {
59+
if err := ts.DeleteOrphanedKeyspaceFiles(ctx, cell, keyspace); err != nil {
60+
return err
61+
}
62+
}
63+
return nil
64+
}
65+
4366
// RebuildKeyspaceLocked should only be used with an action lock on the keyspace
4467
// - otherwise the consistency of the serving graph data can't be
4568
// guaranteed.

0 commit comments

Comments
 (0)