Skip to content

Commit b3eca57

Browse files
committed
certsyncpod: Swap secret/cm directories atomically
Currently it can happen that cert-syncer replaces some of the secret/configmap files successfully and then fails. This can lead to problems when these are e.g. TLS cert/key files and the directory gets inconsistent. This may seem transient, but when cert-syncer is terminated in the middle, it can later fail to start as the whole kube-apiserver gets into a crash loop. This introduces a new staticpod.SwapDirectoriesAtomic, which uses unix.Renameat2 with RENAME_EXCHANGE flag set.
1 parent 4139e55 commit b3eca57

File tree

3 files changed

+183
-42
lines changed

3 files changed

+183
-42
lines changed

pkg/operator/staticpod/certsyncpod/certsync_controller.go

Lines changed: 59 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
115115

116116
contentDir := getConfigMapDir(c.destinationDir, cm.Name)
117117

118-
data := map[string]string{}
118+
data := make(map[string]string, len(configMap.Data))
119+
dataBytes := make(map[string][]byte, len(configMap.Data))
119120
for filename := range configMap.Data {
120121
fullFilename := filepath.Join(contentDir, filename)
121122

@@ -128,6 +129,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
128129
}
129130

130131
data[filename] = string(existingContent)
132+
dataBytes[filename] = existingContent
131133
}
132134

133135
// Check if cached configmap differs
@@ -152,27 +154,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
152154
continue
153155
}
154156

155-
klog.Infof("Creating directory %q ...", contentDir)
156-
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
157-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed creating directory for configmap: %s/%s: %v", configMap.Namespace, configMap.Name, err)
158-
errors = append(errors, err)
159-
continue
160-
}
161-
for filename, content := range configMap.Data {
162-
fullFilename := filepath.Join(contentDir, filename)
163-
// if the existing is the same, do nothing
164-
if reflect.DeepEqual(data[fullFilename], content) {
165-
continue
166-
}
167-
168-
klog.Infof("Writing configmap manifest %q ...", fullFilename)
169-
if err := staticpod.WriteFileAtomic([]byte(content), 0644, fullFilename); err != nil {
170-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed writing file for configmap: %s/%s: %v", configMap.Namespace, configMap.Name, err)
171-
errors = append(errors, err)
172-
continue
173-
}
174-
}
175-
c.eventRecorder.Eventf("CertificateUpdated", "Wrote updated configmap: %s/%s", configMap.Namespace, configMap.Name)
157+
errors = append(errors, c.writeData(configMap.Namespace, configMap.Name, "configmap", contentDir, dataBytes)...)
176158
}
177159

178160
klog.Infof("Syncing secrets: %v", c.secrets)
@@ -220,7 +202,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
220202

221203
contentDir := getSecretDir(c.destinationDir, s.Name)
222204

223-
data := map[string][]byte{}
205+
data := make(map[string][]byte, len(secret.Data))
224206
for filename := range secret.Data {
225207
fullFilename := filepath.Join(contentDir, filename)
226208

@@ -257,29 +239,64 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
257239
continue
258240
}
259241

260-
klog.Infof("Creating directory %q ...", contentDir)
261-
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
262-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed creating directory for secret: %s/%s: %v", secret.Namespace, secret.Name, err)
242+
errors = append(errors, c.writeData(secret.Namespace, secret.Name, "secret", contentDir, data)...)
243+
}
244+
245+
return utilerrors.NewAggregate(errors)
246+
}
247+
248+
func (c *CertSyncController) writeData(
249+
objectNamespace, objectName, kind string,
250+
contentDir string, data map[string][]byte,
251+
) []error {
252+
var errors []error
253+
254+
// We are going to atomically swap the new data directory for the old one.
255+
// In case the target directory does not exist, create it so that the directory not existing is not a special case.
256+
klog.Infof("Ensuring directory %q exists ...", contentDir)
257+
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
258+
c.eventRecorder.Warningf("CertificateUpdateFailed",
259+
"Failed creating content directory for %s: %s/%s: %v", kind, objectNamespace, objectName, err)
260+
errors = append(errors, err)
261+
return errors
262+
}
263+
264+
// Create a tmp source directory to be swapped.
265+
srcDir, err := os.MkdirTemp(filepath.Dir(contentDir), filepath.Base(contentDir)+"-*")
266+
if err != nil {
267+
c.eventRecorder.Warningf("CertificateUpdateFailed",
268+
"Failed to create source %s directory for %s/%s: %v", kind, objectNamespace, objectName, err)
269+
errors = append(errors, err)
270+
return errors
271+
}
272+
defer os.RemoveAll(srcDir)
273+
274+
// Populate the tmp directory with files.
275+
for filename, content := range data {
276+
// TODO: Fix permissions
277+
fullFilename := filepath.Join(srcDir, filename)
278+
klog.Infof("Writing %s manifest %q ...", kind, fullFilename)
279+
280+
if err := os.WriteFile(fullFilename, content, 0600); err != nil {
281+
c.eventRecorder.Warningf("CertificateUpdateFailed",
282+
"Failed writing file for %s: %s/%s: %v", kind, objectNamespace, objectName, err)
263283
errors = append(errors, err)
264284
continue
265285
}
266-
for filename, content := range secret.Data {
267-
// TODO fix permissions
268-
fullFilename := filepath.Join(contentDir, filename)
269-
// if the existing is the same, do nothing
270-
if reflect.DeepEqual(data[fullFilename], content) {
271-
continue
272-
}
286+
}
287+
if len(errors) > 0 {
288+
return errors
289+
}
273290

274-
klog.Infof("Writing secret manifest %q ...", fullFilename)
275-
if err := staticpod.WriteFileAtomic(content, 0600, fullFilename); err != nil {
276-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed writing file for secret: %s/%s: %v", secret.Namespace, secret.Name, err)
277-
errors = append(errors, err)
278-
continue
279-
}
280-
}
281-
c.eventRecorder.Eventf("CertificateUpdated", "Wrote updated secret: %s/%s", secret.Namespace, secret.Name)
291+
// Swap directories atomically.
292+
if err := staticpod.SwapDirectoriesAtomic(contentDir, srcDir); err != nil {
293+
c.eventRecorder.Warningf("CertificateUpdateFailed",
294+
"Failed to enable new %s directory for %s/%s: %v", kind, objectNamespace, objectName, err)
295+
errors = append(errors, err)
296+
return errors
282297
}
283298

284-
return utilerrors.NewAggregate(errors)
299+
c.eventRecorder.Eventf("CertificateUpdated",
300+
"Wrote updated %s: %s/%s", kind, objectNamespace, objectName)
301+
return nil
285302
}

pkg/operator/staticpod/file_utils.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7+
8+
"golang.org/x/sys/unix"
79
)
810

911
func WriteFileAtomic(content []byte, filePerms os.FileMode, fullFilename string) error {
@@ -31,3 +33,20 @@ func writeTemporaryFile(content []byte, filePerms os.FileMode, fullFilename stri
3133
}
3234
return tmpfile.Name(), nil
3335
}
36+
37+
// SwapDirectoriesAtomic can be used to swap two directories atomically.
38+
func SwapDirectoriesAtomic(dirA, dirB string) error {
39+
fdA, err := unix.Open(dirA, unix.O_DIRECTORY, 0)
40+
if err != nil {
41+
return err
42+
}
43+
defer unix.Close(fdA)
44+
45+
fdB, err := unix.Open(dirB, unix.O_DIRECTORY, 0)
46+
if err != nil {
47+
return err
48+
}
49+
defer unix.Close(fdB)
50+
51+
return unix.Renameat2(fdA, dirA, fdB, dirB, unix.RENAME_EXCHANGE)
52+
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package staticpod
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
)
8+
9+
func TestSwapDirectoriesAtomic(t *testing.T) {
10+
expectNoError := func(t *testing.T, err error) {
11+
if err != nil {
12+
t.Fatalf("Expected no error, got %v", err)
13+
}
14+
}
15+
16+
testCases := []struct {
17+
name string
18+
setup func(t *testing.T, tmpDir string) (dirA, dirB string)
19+
checkResult func(t *testing.T, dirA, dirB string, err error)
20+
}{
21+
{
22+
name: "both directories exist",
23+
setup: func(t *testing.T, tmpDir string) (string, string) {
24+
dirA := filepath.Join(tmpDir, "a")
25+
dirB := filepath.Join(tmpDir, "b")
26+
27+
if err := os.Mkdir(dirA, 0755); err != nil {
28+
t.Fatalf("Failed to create directory %s: %v", dirA, err)
29+
}
30+
if err := os.Mkdir(dirB, 0755); err != nil {
31+
t.Fatalf("Failed to create directory %s: %v", dirB, err)
32+
}
33+
34+
fileA, err := os.Create(filepath.Join(dirA, "1.txt"))
35+
expectNoError(t, err)
36+
defer fileA.Close()
37+
38+
fileB, err := os.Create(filepath.Join(dirB, "2.txt"))
39+
expectNoError(t, err)
40+
defer fileB.Close()
41+
42+
return dirA, dirB
43+
},
44+
checkResult: func(t *testing.T, dirA, dirB string, err error) {
45+
expectNoError(t, err)
46+
47+
// Make sure the contents are swapped.
48+
fileA, err := os.Open(filepath.Join(dirA, "2.txt"))
49+
if err != nil {
50+
t.Errorf("Expected directory %q to contain file 2.txt: %v", dirA, err)
51+
}
52+
defer fileA.Close()
53+
54+
fileB, err := os.Open(filepath.Join(dirB, "1.txt"))
55+
if err != nil {
56+
t.Errorf("Expected directory %q to contain file 1.txt: %v", dirB, err)
57+
}
58+
defer fileB.Close()
59+
},
60+
},
61+
{
62+
name: "directory A does not exist",
63+
setup: func(t *testing.T, tmpDir string) (string, string) {
64+
dirA := filepath.Join(tmpDir, "a")
65+
dirB := filepath.Join(tmpDir, "b")
66+
67+
if err := os.Mkdir(dirB, 0755); err != nil {
68+
t.Fatalf("Failed to create directory %s: %v", dirB, err)
69+
}
70+
71+
return dirA, dirB
72+
},
73+
checkResult: func(t *testing.T, dirA, dirB string, err error) {
74+
if !os.IsNotExist(err) {
75+
t.Errorf("Expected a directory not exists error, got %v", err)
76+
}
77+
},
78+
},
79+
{
80+
name: "directory B does not exist",
81+
setup: func(t *testing.T, tmpDir string) (string, string) {
82+
dirA := filepath.Join(tmpDir, "a")
83+
dirB := filepath.Join(tmpDir, "b")
84+
85+
if err := os.Mkdir(dirA, 0755); err != nil {
86+
t.Fatalf("Failed to create directory %s: %v", dirA, err)
87+
}
88+
89+
return dirA, dirB
90+
},
91+
checkResult: func(t *testing.T, dirA, dirB string, err error) {
92+
if !os.IsNotExist(err) {
93+
t.Errorf("Expected a directory not exists error, got %v", err)
94+
}
95+
},
96+
},
97+
}
98+
99+
for _, tc := range testCases {
100+
t.Run(tc.name, func(t *testing.T) {
101+
dirA, dirB := tc.setup(t, t.TempDir())
102+
tc.checkResult(t, dirA, dirB, SwapDirectoriesAtomic(dirA, dirB))
103+
})
104+
}
105+
}

0 commit comments

Comments
 (0)