Skip to content

Commit a389cbd

Browse files
committed
certsyncpod: Swap secret/cm directories atomically
Currently it can happen that cert-syncer replaces some of the secret/configmap files successfully and then fails. This can lead to problems when these are e.g. TLS cert/key files and the directory gets inconsistent. This may seem transient, but when cert-syncer is terminated in the middle, it can later fail to start as the whole kube-apiserver gets into a crash loop. This introduces a new staticpod.SwapDirectoriesAtomic, which uses unix.Renameat2 with RENAME_EXCHANGE flag set.
1 parent 4139e55 commit a389cbd

File tree

4 files changed

+300
-42
lines changed

4 files changed

+300
-42
lines changed

pkg/operator/staticpod/certsyncpod/certsync_controller.go

Lines changed: 59 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
115115

116116
contentDir := getConfigMapDir(c.destinationDir, cm.Name)
117117

118-
data := map[string]string{}
118+
data := make(map[string]string, len(configMap.Data))
119+
dataBytes := make(map[string][]byte, len(configMap.Data))
119120
for filename := range configMap.Data {
120121
fullFilename := filepath.Join(contentDir, filename)
121122

@@ -128,6 +129,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
128129
}
129130

130131
data[filename] = string(existingContent)
132+
dataBytes[filename] = existingContent
131133
}
132134

133135
// Check if cached configmap differs
@@ -152,27 +154,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
152154
continue
153155
}
154156

155-
klog.Infof("Creating directory %q ...", contentDir)
156-
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
157-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed creating directory for configmap: %s/%s: %v", configMap.Namespace, configMap.Name, err)
158-
errors = append(errors, err)
159-
continue
160-
}
161-
for filename, content := range configMap.Data {
162-
fullFilename := filepath.Join(contentDir, filename)
163-
// if the existing is the same, do nothing
164-
if reflect.DeepEqual(data[fullFilename], content) {
165-
continue
166-
}
167-
168-
klog.Infof("Writing configmap manifest %q ...", fullFilename)
169-
if err := staticpod.WriteFileAtomic([]byte(content), 0644, fullFilename); err != nil {
170-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed writing file for configmap: %s/%s: %v", configMap.Namespace, configMap.Name, err)
171-
errors = append(errors, err)
172-
continue
173-
}
174-
}
175-
c.eventRecorder.Eventf("CertificateUpdated", "Wrote updated configmap: %s/%s", configMap.Namespace, configMap.Name)
157+
errors = append(errors, c.writeData(configMap.Namespace, configMap.Name, "configmap", contentDir, dataBytes)...)
176158
}
177159

178160
klog.Infof("Syncing secrets: %v", c.secrets)
@@ -220,7 +202,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
220202

221203
contentDir := getSecretDir(c.destinationDir, s.Name)
222204

223-
data := map[string][]byte{}
205+
data := make(map[string][]byte, len(secret.Data))
224206
for filename := range secret.Data {
225207
fullFilename := filepath.Join(contentDir, filename)
226208

@@ -257,29 +239,64 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
257239
continue
258240
}
259241

260-
klog.Infof("Creating directory %q ...", contentDir)
261-
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
262-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed creating directory for secret: %s/%s: %v", secret.Namespace, secret.Name, err)
242+
errors = append(errors, c.writeData(secret.Namespace, secret.Name, "secret", contentDir, data)...)
243+
}
244+
245+
return utilerrors.NewAggregate(errors)
246+
}
247+
248+
func (c *CertSyncController) writeData(
249+
objectNamespace, objectName, kind string,
250+
contentDir string, data map[string][]byte,
251+
) []error {
252+
var errors []error
253+
254+
// We are going to atomically swap the new data directory for the old one.
255+
// In case the target directory does not exist, create it so that the directory not existing is not a special case.
256+
klog.Infof("Ensuring directory %q exists ...", contentDir)
257+
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
258+
c.eventRecorder.Warningf("CertificateUpdateFailed",
259+
"Failed creating content directory for %s: %s/%s: %v", kind, objectNamespace, objectName, err)
260+
errors = append(errors, err)
261+
return errors
262+
}
263+
264+
// Create a tmp source directory to be swapped.
265+
srcDir, err := os.MkdirTemp(filepath.Dir(contentDir), filepath.Base(contentDir)+"-*")
266+
if err != nil {
267+
c.eventRecorder.Warningf("CertificateUpdateFailed",
268+
"Failed to create source %s directory for %s/%s: %v", kind, objectNamespace, objectName, err)
269+
errors = append(errors, err)
270+
return errors
271+
}
272+
defer os.RemoveAll(srcDir)
273+
274+
// Populate the tmp directory with files.
275+
for filename, content := range data {
276+
// TODO: Fix permissions
277+
fullFilename := filepath.Join(srcDir, filename)
278+
klog.Infof("Writing %s manifest %q ...", kind, fullFilename)
279+
280+
if err := os.WriteFile(fullFilename, content, 0600); err != nil {
281+
c.eventRecorder.Warningf("CertificateUpdateFailed",
282+
"Failed writing file for %s: %s/%s: %v", kind, objectNamespace, objectName, err)
263283
errors = append(errors, err)
264284
continue
265285
}
266-
for filename, content := range secret.Data {
267-
// TODO fix permissions
268-
fullFilename := filepath.Join(contentDir, filename)
269-
// if the existing is the same, do nothing
270-
if reflect.DeepEqual(data[fullFilename], content) {
271-
continue
272-
}
286+
}
287+
if len(errors) > 0 {
288+
return errors
289+
}
273290

274-
klog.Infof("Writing secret manifest %q ...", fullFilename)
275-
if err := staticpod.WriteFileAtomic(content, 0600, fullFilename); err != nil {
276-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed writing file for secret: %s/%s: %v", secret.Namespace, secret.Name, err)
277-
errors = append(errors, err)
278-
continue
279-
}
280-
}
281-
c.eventRecorder.Eventf("CertificateUpdated", "Wrote updated secret: %s/%s", secret.Namespace, secret.Name)
291+
// Swap directories atomically.
292+
if err := staticpod.SwapDirectoriesAtomic(contentDir, srcDir); err != nil {
293+
c.eventRecorder.Warningf("CertificateUpdateFailed",
294+
"Failed to enable new %s directory for %s/%s: %v", kind, objectNamespace, objectName, err)
295+
errors = append(errors, err)
296+
return errors
282297
}
283298

284-
return utilerrors.NewAggregate(errors)
299+
c.eventRecorder.Eventf("CertificateUpdated",
300+
"Wrote updated %s: %s/%s", kind, objectNamespace, objectName)
301+
return nil
285302
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package certsyncpod
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
8+
"github.com/google/go-cmp/cmp"
9+
"github.com/openshift/library-go/pkg/operator/events/eventstesting"
10+
)
11+
12+
func TestCertSyncController_WriteData(t *testing.T) {
13+
const (
14+
objectNamespace = "default"
15+
objectName = "server_cert"
16+
)
17+
18+
testCases := []struct {
19+
name string
20+
directoryData map[string][]byte
21+
objectData map[string][]byte
22+
}{
23+
{
24+
name: "create target directory",
25+
directoryData: nil,
26+
objectData: map[string][]byte{
27+
"tls.crt": []byte("TLS cert"),
28+
"tls.key": []byte("TLS key"),
29+
},
30+
},
31+
{
32+
name: "update target directory change file content",
33+
directoryData: map[string][]byte{
34+
"tls.crt": []byte("TLS cert"),
35+
"tls.key": []byte("TLS key"),
36+
},
37+
objectData: map[string][]byte{
38+
"tls.crt": []byte("rotated TLS cert"),
39+
"tls.key": []byte("rotated TLS key"),
40+
},
41+
},
42+
{
43+
name: "update target directory change filenames",
44+
directoryData: map[string][]byte{
45+
"tls.crt": []byte("TLS cert"),
46+
"tls.key": []byte("TLS key"),
47+
},
48+
objectData: map[string][]byte{
49+
"api.crt": []byte("rotated TLS cert"),
50+
"api.key": []byte("rotated TLS key"),
51+
},
52+
},
53+
}
54+
55+
for _, tc := range testCases {
56+
t.Run(tc.name, func(t *testing.T) {
57+
// Init the controller.
58+
recorder := eventstesting.NewTestingEventRecorder(t)
59+
controller := &CertSyncController{
60+
eventRecorder: recorder,
61+
}
62+
63+
// Write the current directory contents.
64+
contentDir := filepath.Join(t.TempDir(), "secrets", objectName)
65+
if tc.directoryData != nil {
66+
if err := os.MkdirAll(contentDir, 0755); err != nil {
67+
t.Fatalf("Failed to create content directory %q: %v", contentDir, err)
68+
}
69+
70+
for filename, content := range tc.directoryData {
71+
targetPath := filepath.Join(contentDir, filename)
72+
if err := os.WriteFile(targetPath, content, 0644); err != nil {
73+
t.Fatalf("Failed to populate file %q: %v", targetPath, err)
74+
}
75+
}
76+
}
77+
78+
// Replace with the object data.
79+
errs := controller.writeData(objectNamespace, objectName, "secret", contentDir, tc.objectData)
80+
if len(errs) > 0 {
81+
t.Fatalf("Unexpected errors when writing new data object: %v", errs)
82+
}
83+
84+
// Ensure the content directory is in sync.
85+
entries, err := os.ReadDir(contentDir)
86+
if err != nil {
87+
t.Fatalf("Failed to read directory %q: %v", contentDir, err)
88+
}
89+
writtenData := make(map[string][]byte, len(entries))
90+
for _, entry := range entries {
91+
content, err := os.ReadFile(filepath.Join(contentDir, entry.Name()))
92+
if err != nil {
93+
t.Fatalf("Failed to read file %q: %v", entry.Name(), err)
94+
}
95+
writtenData[entry.Name()] = content
96+
}
97+
if !cmp.Equal(writtenData, tc.objectData) {
98+
t.Errorf("Unexpected content directory content:\n%s\n", cmp.Diff(tc.objectData, writtenData))
99+
}
100+
101+
// Make sure there are no leftovers in the parent directory.
102+
parentDir := filepath.Dir(contentDir)
103+
parentEntries, err := os.ReadDir(parentDir)
104+
if err != nil {
105+
t.Fatalf("Failed to read directory %q: %v", parentDir, err)
106+
}
107+
if n := len(parentEntries); n != 1 {
108+
t.Errorf("Unexpected number of entries in directory %q: %d", parentDir, n)
109+
for _, entry := range parentEntries {
110+
t.Logf("Parent directory entry: %q", entry.Name())
111+
}
112+
}
113+
})
114+
}
115+
116+
}

pkg/operator/staticpod/file_utils.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7+
8+
"golang.org/x/sys/unix"
79
)
810

911
func WriteFileAtomic(content []byte, filePerms os.FileMode, fullFilename string) error {
@@ -31,3 +33,20 @@ func writeTemporaryFile(content []byte, filePerms os.FileMode, fullFilename stri
3133
}
3234
return tmpfile.Name(), nil
3335
}
36+
37+
// SwapDirectoriesAtomic can be used to swap two directories atomically.
38+
func SwapDirectoriesAtomic(dirA, dirB string) error {
39+
fdA, err := unix.Open(dirA, unix.O_DIRECTORY, 0)
40+
if err != nil {
41+
return err
42+
}
43+
defer unix.Close(fdA)
44+
45+
fdB, err := unix.Open(dirB, unix.O_DIRECTORY, 0)
46+
if err != nil {
47+
return err
48+
}
49+
defer unix.Close(fdB)
50+
51+
return unix.Renameat2(fdA, dirA, fdB, dirB, unix.RENAME_EXCHANGE)
52+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package staticpod
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
)
8+
9+
func TestSwapDirectoriesAtomic(t *testing.T) {
10+
expectNoError := func(t *testing.T, err error) {
11+
t.Helper()
12+
if err != nil {
13+
t.Fatalf("Expected no error, got %v", err)
14+
}
15+
}
16+
17+
testCases := []struct {
18+
name string
19+
setup func(t *testing.T, tmpDir string) (dirA, dirB string)
20+
checkResult func(t *testing.T, dirA, dirB string, err error)
21+
}{
22+
{
23+
name: "both directories exist",
24+
setup: func(t *testing.T, tmpDir string) (string, string) {
25+
dirA := filepath.Join(tmpDir, "a")
26+
dirB := filepath.Join(tmpDir, "b")
27+
28+
if err := os.Mkdir(dirA, 0755); err != nil {
29+
t.Fatalf("Failed to create directory %s: %v", dirA, err)
30+
}
31+
if err := os.Mkdir(dirB, 0755); err != nil {
32+
t.Fatalf("Failed to create directory %s: %v", dirB, err)
33+
}
34+
35+
fileA, err := os.Create(filepath.Join(dirA, "1.txt"))
36+
expectNoError(t, err)
37+
defer fileA.Close()
38+
39+
fileB, err := os.Create(filepath.Join(dirB, "2.txt"))
40+
expectNoError(t, err)
41+
defer fileB.Close()
42+
43+
return dirA, dirB
44+
},
45+
checkResult: func(t *testing.T, dirA, dirB string, err error) {
46+
expectNoError(t, err)
47+
48+
// Make sure the contents are swapped.
49+
fileA, err := os.Open(filepath.Join(dirA, "2.txt"))
50+
if err != nil {
51+
t.Errorf("Expected directory %q to contain file 2.txt: %v", dirA, err)
52+
}
53+
defer fileA.Close()
54+
55+
fileB, err := os.Open(filepath.Join(dirB, "1.txt"))
56+
if err != nil {
57+
t.Errorf("Expected directory %q to contain file 1.txt: %v", dirB, err)
58+
}
59+
defer fileB.Close()
60+
},
61+
},
62+
{
63+
name: "directory A does not exist",
64+
setup: func(t *testing.T, tmpDir string) (string, string) {
65+
dirA := filepath.Join(tmpDir, "a")
66+
dirB := filepath.Join(tmpDir, "b")
67+
68+
if err := os.Mkdir(dirB, 0755); err != nil {
69+
t.Fatalf("Failed to create directory %s: %v", dirB, err)
70+
}
71+
72+
return dirA, dirB
73+
},
74+
checkResult: func(t *testing.T, dirA, dirB string, err error) {
75+
if !os.IsNotExist(err) {
76+
t.Errorf("Expected a directory not exists error, got %v", err)
77+
}
78+
},
79+
},
80+
{
81+
name: "directory B does not exist",
82+
setup: func(t *testing.T, tmpDir string) (string, string) {
83+
dirA := filepath.Join(tmpDir, "a")
84+
dirB := filepath.Join(tmpDir, "b")
85+
86+
if err := os.Mkdir(dirA, 0755); err != nil {
87+
t.Fatalf("Failed to create directory %s: %v", dirA, err)
88+
}
89+
90+
return dirA, dirB
91+
},
92+
checkResult: func(t *testing.T, dirA, dirB string, err error) {
93+
if !os.IsNotExist(err) {
94+
t.Errorf("Expected a directory not exists error, got %v", err)
95+
}
96+
},
97+
},
98+
}
99+
100+
for _, tc := range testCases {
101+
t.Run(tc.name, func(t *testing.T) {
102+
dirA, dirB := tc.setup(t, t.TempDir())
103+
tc.checkResult(t, dirA, dirB, SwapDirectoriesAtomic(dirA, dirB))
104+
})
105+
}
106+
}

0 commit comments

Comments
 (0)