Skip to content

Commit 60f05a8

Browse files
committed
certsyncpod: Swap secret/cm directories atomically
Currently it can happen that cert-syncer replaces some of the secret/configmap files successfully and then fails. This can lead to problems when these are e.g. TLS cert/key files and the directory gets inconsistent. This may seem transient, but when cert-syncer is terminated in the middle, it can later fail to start as the whole kube-apiserver gets into a crash loop. This introduces a new staticpod.SwapDirectoriesAtomic, which uses unix.Renameat2 with RENAME_EXCHANGE flag set.
1 parent 4139e55 commit 60f05a8

File tree

4 files changed

+424
-42
lines changed

4 files changed

+424
-42
lines changed

pkg/operator/staticpod/certsyncpod/certsync_controller.go

Lines changed: 73 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
115115

116116
contentDir := getConfigMapDir(c.destinationDir, cm.Name)
117117

118-
data := map[string]string{}
118+
data := make(map[string]string, len(configMap.Data))
119119
for filename := range configMap.Data {
120120
fullFilename := filepath.Join(contentDir, filename)
121121

@@ -152,27 +152,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
152152
continue
153153
}
154154

155-
klog.Infof("Creating directory %q ...", contentDir)
156-
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
157-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed creating directory for configmap: %s/%s: %v", configMap.Namespace, configMap.Name, err)
158-
errors = append(errors, err)
159-
continue
160-
}
161-
for filename, content := range configMap.Data {
162-
fullFilename := filepath.Join(contentDir, filename)
163-
// if the existing is the same, do nothing
164-
if reflect.DeepEqual(data[fullFilename], content) {
165-
continue
166-
}
167-
168-
klog.Infof("Writing configmap manifest %q ...", fullFilename)
169-
if err := staticpod.WriteFileAtomic([]byte(content), 0644, fullFilename); err != nil {
170-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed writing file for configmap: %s/%s: %v", configMap.Namespace, configMap.Name, err)
171-
errors = append(errors, err)
172-
continue
173-
}
174-
}
175-
c.eventRecorder.Eventf("CertificateUpdated", "Wrote updated configmap: %s/%s", configMap.Namespace, configMap.Name)
155+
errors = append(errors, writeFiles(&realFS, c.eventRecorder, configMap.Namespace, configMap.Name, "configmap", contentDir, data, 0644)...)
176156
}
177157

178158
klog.Infof("Syncing secrets: %v", c.secrets)
@@ -220,7 +200,7 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
220200

221201
contentDir := getSecretDir(c.destinationDir, s.Name)
222202

223-
data := map[string][]byte{}
203+
data := make(map[string][]byte, len(secret.Data))
224204
for filename := range secret.Data {
225205
fullFilename := filepath.Join(contentDir, filename)
226206

@@ -257,29 +237,80 @@ func (c *CertSyncController) sync(ctx context.Context, syncCtx factory.SyncConte
257237
continue
258238
}
259239

260-
klog.Infof("Creating directory %q ...", contentDir)
261-
if err := os.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
262-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed creating directory for secret: %s/%s: %v", secret.Namespace, secret.Name, err)
240+
errors = append(errors, writeFiles(&realFS, c.eventRecorder, secret.Namespace, secret.Name, "secret", contentDir, data, 0600)...)
241+
}
242+
243+
return utilerrors.NewAggregate(errors)
244+
}
245+
246+
type fileSystem struct {
247+
MkdirAll func(path string, perm os.FileMode) error
248+
MkdirTemp func(dir, pattern string) (string, error)
249+
RemoveAll func(path string) error
250+
WriteFile func(name string, data []byte, perm os.FileMode) error
251+
SwapDirectoriesAtomic func(dirA, dirB string) error
252+
}
253+
254+
var realFS = fileSystem{
255+
MkdirAll: os.MkdirAll,
256+
MkdirTemp: os.MkdirTemp,
257+
RemoveAll: os.RemoveAll,
258+
WriteFile: os.WriteFile,
259+
SwapDirectoriesAtomic: staticpod.SwapDirectoriesAtomic,
260+
}
261+
262+
func writeFiles[C string | []byte](
263+
fs *fileSystem, eventRecorder events.Recorder,
264+
objectNamespace, objectName, kind string,
265+
contentDir string, files map[string]C, filePerm os.FileMode,
266+
) []error {
267+
var errors []error
268+
269+
// We are going to atomically swap the new data directory for the old one.
270+
// In case the target directory does not exist, create it so that the directory not existing is not a special case.
271+
klog.Infof("Ensuring directory %q exists ...", contentDir)
272+
if err := fs.MkdirAll(contentDir, 0755); err != nil && !os.IsExist(err) {
273+
eventRecorder.Warningf("CertificateUpdateFailed",
274+
"Failed creating content directory for %s: %s/%s: %v", kind, objectNamespace, objectName, err)
275+
errors = append(errors, err)
276+
return errors
277+
}
278+
279+
// Create a tmp source directory to be swapped.
280+
srcDir, err := fs.MkdirTemp(filepath.Dir(contentDir), filepath.Base(contentDir)+"-*")
281+
if err != nil {
282+
eventRecorder.Warningf("CertificateUpdateFailed",
283+
"Failed to create source %s directory for %s/%s: %v", kind, objectNamespace, objectName, err)
284+
errors = append(errors, err)
285+
return errors
286+
}
287+
defer fs.RemoveAll(srcDir)
288+
289+
// Populate the tmp directory with files.
290+
for filename, content := range files {
291+
fullFilename := filepath.Join(srcDir, filename)
292+
klog.Infof("Writing %s manifest %q ...", kind, fullFilename)
293+
294+
if err := fs.WriteFile(fullFilename, []byte(content), filePerm); err != nil {
295+
eventRecorder.Warningf("CertificateUpdateFailed",
296+
"Failed writing file for %s: %s/%s: %v", kind, objectNamespace, objectName, err)
263297
errors = append(errors, err)
264298
continue
265299
}
266-
for filename, content := range secret.Data {
267-
// TODO fix permissions
268-
fullFilename := filepath.Join(contentDir, filename)
269-
// if the existing is the same, do nothing
270-
if reflect.DeepEqual(data[fullFilename], content) {
271-
continue
272-
}
300+
}
301+
if len(errors) > 0 {
302+
return errors
303+
}
273304

274-
klog.Infof("Writing secret manifest %q ...", fullFilename)
275-
if err := staticpod.WriteFileAtomic(content, 0600, fullFilename); err != nil {
276-
c.eventRecorder.Warningf("CertificateUpdateFailed", "Failed writing file for secret: %s/%s: %v", secret.Namespace, secret.Name, err)
277-
errors = append(errors, err)
278-
continue
279-
}
280-
}
281-
c.eventRecorder.Eventf("CertificateUpdated", "Wrote updated secret: %s/%s", secret.Namespace, secret.Name)
305+
// Swap directories atomically.
306+
if err := fs.SwapDirectoriesAtomic(contentDir, srcDir); err != nil {
307+
eventRecorder.Warningf("CertificateUpdateFailed",
308+
"Failed to enable new %s directory for %s/%s: %v", kind, objectNamespace, objectName, err)
309+
errors = append(errors, err)
310+
return errors
282311
}
283312

284-
return utilerrors.NewAggregate(errors)
313+
eventRecorder.Eventf("CertificateUpdated",
314+
"Wrote updated %s: %s/%s", kind, objectNamespace, objectName)
315+
return nil
285316
}
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
package certsyncpod
2+
3+
import (
4+
"errors"
5+
"os"
6+
"path/filepath"
7+
"testing"
8+
9+
"github.com/google/go-cmp/cmp"
10+
11+
"github.com/openshift/library-go/pkg/operator/events/eventstesting"
12+
)
13+
14+
func TestWriteFiles(t *testing.T) {
15+
const (
16+
objectNamespace = "default"
17+
objectName = "server_cert"
18+
)
19+
20+
newRealFS := func() *fileSystem {
21+
fs := realFS
22+
return &fs
23+
}
24+
25+
checkDirectoryContents := func(t *testing.T, contentDir string, files map[string][]byte) {
26+
// Ensure the content directory is in sync.
27+
entries, err := os.ReadDir(contentDir)
28+
if err != nil {
29+
t.Fatalf("Failed to read directory %q: %v", contentDir, err)
30+
}
31+
writtenData := make(map[string][]byte, len(entries))
32+
for _, entry := range entries {
33+
info, err := entry.Info()
34+
if err != nil {
35+
t.Fatalf("Failed to read file information for %q: %v", entry.Name(), err)
36+
}
37+
if perm := info.Mode().Perm(); perm != 0600 {
38+
t.Errorf("Unexpected file permissions for %q: %v", entry.Name(), perm)
39+
}
40+
41+
content, err := os.ReadFile(filepath.Join(contentDir, entry.Name()))
42+
if err != nil {
43+
t.Fatalf("Failed to read file %q: %v", entry.Name(), err)
44+
}
45+
writtenData[entry.Name()] = content
46+
}
47+
if !cmp.Equal(writtenData, files) {
48+
t.Errorf("Unexpected directory content:\n%s\n", cmp.Diff(files, writtenData))
49+
}
50+
}
51+
52+
ensureParentDirectoryClean := func(t *testing.T, contentDir string) {
53+
// Make sure there are no leftovers in the parent directory.
54+
parentDir := filepath.Dir(contentDir)
55+
parentEntries, err := os.ReadDir(parentDir)
56+
if err != nil {
57+
t.Fatalf("Failed to read directory %q: %v", parentDir, err)
58+
}
59+
if n := len(parentEntries); n != 1 {
60+
t.Errorf("Unexpected number of entries in directory %q: %d", parentDir, n)
61+
for _, entry := range parentEntries {
62+
t.Logf("Parent directory entry: %q", entry.Name())
63+
}
64+
}
65+
}
66+
67+
checkOnSuccess := func(t *testing.T, contentDir string, directoryData, objectData map[string][]byte, errs []error) {
68+
if len(errs) > 0 {
69+
t.Fatalf("Unexpected errors when writing new data object: %v", errs)
70+
}
71+
72+
checkDirectoryContents(t, contentDir, objectData)
73+
ensureParentDirectoryClean(t, contentDir)
74+
}
75+
76+
checkOnError := func(t *testing.T, contentDir string, directoryData, objectData map[string][]byte, errs []error) {
77+
if len(errs) == 0 {
78+
t.Error("Expected some errors, got none")
79+
}
80+
81+
checkDirectoryContents(t, contentDir, directoryData)
82+
ensureParentDirectoryClean(t, contentDir)
83+
}
84+
85+
type testCase struct {
86+
name string
87+
newFS func() *fileSystem
88+
directoryData map[string][]byte
89+
objectData map[string][]byte
90+
checkState func(t *testing.T, contentDir string, directoryData, objectData map[string][]byte, errs []error)
91+
}
92+
93+
errorTestCase := func(name string, newFS func() *fileSystem) testCase {
94+
return testCase{
95+
name: name,
96+
newFS: newFS,
97+
directoryData: map[string][]byte{
98+
"tls.crt": []byte("TLS cert"),
99+
"tls.key": []byte("TLS key"),
100+
},
101+
objectData: map[string][]byte{
102+
"api.crt": []byte("rotated TLS cert"),
103+
"api.key": []byte("rotated TLS key"),
104+
},
105+
checkState: checkOnError,
106+
}
107+
}
108+
109+
testCases := []testCase{
110+
{
111+
name: "create target directory",
112+
newFS: newRealFS,
113+
directoryData: nil,
114+
objectData: map[string][]byte{
115+
"tls.crt": []byte("TLS cert"),
116+
"tls.key": []byte("TLS key"),
117+
},
118+
checkState: checkOnSuccess,
119+
},
120+
{
121+
name: "update target directory change file content",
122+
newFS: newRealFS,
123+
directoryData: map[string][]byte{
124+
"tls.crt": []byte("TLS cert"),
125+
"tls.key": []byte("TLS key"),
126+
},
127+
objectData: map[string][]byte{
128+
"tls.crt": []byte("rotated TLS cert"),
129+
"tls.key": []byte("rotated TLS key"),
130+
},
131+
checkState: checkOnSuccess,
132+
},
133+
{
134+
name: "update target directory change filenames",
135+
newFS: newRealFS,
136+
directoryData: map[string][]byte{
137+
"tls.crt": []byte("TLS cert"),
138+
"tls.key": []byte("TLS key"),
139+
},
140+
objectData: map[string][]byte{
141+
"api.crt": []byte("rotated TLS cert"),
142+
"api.key": []byte("rotated TLS key"),
143+
},
144+
checkState: checkOnSuccess,
145+
},
146+
errorTestCase("directory unchanged on failed to create object directory", func() *fileSystem {
147+
fs := newRealFS()
148+
fs.MkdirAll = func(path string, perm os.FileMode) error {
149+
return errors.New("nuked")
150+
}
151+
return fs
152+
}),
153+
errorTestCase("directory unchanged on failed to create temporary directory", func() *fileSystem {
154+
fs := newRealFS()
155+
fs.MkdirTemp = func(dir, pattern string) (string, error) {
156+
return "", errors.New("nuked")
157+
}
158+
return fs
159+
}),
160+
errorTestCase("directory unchanged on failed to write file", func() *fileSystem {
161+
fs := newRealFS()
162+
fs.WriteFile = func(path string, data []byte, perm os.FileMode) error {
163+
return errors.New("nuked")
164+
}
165+
return fs
166+
}),
167+
errorTestCase("directory unchanged on failed to swap directories", func() *fileSystem {
168+
fs := newRealFS()
169+
fs.SwapDirectoriesAtomic = func(dirA, dirB string) error {
170+
return errors.New("nuked")
171+
}
172+
return fs
173+
}),
174+
}
175+
176+
for _, tc := range testCases {
177+
t.Run(tc.name, func(t *testing.T) {
178+
recorder := eventstesting.NewTestingEventRecorder(t)
179+
180+
// Write the current directory contents.
181+
contentDir := filepath.Join(t.TempDir(), "secrets", objectName)
182+
if tc.directoryData != nil {
183+
if err := os.MkdirAll(contentDir, 0755); err != nil {
184+
t.Fatalf("Failed to create content directory %q: %v", contentDir, err)
185+
}
186+
187+
for filename, content := range tc.directoryData {
188+
targetPath := filepath.Join(contentDir, filename)
189+
if err := os.WriteFile(targetPath, content, 0600); err != nil {
190+
t.Fatalf("Failed to populate file %q: %v", targetPath, err)
191+
}
192+
}
193+
}
194+
195+
// Replace with the object data.
196+
errs := writeFiles(tc.newFS(), recorder, objectNamespace, objectName, "secret",
197+
contentDir, tc.objectData, 0600)
198+
199+
// Check the resulting state.
200+
tc.checkState(t, contentDir, tc.directoryData, tc.objectData, errs)
201+
})
202+
}
203+
204+
}

pkg/operator/staticpod/file_utils.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7+
8+
"golang.org/x/sys/unix"
79
)
810

911
func WriteFileAtomic(content []byte, filePerms os.FileMode, fullFilename string) error {
@@ -31,3 +33,16 @@ func writeTemporaryFile(content []byte, filePerms os.FileMode, fullFilename stri
3133
}
3234
return tmpfile.Name(), nil
3335
}
36+
37+
// SwapDirectoriesAtomic can be used to swap two directories atomically.
38+
//
39+
// This function requires absolute paths and will return an error if that's not the case.
40+
func SwapDirectoriesAtomic(dirA, dirB string) error {
41+
if !filepath.IsAbs(dirA) {
42+
return fmt.Errorf("not an absolute path: %q", dirA)
43+
}
44+
if !filepath.IsAbs(dirB) {
45+
return fmt.Errorf("not an absolute path: %q", dirB)
46+
}
47+
return unix.Renameat2(0, dirA, 0, dirB, unix.RENAME_EXCHANGE)
48+
}

0 commit comments

Comments
 (0)