Skip to content

Commit 38c2b4b

Browse files
fix(sshutil): add mutex to TOFU known_hosts to prevent race condition (#644)
Concurrent SSH connections (during cluster provisioning) could race on the known_hosts file read-then-write, causing duplicate entries or inconsistent state. Add a package-level mutex around the callback. Audit finding #13 (MEDIUM). Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
1 parent 378d2bc commit 38c2b4b

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

pkg/sshutil/tofu.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,16 @@ import (
2222
"os"
2323
"path/filepath"
2424
"strings"
25+
"sync"
2526

2627
"golang.org/x/crypto/ssh"
2728
)
2829

30+
// tofuMu serialises access to the known_hosts file so that concurrent SSH
31+
// connections (e.g. during multi-node cluster provisioning) do not race on the
32+
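// read-then-write. The lock is process-local: it serialises goroutines in this process only, not other processes sharing the file.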
33+
var tofuMu sync.Mutex
34+
2935
// TOFUHostKeyCallback returns an ssh.HostKeyCallback implementing a
3036
// Trust-On-First-Use (TOFU) pattern for SSH host key verification. On first
3137
// connection to a host, the key is recorded in a holodeck-specific known_hosts
@@ -34,6 +40,9 @@ import (
3440
// a potential MITM attack — is rejected with an error.
3541
func TOFUHostKeyCallback() ssh.HostKeyCallback {
3642
return func(hostname string, remote net.Addr, key ssh.PublicKey) error {
43+
tofuMu.Lock()
44+
defer tofuMu.Unlock()
45+
3746
cacheBase, err := os.UserCacheDir()
3847
if err != nil {
3948
return fmt.Errorf("cannot determine cache directory for TOFU host keys: %w", err)

0 commit comments

Comments
 (0)