Skip to content

Commit e38a84d

Browse files
authored
Fix read-only filesystem error during systemd handover (#219)
* Fix read-only filesystem error during systemd handover - Improved checkSocketDirWritable to specifically test Unix socket binding. - Added RuntimeDirectoryPreserve=yes to systemd service template. - Updated README with systemd configuration requirements. - Added unit tests for the improved writability check. This resolves the issue where systemd would remount /var/run/duckgres as read-only when the old process exited during a graceful handover. * Remove scripts/duckgres.service and update README * Fix lint issue in validation_test.go
1 parent 4d68a76 commit e38a84d

File tree

3 files changed

+59
-4
lines changed

3 files changed

+59
-4
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,8 @@ PGPASSWORD=postgres psql "host=localhost port=5432 user=postgres sslmode=require
569569
./duckgres-v2 --mode control-plane --port 5432 --handover-socket /var/run/duckgres/handover.sock
570570
```
571571

572+
When running under **systemd** with `RuntimeDirectory`, ensure `RuntimeDirectoryPreserve=yes` is set in your unit file. This prevents systemd from cleaning up or remounting the socket directory as read-only when the old process exits during a handover.
573+
572574
**Rolling worker updates** via signal:
573575

574576
```bash

controlplane/handover.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -327,17 +327,29 @@ func recvFD(uc *net.UnixConn) (*os.File, error) {
327327
}
328328

329329
// checkSocketDirWritable verifies that the socket directory is writable by
330-
// creating and removing a temporary file. This catches cases where
331-
// ProtectSystem=strict's ReadWritePaths bind mount has been lost (e.g., after
332-
// a handover race with systemd's RuntimeDirectory cleanup).
330+
// creating and removing both a temporary file and a Unix socket. This catches
331+
// cases where ProtectSystem=strict's ReadWritePaths bind mount has been lost
332+
// or remounted RO (e.g., after a handover race with systemd's RuntimeDirectory cleanup).
333333
func checkSocketDirWritable(dir string) error {
334+
// Test regular file creation
334335
f, err := os.CreateTemp(dir, ".writable-check-*")
335336
if err != nil {
336337
return fmt.Errorf("cannot write to socket directory %s: %w", dir, err)
337338
}
338339
name := f.Name()
339340
_ = f.Close()
340341
_ = os.Remove(name)
342+
343+
// Test Unix socket binding specifically (catches different EROFS/EPERM cases)
344+
socketPath := fmt.Sprintf("%s/.socket-check-%d.sock", dir, os.Getpid())
345+
_ = os.Remove(socketPath)
346+
l, err := net.Listen("unix", socketPath)
347+
if err != nil {
348+
return fmt.Errorf("cannot bind unix socket in %s: %w", dir, err)
349+
}
350+
_ = l.Close()
351+
_ = os.Remove(socketPath)
352+
341353
return nil
342354
}
343355

controlplane/validation_test.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package controlplane
22

3-
import "testing"
3+
import (
4+
"os"
5+
"strings"
6+
"testing"
7+
)
48

59
func TestValidateTLSFiles(t *testing.T) {
610
if err := validateTLSFiles("cert.pem", "key.pem"); err != nil {
@@ -92,3 +96,40 @@ func TestValidateControlPlaneSecurity(t *testing.T) {
9296
t.Fatalf("expected error for missing users")
9397
}
9498
}
99+
100+
func TestCheckSocketDirWritable(t *testing.T) {
101+
// Happy path: writable directory
102+
tmpDir := t.TempDir()
103+
// Unix socket paths have a max length (~104 bytes on macOS). t.TempDir()
104+
// paths can be very long, so use a short filename for the check.
105+
if err := checkSocketDirWritable(tmpDir); err != nil {
106+
// If t.TempDir is still too long, fallback to /tmp
107+
if strings.Contains(err.Error(), "invalid argument") || strings.Contains(err.Error(), "too long") {
108+
shortDir, err2 := os.MkdirTemp("/tmp", "dg-test-*")
109+
if err2 == nil {
110+
defer func() { _ = os.RemoveAll(shortDir) }()
111+
if err3 := checkSocketDirWritable(shortDir); err3 != nil {
112+
t.Fatalf("expected short directory %s to be writable: %v", shortDir, err3)
113+
}
114+
return
115+
}
116+
}
117+
t.Fatalf("expected directory %s to be writable: %v", tmpDir, err)
118+
}
119+
120+
// Failure path: non-existent directory
121+
if err := checkSocketDirWritable("/non/existent/path/for/duckgres/test"); err == nil {
122+
t.Fatalf("expected error for non-existent directory")
123+
}
124+
125+
// Failure path: read-only directory (if we can create one)
126+
roDir := t.TempDir()
127+
if err := os.Chmod(roDir, 0555); err != nil {
128+
t.Fatalf("failed to chmod directory to read-only: %v", err)
129+
}
130+
defer func() { _ = os.Chmod(roDir, 0755) }() // Restore for cleanup
131+
132+
if err := checkSocketDirWritable(roDir); err == nil {
133+
t.Fatalf("expected error for read-only directory")
134+
}
135+
}

0 commit comments

Comments
 (0)