Skip to content

Commit cd71e92

Browse files
jianghao65536 authored and cyphar committed
systemd: retry when the dbus connection returns EAGAIN
Signed-off-by: jianghao65536 <[email protected]>
[cyphar: gofumpt systemd/dbus_test.go]
[cyphar: simplify retry loop to return from inside loop]
[cyphar: improve exponential backoff to be less aggressive]
[cyphar: improve parallel test]
Signed-off-by: Aleksa Sarai <[email protected]>
1 parent 2f41057 commit cd71e92

File tree

2 files changed

+78
-3
lines changed

2 files changed

+78
-3
lines changed

systemd/dbus.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"math/rand/v2"
78
"sync"
9+
"time"
810

911
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
1012
dbus "github.com/godbus/dbus/v5"
13+
"golang.org/x/sys/unix"
1114
)
1215

1316
var (
@@ -64,10 +67,27 @@ func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) {
6467
}
6568

6669
func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
67-
if dbusRootless {
68-
return newUserSystemdDbus()
70+
newDbusConn := func() (*systemdDbus.Conn, error) {
71+
if dbusRootless {
72+
return newUserSystemdDbus()
73+
}
74+
return systemdDbus.NewWithContext(context.TODO())
75+
}
76+
77+
var err error
78+
for retry := range 7 {
79+
var conn *systemdDbus.Conn
80+
conn, err = newDbusConn()
81+
if !errors.Is(err, unix.EAGAIN) {
82+
return conn, err
83+
}
84+
// Exponential backoff (100ms * 2^attempt + ~12.5% jitter).
85+
// At most we would expect 15 seconds of delay with 7 attempts.
86+
delay := 100 * time.Millisecond << retry
87+
delay += time.Duration(rand.Int64N(1 + (delay.Milliseconds() >> 3)))
88+
time.Sleep(delay)
6989
}
70-
return systemdDbus.NewWithContext(context.TODO())
90+
return nil, fmt.Errorf("dbus connection failed after several retries: %w", err)
7191
}
7292

7393
// resetConnection resets the connection to its initial state

systemd/dbus_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package systemd
2+
3+
import (
4+
"context"
5+
"os"
6+
"sync"
7+
"testing"
8+
)
9+
10+
func TestParallelConnection(t *testing.T) {
11+
if !IsRunningSystemd() {
12+
t.Skip("Test requires systemd.")
13+
}
14+
var dms []*dbusConnManager
15+
for range 600 {
16+
dms = append(dms, newDbusConnManager(os.Geteuid() != 0))
17+
}
18+
19+
ctx, cancel := context.WithCancel(context.Background())
20+
defer cancel()
21+
22+
var (
23+
doneWg sync.WaitGroup
24+
startCh = make(chan struct{})
25+
errCh = make(chan error, 1)
26+
)
27+
for _, dm := range dms {
28+
doneWg.Add(1)
29+
go func(dm *dbusConnManager) {
30+
defer doneWg.Done()
31+
select {
32+
case <-ctx.Done():
33+
return
34+
case <-startCh:
35+
_, err := dm.newConnection()
36+
if err != nil {
37+
// Only bother trying to send the first error.
38+
select {
39+
case errCh <- err:
40+
default:
41+
}
42+
cancel()
43+
}
44+
}
45+
}(dm)
46+
}
47+
close(startCh) // trigger all connection attempts
48+
doneWg.Wait()
49+
50+
select {
51+
case err := <-errCh:
52+
t.Fatal(err)
53+
default:
54+
}
55+
}

0 commit comments

Comments
 (0)