Skip to content

Commit 448551f

Browse files
authored
fix: orb-agent docker image restart (#271)
1 parent 5b6af5f commit 448551f

File tree

9 files changed

+77
-56
lines changed

9 files changed

+77
-56
lines changed

agent/backend/pktvisor/pktvisor.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ func parsePktvisorEntity(line string) (entity, name, rest string, ok bool) {
313313
func (p *pktvisorBackend) Stop(ctx context.Context) error {
314314
p.logger.Info("routine call to stop pktvisor", "routine", ctx.Value(config.ContextKey("routine")))
315315
defer p.cancelFunc()
316+
316317
err := p.proc.Stop()
317318
finalStatus := <-p.statusChan
318319
if err != nil {
@@ -336,6 +337,15 @@ func (p *pktvisorBackend) Configure(logger *slog.Logger, repo policies.PolicyRep
336337
p.adminAPIPort = defaultAPIPort
337338
p.agentLabels = common.Otlp.AgentLabels
338339

340+
// Clean up old temp config file if it exists
341+
if p.configFile != "" {
342+
if err := os.Remove(p.configFile); err != nil && !os.IsNotExist(err) {
343+
p.logger.Warn("failed to remove old pktvisor temp config file",
344+
"file", p.configFile,
345+
"error", err)
346+
}
347+
}
348+
339349
// Create temp config file
340350
tmpDir := os.TempDir()
341351
tmpFile, err := os.CreateTemp(tmpDir, "pktvisor-*.yaml")
@@ -430,8 +440,7 @@ func (p *pktvisorBackend) FullReset(ctx context.Context) error {
430440
return err
431441
}
432442
}
433-
434-
// for each policy, restart the scraper
443+
// create a new context for the backend
435444
backendCtx, cancelFunc := context.WithCancel(context.WithValue(ctx, config.ContextKey("routine"), "pktvisor"))
436445

437446
// start it

agent/docker/Dockerfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ FROM python:3.14-slim-trixie
4747

4848
RUN \
4949
apt update && \
50-
apt install --yes --no-install-recommends nmap openssh-client && \
50+
apt install --yes --no-install-recommends nmap openssh-client tini && \
5151
rm -rf /var/lib/apt/lists/*
5252

5353
RUN mkdir -p /opt/orb
@@ -70,7 +70,6 @@ COPY --from=snmp-discovery /usr/local/bin/snmp-discovery /usr/local/bin/snmp-dis
7070

7171
COPY --from=builder /build/orb-agent /usr/local/bin/orb-agent
7272
COPY --from=builder /src/orb-agent/agent/docker/orb-agent-entry.sh /usr/local/bin/orb-agent-entry.sh
73-
COPY --from=builder /src/orb-agent/agent/docker/run-agent.sh /run-agent.sh
7473
COPY --from=builder /src/orb-agent/agent/docker/default_config.yaml /opt/orb/default_config.yaml
7574

76-
ENTRYPOINT [ "/usr/local/bin/orb-agent-entry.sh" ]
75+
ENTRYPOINT [ "/usr/bin/tini", "--", "/usr/local/bin/orb-agent-entry.sh" ]

agent/docker/orb-agent-entry.sh

Lines changed: 7 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,6 @@
33
# entry point for orb-agent
44
#
55

6-
agentstop1 () {
7-
printf "\rFinishing container.."
8-
exit 0
9-
}
10-
11-
agentstop2 () {
12-
if [ -f "/var/run/orb-agent.pid" ]; then
13-
ID=$(cat /var/run/orb-agent.pid)
14-
kill -15 $ID
15-
fi
16-
}
17-
186
if [ "${INSTALL_DRIVERS_PATH}" != '' ]; then
197
cd "$(dirname "$(realpath "${INSTALL_DRIVERS_PATH}")")"
208
echo "Installing additional drivers"
@@ -74,28 +62,11 @@ if [ -n "${FLEET_CLIENT_ID}" ] && [ -n "${FLEET_CLIENT_SECRET}" ]; then
7462
fi
7563
fi
7664

77-
trap agentstop1 SIGINT
78-
trap agentstop2 SIGTERM
65+
# Default to 'run' subcommand if no args provided (preserve backward compatibility)
66+
if [ ${#agent_args[@]} -eq 0 ]; then
67+
agent_args=(run)
68+
fi
7969

80-
# eternal loop
81-
while true
82-
do
83-
# pid file dont exist
84-
if [ ! -f "/var/run/orb-agent.pid" ]; then
85-
# running orb-agent in background
86-
nohup /run-agent.sh "${agent_args[@]}" &
87-
sleep 2
88-
if [ -d "/nohup.out" ]; then
89-
tail -f /nohup.out &
90-
fi
91-
else
92-
PID=$(cat /var/run/orb-agent.pid)
93-
if [ ! -d "/proc/$PID" ]; then
94-
# stop container
95-
echo "$PID is not running"
96-
rm /var/run/orb-agent.pid
97-
exit 1
98-
fi
99-
sleep 5
100-
fi
101-
done
70+
# Use exec to replace this shell process with the agent
71+
# This makes the agent a direct child of tini, ensuring proper signal handling
72+
exec /usr/local/bin/orb-agent "${agent_args[@]}"

agent/docker/run-agent.sh

Lines changed: 0 additions & 12 deletions
This file was deleted.

agent/otlpbridge/server.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,9 @@ func (s *BridgeServer) GetPolicyRepo() policies.PolicyRepo {
104104

105105
// Start starts the gRPC server without establishing MQTT.
106106
// Publisher and topic should be set before OTLP data arrives.
107-
func (s *BridgeServer) Start(_ context.Context) error {
108-
lis, err := net.Listen("tcp", s.cfg.ListenAddr)
107+
func (s *BridgeServer) Start(ctx context.Context) error {
108+
// Platform-specific socket configuration (SO_REUSEADDR on Unix for faster port reuse)
109+
lis, err := listen(ctx, s.cfg.ListenAddr)
109110
if err != nil {
110111
return fmt.Errorf("failed to listen on %s (port may be in use by another service): %w", s.cfg.ListenAddr, err)
111112
}

agent/otlpbridge/socket_unix.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//go:build unix
2+
3+
package otlpbridge
4+
5+
import (
6+
"context"
7+
"net"
8+
"syscall"
9+
10+
"golang.org/x/sys/unix"
11+
)
12+
13+
// newListenConfig returns a net.ListenConfig with SO_REUSEADDR enabled for faster port reuse.
14+
// This is particularly important for docker restart scenarios where ports may be in TIME_WAIT.
15+
func newListenConfig() net.ListenConfig {
16+
return net.ListenConfig{
17+
Control: func(_, _ string, c syscall.RawConn) error {
18+
var sockOptErr error
19+
if err := c.Control(func(fd uintptr) {
20+
// Enable SO_REUSEADDR to allow binding to TIME_WAIT sockets
21+
sockOptErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_REUSEADDR, 1)
22+
}); err != nil {
23+
return err
24+
}
25+
return sockOptErr
26+
},
27+
}
28+
}
29+
30+
// listen creates a TCP listener with platform-specific socket options.
31+
func listen(ctx context.Context, addr string) (net.Listener, error) {
32+
lc := newListenConfig()
33+
return lc.Listen(ctx, "tcp", addr)
34+
}

agent/otlpbridge/socket_windows.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//go:build windows
2+
3+
package otlpbridge
4+
5+
import (
6+
"context"
7+
"net"
8+
)
9+
10+
// listen opens a TCP listener on addr for Windows builds.
//
// A zero-value net.ListenConfig is used, so no Control callback is installed
// and no custom socket options (such as SO_REUSEADDR) are set here; the Unix
// build sets that option to work around TIME_WAIT on restart, which is not
// considered necessary on Windows. ctx is forwarded to ListenConfig.Listen.
func listen(ctx context.Context, addr string) (net.Listener, error) {
	cfg := net.ListenConfig{}
	listener, err := cfg.Listen(ctx, "tcp", addr)
	return listener, err
}

cmd/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ func Run(_ *cobra.Command, _ []string) {
114114
logger.Warn("stop signal received stopping agent")
115115
a.Stop(rootCtx)
116116
cancelFunc()
117+
done <- true
118+
return
117119
case <-rootCtx.Done():
118120
logger.Warn("mainRoutine context cancelled")
119121
done <- true

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ require (
2323
go.opentelemetry.io/otel/sdk v1.38.0
2424
go.opentelemetry.io/otel/sdk/log v0.14.0
2525
go.opentelemetry.io/proto/otlp v1.7.1
26+
golang.org/x/sys v0.38.0
2627
google.golang.org/grpc v1.75.0
2728
google.golang.org/protobuf v1.36.8
2829
gopkg.in/yaml.v3 v3.0.1
@@ -111,7 +112,6 @@ require (
111112
golang.org/x/crypto v0.45.0 // indirect
112113
golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b // indirect
113114
golang.org/x/net v0.47.0 // indirect
114-
golang.org/x/sys v0.38.0 // indirect
115115
golang.org/x/text v0.31.0 // indirect
116116
golang.org/x/time v0.11.0 // indirect
117117
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect

0 commit comments

Comments
 (0)