Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions agent/backend/pktvisor/pktvisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ func parsePktvisorEntity(line string) (entity, name, rest string, ok bool) {
func (p *pktvisorBackend) Stop(ctx context.Context) error {
p.logger.Info("routine call to stop pktvisor", "routine", ctx.Value(config.ContextKey("routine")))
defer p.cancelFunc()

err := p.proc.Stop()
finalStatus := <-p.statusChan
if err != nil {
Expand All @@ -336,6 +337,15 @@ func (p *pktvisorBackend) Configure(logger *slog.Logger, repo policies.PolicyRep
p.adminAPIPort = defaultAPIPort
p.agentLabels = common.Otlp.AgentLabels

// Clean up old temp config file if it exists
if p.configFile != "" {
if err := os.Remove(p.configFile); err != nil && !os.IsNotExist(err) {
p.logger.Warn("failed to remove old pktvisor temp config file",
"file", p.configFile,
"error", err)
}
}

// Create temp config file
tmpDir := os.TempDir()
tmpFile, err := os.CreateTemp(tmpDir, "pktvisor-*.yaml")
Expand Down Expand Up @@ -430,8 +440,7 @@ func (p *pktvisorBackend) FullReset(ctx context.Context) error {
return err
}
}

// for each policy, restart the scraper
// create a new context for the backend
backendCtx, cancelFunc := context.WithCancel(context.WithValue(ctx, config.ContextKey("routine"), "pktvisor"))

// start it
Expand Down
5 changes: 2 additions & 3 deletions agent/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ FROM python:3.14-slim-trixie

RUN \
apt update && \
apt install --yes --no-install-recommends nmap openssh-client && \
apt install --yes --no-install-recommends nmap openssh-client tini && \
rm -rf /var/lib/apt/lists/*

RUN mkdir -p /opt/orb
Expand All @@ -70,7 +70,6 @@ COPY --from=snmp-discovery /usr/local/bin/snmp-discovery /usr/local/bin/snmp-dis

COPY --from=builder /build/orb-agent /usr/local/bin/orb-agent
COPY --from=builder /src/orb-agent/agent/docker/orb-agent-entry.sh /usr/local/bin/orb-agent-entry.sh
COPY --from=builder /src/orb-agent/agent/docker/run-agent.sh /run-agent.sh
COPY --from=builder /src/orb-agent/agent/docker/default_config.yaml /opt/orb/default_config.yaml

ENTRYPOINT [ "/usr/local/bin/orb-agent-entry.sh" ]
ENTRYPOINT [ "/usr/bin/tini", "--", "/usr/local/bin/orb-agent-entry.sh" ]
43 changes: 7 additions & 36 deletions agent/docker/orb-agent-entry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,6 @@
# entry point for orb-agent
#

agentstop1 () {
printf "\rFinishing container.."
exit 0
}

agentstop2 () {
if [ -f "/var/run/orb-agent.pid" ]; then
ID=$(cat /var/run/orb-agent.pid)
kill -15 $ID
fi
}

if [ "${INSTALL_DRIVERS_PATH}" != '' ]; then
cd "$(dirname "$(realpath "${INSTALL_DRIVERS_PATH}")")"
echo "Installing additional drivers"
Expand Down Expand Up @@ -74,28 +62,11 @@ if [ -n "${FLEET_CLIENT_ID}" ] && [ -n "${FLEET_CLIENT_SECRET}" ]; then
fi
fi

trap agentstop1 SIGINT
trap agentstop2 SIGTERM
# Default to 'run' subcommand if no args provided (preserve backward compatibility)
if [ ${#agent_args[@]} -eq 0 ]; then
agent_args=(run)
fi

# eternal loop
while true
do
# pid file dont exist
if [ ! -f "/var/run/orb-agent.pid" ]; then
# running orb-agent in background
nohup /run-agent.sh "${agent_args[@]}" &
sleep 2
if [ -d "/nohup.out" ]; then
tail -f /nohup.out &
fi
else
PID=$(cat /var/run/orb-agent.pid)
if [ ! -d "/proc/$PID" ]; then
# stop container
echo "$PID is not running"
rm /var/run/orb-agent.pid
exit 1
fi
sleep 5
fi
done
# Use exec to replace this shell process with the agent
# This makes the agent a direct child of tini, ensuring proper signal handling
exec /usr/local/bin/orb-agent "${agent_args[@]}"
12 changes: 0 additions & 12 deletions agent/docker/run-agent.sh

This file was deleted.

5 changes: 3 additions & 2 deletions agent/otlpbridge/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ func (s *BridgeServer) GetPolicyRepo() policies.PolicyRepo {

// Start starts the gRPC server without establishing MQTT.
// Publisher and topic should be set before OTLP data arrives.
func (s *BridgeServer) Start(_ context.Context) error {
lis, err := net.Listen("tcp", s.cfg.ListenAddr)
func (s *BridgeServer) Start(ctx context.Context) error {
// Platform-specific socket configuration (SO_REUSEADDR on Unix for faster port reuse)
lis, err := listen(ctx, s.cfg.ListenAddr)
if err != nil {
return fmt.Errorf("failed to listen on %s (port may be in use by another service): %w", s.cfg.ListenAddr, err)
}
Expand Down
34 changes: 34 additions & 0 deletions agent/otlpbridge/socket_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//go:build unix

package otlpbridge

import (
"context"
"net"
"syscall"

"golang.org/x/sys/unix"
)

// newListenConfig returns a net.ListenConfig with SO_REUSEADDR enabled for faster port reuse.
// This is particularly important for docker restart scenarios where ports may be in TIME_WAIT.
func newListenConfig() net.ListenConfig {
return net.ListenConfig{
Control: func(_, _ string, c syscall.RawConn) error {
var sockOptErr error
if err := c.Control(func(fd uintptr) {
// Enable SO_REUSEADDR to allow binding to TIME_WAIT sockets
sockOptErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_REUSEADDR, 1)
}); err != nil {
return err
}
return sockOptErr
},
}
}

// listen creates a TCP listener with platform-specific socket options.
func listen(ctx context.Context, addr string) (net.Listener, error) {
lc := newListenConfig()
return lc.Listen(ctx, "tcp", addr)
}
17 changes: 17 additions & 0 deletions agent/otlpbridge/socket_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//go:build windows

package otlpbridge

import (
"context"
"net"
)

// listen creates a TCP listener using standard net.Listen.
// Windows doesn't need SO_REUSEADDR configuration like Unix systems do.
func listen(ctx context.Context, addr string) (net.Listener, error) {
// On Windows, we use standard Listen without SO_REUSEADDR
// Windows handles port reuse differently and doesn't have the same TIME_WAIT issues
var lc net.ListenConfig
return lc.Listen(ctx, "tcp", addr)
}
2 changes: 2 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ func Run(_ *cobra.Command, _ []string) {
logger.Warn("stop signal received stopping agent")
a.Stop(rootCtx)
cancelFunc()
done <- true
return
case <-rootCtx.Done():
logger.Warn("mainRoutine context cancelled")
done <- true
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ require (
go.opentelemetry.io/otel/sdk v1.38.0
go.opentelemetry.io/otel/sdk/log v0.14.0
go.opentelemetry.io/proto/otlp v1.7.1
golang.org/x/sys v0.38.0
google.golang.org/grpc v1.75.0
google.golang.org/protobuf v1.36.8
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -111,7 +112,6 @@ require (
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.31.0 // indirect
golang.org/x/time v0.11.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
Expand Down
Loading